sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t


from sqlglot.optimizer.annotate_types import TypeAnnotator

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression
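

# For example, `_alias_ordered_group` rewrites
#
#   SELECT x + 1 AS z FROM t GROUP BY x + 1 ORDER BY z
#
# so that the GROUP BY clause references the projection alias instead:
#
#   SELECT x + 1 AS z FROM t GROUP BY z ORDER BY z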


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _build_json_strip_nulls(args: t.List) -> exp.JSONStripNulls:
    expression = exp.JSONStripNulls(this=seq_get(args, 0))

    for arg in args[1:]:
        if isinstance(arg, exp.Kwarg):
            expression.set(arg.this.name.lower(), arg)
        else:
            expression.set("expression", arg)

    return expression


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )
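

# Illustrative sketch of `_array_contains_sql`: BigQuery has no ARRAY_CONTAINS
# function, so a membership test like ARRAY_CONTAINS(arr, val) is generated
# roughly as
#
#   EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = val)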


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder
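

# Illustrative behavior of `_build_extract_json_with_default_path`: a call with
# a single argument is parsed as if the root path had been given explicitly, so
#
#   JSON_VALUE(json_col)          ~  JSON_VALUE(json_col, '$')
#   JSON_VALUE(json_col, '$.a')   keeps its explicit JSONPath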


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


def _annotate_percentile_cont(
    self: TypeAnnotator, expression: exp.PercentileCont
) -> exp.PercentileCont:
    """
    +------------+------------+------------+---------+
    |   INPUT    |  NUMERIC   | BIGNUMERIC | FLOAT64 |
    +------------+------------+------------+---------+
    | NUMERIC    | NUMERIC    | BIGNUMERIC | FLOAT64 |
    | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | FLOAT64 |
    | FLOAT64    | FLOAT64    | FLOAT64    | FLOAT64 |
    +------------+------------+------------+---------+
    """
    self._annotate_args(expression)

    self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
    return expression


def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
    self._annotate_args(expression)

    struct_type = exp.DataType(
        this=exp.DataType.Type.STRUCT,
        expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
        nested=True,
    )
    self._set_type(
        expression,
        exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
    )

    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains:
    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr, json_scope=seq_get(args, 2))


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated


def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    array_args = expression.expressions

    # BigQuery behaves as follows:
    #
    # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t            -- foo, STRUCT<STRING>
    # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
    if (
        len(array_args) == 1
        and isinstance(select := array_args[0].unnest(), exp.Select)
        and (query_type := select.meta.get("query_type")) is not None
        and query_type.is_type(exp.DataType.Type.STRUCT)
        and len(query_type.expressions) == 1
        and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
        and (projection_type := col_def.kind) is not None
        and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
    ):
        array_type = exp.DataType(
            this=exp.DataType.Type.ARRAY,
            expressions=[projection_type.copy()],
            nested=True,
        )
        return self._annotate_with_type(expression, array_type)

    return self._annotate_by_args(expression, "expressions", array=True)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
    COERCES_TO = {
        **TypeAnnotator.COERCES_TO,
        exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE},
    }
    COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
        exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
        exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"),
        exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Array: _annotate_array,
        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.CodePointsToBytes: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.BINARY
        ),
        exp.CodePointsToString: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.Concat: _annotate_concat,
        exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
        exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.CumeDist: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE),
        exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"),
        exp.DenseRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.FirstValue: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.Format: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
        ),
        exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.IgnoreNulls: lambda self, e: self._annotate_by_args(e, "this"),
        exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONArrayAppend: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONArrayInsert: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
        exp.JSONExtractScalar: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.JSONExtract: lambda self, e: self._annotate_by_args(e, "this"),
        exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.JSONFormat: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        ),
        exp.JSONObject: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONRemove: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONSet: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONStripNulls: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON),
        exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.JSONValueArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
        ),
        exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
        exp.Lead: lambda self, e: self._annotate_by_args(e, "this"),
        exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.LaxBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN),
        exp.LaxFloat64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.LaxInt64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.LaxString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.NthValue: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ntile: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME),
        exp.ParseBignumeric: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.BIGDECIMAL
        ),
        exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL),
        exp.PercentileCont: lambda self, e: _annotate_percentile_cont(self, e),
        exp.PercentileDisc: lambda self, e: self._annotate_by_args(e, "this"),
        exp.PercentRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE),
        exp.Rank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.RegexpInstr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.Replace: lambda self, e: self._annotate_by_args(e, "this"),
        exp.RespectNulls: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"),
        exp.RowNumber: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
        exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.VARCHAR
        ),
        exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR),
        exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.TimestampFromParts: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.DATETIME
        ),
        exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"),
        exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.ToCodePoints: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
        ),
        exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME),
        exp.Translate: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        VAR_TOKENS = {
            TokenType.DASH,
            TokenType.VAR,
        }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "WHILE": TokenType.COMMAND,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier
        ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
            TokenType.ASC,
            TokenType.DESC,
        }
        UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list,
            "BOOL": exp.JSONBool.from_arg_list,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list,
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_STRIP_NULLS": _build_json_strip_nulls,
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
                this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
            ),
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
            "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))),
        }
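
        # Note on the PARSE_* builders above: BigQuery's functions take
        # (format, value) while sqlglot's canonical nodes take (value, format),
        # hence the argument swap. For example, PARSE_DATE('%Y%m%d', '20240101')
        # is parsed as a StrToDate of '20240101' whose format string is
        # translated through the dialect's TIME_MAPPING.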

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "PREDICT": lambda self: self._parse_predict(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
            "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(),
            "VECTOR_SEARCH": lambda self: self._parse_vector_search(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]:
            index = self._index
            this = self._parse_range()
            self._match_text_seq("DO")
            if self._match(TokenType.COMMAND):
                self._retreat(index)
                return self._parse_as_command(self._prev)
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this
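
        # Illustrative behavior of `_parse_table_part`: BigQuery allows dashes in
        # unquoted table parts, so a reference like my-project.dataset.tbl is glued
        # back into a single identifier per part while parsing, equivalent to
        # `my-project`.dataset.tbl.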

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest
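
        # Illustrative behavior of `_parse_bracket` with BRACKET_OFFSETS, which maps
        # BigQuery's positional accessors onto (offset, safe) pairs:
        #
        #   arr[OFFSET(0)]       -- zero-based, errors when out of bounds
        #   arr[ORDINAL(1)]      -- one-based, errors when out of bounds
        #   arr[SAFE_OFFSET(9)]  -- zero-based, NULL when out of bounds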

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_predict(self) -> exp.Predict:
            self._match_text_seq("MODEL")
            this = self._parse_table()

            self._match(TokenType.COMMA)
            self._match_text_seq("TABLE")

            return self.expression(
                exp.Predict,
                this=this,
                expression=self._parse_table(),
                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
            )

        def _parse_generate_embedding(self) -> exp.GenerateEmbedding:
            self._match_text_seq("MODEL")
            this = self._parse_table()

            self._match(TokenType.COMMA)
            self._match_text_seq("TABLE")

            return self.expression(
                exp.GenerateEmbedding,
                this=this,
                expression=self._parse_table(),
                params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
            )

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            self._match(TokenType.TABLE)
            this = self._parse_table()

            expr = self.expression(exp.FeaturesAtTime, this=this)

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_vector_search(self) -> exp.VectorSearch:
            self._match(TokenType.TABLE)
            base_table = self._parse_table()

            self._match(TokenType.COMMA)

            column_to_search = self._parse_bitwise()
            self._match(TokenType.COMMA)

            self._match(TokenType.TABLE)
            query_table = self._parse_table()

            expr = self.expression(
                exp.VectorSearch,
                this=base_table,
                column_to_search=column_to_search,
                query_table=query_table,
            )

            while self._match(TokenType.COMMA):
                # query_column_to_search can be named argument or positional
                if self._match(TokenType.STRING, advance=False):
                    query_column = self._parse_string()
                    expr.set("query_column_to_search", query_column)
                else:
                    arg = self._parse_lambda()
                    if arg:
                        expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )
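
        # Illustrative statement shape accepted by `_parse_export_data`, mirroring
        # BigQuery's EXPORT DATA syntax:
        #
        #   EXPORT DATA WITH CONNECTION my_conn
        #   OPTIONS (uri = 'gs://bucket/file-*.csv', format = 'CSV')
        #   AS SELECT * FROM dataset.tbl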

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$")

        TS_OR_DS_TYPES = (
            exp.TsOrDsToDatetime,
            exp.TsOrDsToTimestamp,
            exp.TsOrDsToTime,
            exp.TsOrDsToDate,
        )

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"),
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.ByteLength: rename_func("BYTE_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONBool: rename_func("BOOL"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.JSONKeysAtDepth: rename_func("JSON_KEYS"),
            exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.Normalize: lambda self, e: self.func(
                "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE",
                e.this,
                e.args.get("form"),
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
"REGEXP_EXTRACT", 1293 e.this, 1294 e.expression, 1295 e.args.get("position"), 1296 e.args.get("occurrence"), 1297 ), 1298 exp.RegexpExtractAll: lambda self, e: self.func( 1299 "REGEXP_EXTRACT_ALL", e.this, e.expression 1300 ), 1301 exp.RegexpReplace: regexp_replace_sql, 1302 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1303 exp.ReturnsProperty: _returnsproperty_sql, 1304 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1305 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1306 exp.ParseDatetime: lambda self, e: self.func( 1307 "PARSE_DATETIME", self.format_time(e), e.this 1308 ), 1309 exp.Select: transforms.preprocess( 1310 [ 1311 transforms.explode_projection_to_unnest(), 1312 transforms.unqualify_unnest, 1313 transforms.eliminate_distinct_on, 1314 _alias_ordered_group, 1315 transforms.eliminate_semi_and_anti_joins, 1316 ] 1317 ), 1318 exp.SHA: rename_func("SHA1"), 1319 exp.SHA2: sha256_sql, 1320 exp.StabilityProperty: lambda self, e: ( 1321 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1322 ), 1323 exp.String: rename_func("STRING"), 1324 exp.StrPosition: lambda self, e: ( 1325 strposition_sql( 1326 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1327 ) 1328 ), 1329 exp.StrToDate: _str_to_datetime_sql, 1330 exp.StrToTime: _str_to_datetime_sql, 1331 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1332 exp.TimeFromParts: rename_func("TIME"), 1333 exp.TimestampFromParts: rename_func("DATETIME"), 1334 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1335 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1336 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1337 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1338 exp.TimeStrToTime: timestrtotime_sql, 1339 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1340 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1341 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1342 exp.TsOrDsToTime: rename_func("TIME"), 1343 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1344 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1345 exp.Unhex: rename_func("FROM_HEX"), 1346 exp.UnixDate: rename_func("UNIX_DATE"), 1347 exp.UnixToTime: _unix_to_time_sql, 1348 exp.Uuid: lambda *_: "GENERATE_UUID()", 1349 exp.Values: _derived_table_values_to_unnest, 1350 exp.VariancePop: rename_func("VAR_POP"), 1351 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1352 } 1353 1354 SUPPORTED_JSON_PATH_PARTS = { 1355 exp.JSONPathKey, 1356 exp.JSONPathRoot, 1357 exp.JSONPathSubscript, 1358 } 1359 1360 TYPE_MAPPING = { 1361 **generator.Generator.TYPE_MAPPING, 1362 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1363 exp.DataType.Type.BIGINT: "INT64", 1364 exp.DataType.Type.BINARY: "BYTES", 1365 exp.DataType.Type.BLOB: "BYTES", 1366 exp.DataType.Type.BOOLEAN: "BOOL", 1367 exp.DataType.Type.CHAR: "STRING", 1368 exp.DataType.Type.DECIMAL: "NUMERIC", 1369 exp.DataType.Type.DOUBLE: "FLOAT64", 1370 exp.DataType.Type.FLOAT: "FLOAT64", 1371 exp.DataType.Type.INT: "INT64", 1372 exp.DataType.Type.NCHAR: "STRING", 1373 exp.DataType.Type.NVARCHAR: "STRING", 1374 exp.DataType.Type.SMALLINT: "INT64", 1375 exp.DataType.Type.TEXT: "STRING", 1376 exp.DataType.Type.TIMESTAMP: "DATETIME", 1377 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1378 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1379 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1380 exp.DataType.Type.TINYINT: "INT64", 1381 exp.DataType.Type.ROWVERSION: "BYTES", 1382 exp.DataType.Type.UUID: "STRING", 1383 exp.DataType.Type.VARBINARY: "BYTES", 1384 
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
            unit = expression.unit
            unit_sql = unit.name if unit.is_string else self.sql(unit)
            return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            elif isinstance(this, exp.TsOrDsToTime):
                func_name = "FORMAT_TIME"
            else:
                func_name = "FORMAT_DATE"

            time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")
1569 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1570 return self.func( 1571 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1572 ) 1573 1574 return super().attimezone_sql(expression) 1575 1576 def trycast_sql(self, expression: exp.TryCast) -> str: 1577 return self.cast_sql(expression, safe_prefix="SAFE_") 1578 1579 def bracket_sql(self, expression: exp.Bracket) -> str: 1580 this = expression.this 1581 expressions = expression.expressions 1582 1583 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1584 arg = expressions[0] 1585 if arg.type is None: 1586 from sqlglot.optimizer.annotate_types import annotate_types 1587 1588 arg = annotate_types(arg, dialect=self.dialect) 1589 1590 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1591 # BQ doesn't support bracket syntax with string values for structs 1592 return f"{self.sql(this)}.{arg.name}" 1593 1594 expressions_sql = self.expressions(expression, flat=True) 1595 offset = expression.args.get("offset") 1596 1597 if offset == 0: 1598 expressions_sql = f"OFFSET({expressions_sql})" 1599 elif offset == 1: 1600 expressions_sql = f"ORDINAL({expressions_sql})" 1601 elif offset is not None: 1602 self.unsupported(f"Unsupported array offset: {offset}") 1603 1604 if expression.args.get("safe"): 1605 expressions_sql = f"SAFE_{expressions_sql}" 1606 1607 return f"{self.sql(this)}[{expressions_sql}]" 1608 1609 def in_unnest_op(self, expression: exp.Unnest) -> str: 1610 return self.sql(expression) 1611 1612 def version_sql(self, expression: exp.Version) -> str: 1613 if expression.name == "TIMESTAMP": 1614 expression.set("this", "SYSTEM_TIME") 1615 return super().version_sql(expression) 1616 1617 def contains_sql(self, expression: exp.Contains) -> str: 1618 this = expression.this 1619 expr = expression.expression 1620 1621 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1622 this = this.this 1623 expr = expr.this 1624 1625 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1626 1627 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1628 this = expression.this 1629 1630 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1631 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1632 # because they aren't literals and so the above syntax is invalid BigQuery. 1633 if isinstance(this, exp.Array): 1634 elem = seq_get(this.expressions, 0) 1635 if not (elem and elem.find(exp.Query)): 1636 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1637 1638 return super().cast_sql(expression, safe_prefix=safe_prefix) 1639 1640 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1641 variables = self.expressions(expression, "this") 1642 default = self.sql(expression, "default") 1643 default = f" DEFAULT {default}" if default else "" 1644 kind = self.sql(expression, "kind") 1645 kind = f" {kind}" if kind else "" 1646 1647 return f"{variables}{kind}{default}"
441class BigQuery(Dialect): 442 WEEK_OFFSET = -1 443 UNNEST_COLUMN_ONLY = True 444 SUPPORTS_USER_DEFINED_TYPES = False 445 SUPPORTS_SEMI_ANTI_JOIN = False 446 LOG_BASE_FIRST = False 447 HEX_LOWERCASE = True 448 FORCE_EARLY_ALIAS_REF_EXPANSION = True 449 PRESERVE_ORIGINAL_NAMES = True 450 HEX_STRING_IS_INTEGER_TYPE = True 451 452 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 453 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 454 455 # bigquery udfs are case sensitive 456 NORMALIZE_FUNCTIONS = False 457 458 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 459 TIME_MAPPING = { 460 "%D": "%m/%d/%y", 461 "%E6S": "%S.%f", 462 "%e": "%-d", 463 } 464 465 FORMAT_MAPPING = { 466 "DD": "%d", 467 "MM": "%m", 468 "MON": "%b", 469 "MONTH": "%B", 470 "YYYY": "%Y", 471 "YY": "%y", 472 "HH": "%I", 473 "HH12": "%I", 474 "HH24": "%H", 475 "MI": "%M", 476 "SS": "%S", 477 "SSSSS": "%f", 478 "TZH": "%z", 479 } 480 481 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 482 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 483 # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix 484 # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column 485 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE", "_TABLE_SUFFIX", "_FILE_NAME"} 486 487 # All set operations require either a DISTINCT or ALL specifier 488 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 489 490 # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont 491 COERCES_TO = { 492 **TypeAnnotator.COERCES_TO, 493 exp.DataType.Type.BIGDECIMAL: {exp.DataType.Type.DOUBLE}, 494 } 495 COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL} 496 497 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 498 TYPE_TO_EXPRESSIONS = { 499 **Dialect.TYPE_TO_EXPRESSIONS, 500 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 501 } 502 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 503 504 ANNOTATORS = { 505 **Dialect.ANNOTATORS, 506 **{ 507 expr_type: annotate_with_type_lambda(data_type) 508 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 509 for expr_type in expressions 510 }, 511 **{ 512 expr_type: lambda self, e: _annotate_math_functions(self, e) 513 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 514 }, 515 **{ 516 expr_type: lambda self, e: self._annotate_by_args(e, "this") 517 for expr_type in ( 518 exp.Left, 519 exp.Right, 520 exp.Lower, 521 exp.Upper, 522 exp.Pad, 523 exp.Trim, 524 exp.RegexpExtract, 525 exp.RegexpReplace, 526 exp.Repeat, 527 exp.Substring, 528 ) 529 }, 530 exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e), 531 exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e), 532 exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True), 533 exp.ArgMax: lambda self, e: self._annotate_by_args(e, "this"), 534 exp.ArgMin: lambda self, e: self._annotate_by_args(e, "this"), 535 exp.Array: _annotate_array, 536 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 537 exp.Ascii: lambda self, e: self._annotate_with_type(e, 
exp.DataType.Type.BIGINT), 538 exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 539 exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 540 exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 541 exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 542 exp.ByteLength: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 543 exp.ByteString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 544 exp.CodePointsToBytes: lambda self, e: self._annotate_with_type( 545 e, exp.DataType.Type.BINARY 546 ), 547 exp.CodePointsToString: lambda self, e: self._annotate_with_type( 548 e, exp.DataType.Type.VARCHAR 549 ), 550 exp.Concat: _annotate_concat, 551 exp.Contains: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN), 552 exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 553 exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 554 exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 555 exp.CumeDist: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 556 exp.DateFromUnixDate: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATE), 557 exp.DateTrunc: lambda self, e: self._annotate_by_args(e, "this"), 558 exp.DenseRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 559 exp.FarmFingerprint: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 560 exp.FirstValue: lambda self, e: self._annotate_by_args(e, "this"), 561 exp.Unhex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 562 exp.Float64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 563 exp.Format: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 564 exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type( 565 e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery") 566 ), 567 exp.Grouping: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 568 exp.IgnoreNulls: lambda self, e: self._annotate_by_args(e, "this"), 569 exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 570 exp.JSONArrayAppend: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 571 exp.JSONArrayInsert: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 572 exp.JSONBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN), 573 exp.JSONExtractScalar: lambda self, e: self._annotate_with_type( 574 e, exp.DataType.Type.VARCHAR 575 ), 576 exp.JSONExtract: lambda self, e: self._annotate_by_args(e, "this"), 577 exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True), 578 exp.JSONFormat: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 579 exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type( 580 e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery") 581 ), 582 exp.JSONObject: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 583 exp.JSONRemove: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 584 exp.JSONSet: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 585 exp.JSONStripNulls: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 586 exp.JSONType: lambda self, e: 
self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 587 exp.JSONValueArray: lambda self, e: self._annotate_with_type( 588 e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery") 589 ), 590 exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"), 591 exp.Lead: lambda self, e: self._annotate_by_args(e, "this"), 592 exp.LowerHex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 593 exp.LaxBool: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BOOLEAN), 594 exp.LaxFloat64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 595 exp.LaxInt64: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 596 exp.LaxString: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 597 exp.MD5Digest: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 598 exp.Normalize: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 599 exp.NthValue: lambda self, e: self._annotate_by_args(e, "this"), 600 exp.Ntile: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 601 exp.ParseTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 602 exp.ParseDatetime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DATETIME), 603 exp.ParseBignumeric: lambda self, e: self._annotate_with_type( 604 e, exp.DataType.Type.BIGDECIMAL 605 ), 606 exp.ParseNumeric: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DECIMAL), 607 exp.PercentileCont: lambda self, e: _annotate_perncentile_cont(self, e), 608 exp.PercentileDisc: lambda self, e: self._annotate_by_args(e, "this"), 609 exp.PercentRank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 610 exp.Rank: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 611 exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True), 612 exp.RegexpInstr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 613 exp.Replace: lambda self, e: self._annotate_by_args(e, "this"), 614 exp.RespectNulls: lambda self, e: self._annotate_by_args(e, "this"), 615 exp.Reverse: lambda self, e: self._annotate_by_args(e, "this"), 616 exp.RowNumber: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 617 exp.SafeConvertBytesToString: lambda self, e: self._annotate_with_type( 618 e, exp.DataType.Type.VARCHAR 619 ), 620 exp.Soundex: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 621 exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 622 exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 623 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 624 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 625 exp.TimestampFromParts: lambda self, e: self._annotate_with_type( 626 e, exp.DataType.Type.DATETIME 627 ), 628 exp.TimestampTrunc: lambda self, e: self._annotate_by_args(e, "this"), 629 exp.TimeFromParts: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 630 exp.TimeTrunc: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 631 exp.ToCodePoints: lambda self, e: self._annotate_with_type( 632 e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery") 633 ), 634 exp.TsOrDsToTime: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.TIME), 635 exp.Translate: lambda self, e: self._annotate_by_args(e, "this"), 636 exp.Unicode: lambda self, e: self._annotate_with_type(e, 
exp.DataType.Type.BIGINT), 637 } 638 639 def normalize_identifier(self, expression: E) -> E: 640 if ( 641 isinstance(expression, exp.Identifier) 642 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 643 ): 644 parent = expression.parent 645 while isinstance(parent, exp.Dot): 646 parent = parent.parent 647 648 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 649 # by default. The following check uses a heuristic to detect tables based on whether 650 # they are qualified. This should generally be correct, because tables in BigQuery 651 # must be qualified with at least a dataset, unless @@dataset_id is set. 652 case_sensitive = ( 653 isinstance(parent, exp.UserDefinedFunction) 654 or ( 655 isinstance(parent, exp.Table) 656 and parent.db 657 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 658 ) 659 or expression.meta.get("is_table") 660 ) 661 if not case_sensitive: 662 expression.set("this", expression.this.lower()) 663 664 return t.cast(E, expression) 665 666 return super().normalize_identifier(expression) 667 668 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 669 VAR_TOKENS = { 670 TokenType.DASH, 671 TokenType.VAR, 672 } 673 674 class Tokenizer(tokens.Tokenizer): 675 QUOTES = ["'", '"', '"""', "'''"] 676 COMMENTS = ["--", "#", ("/*", "*/")] 677 IDENTIFIERS = ["`"] 678 STRING_ESCAPES = ["\\"] 679 680 HEX_STRINGS = [("0x", ""), ("0X", "")] 681 682 BYTE_STRINGS = [ 683 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 684 ] 685 686 RAW_STRINGS = [ 687 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 688 ] 689 690 NESTED_COMMENTS = False 691 692 KEYWORDS = { 693 **tokens.Tokenizer.KEYWORDS, 694 "ANY TYPE": TokenType.VARIANT, 695 "BEGIN": TokenType.COMMAND, 696 "BEGIN TRANSACTION": TokenType.BEGIN, 697 "BYTEINT": TokenType.INT, 698 "BYTES": TokenType.BINARY, 699 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 700 "DATETIME": TokenType.TIMESTAMP, 701 "DECLARE": TokenType.DECLARE, 702 "ELSEIF": TokenType.COMMAND, 703 "EXCEPTION": TokenType.COMMAND, 704 "EXPORT": TokenType.EXPORT, 705 "FLOAT64": TokenType.DOUBLE, 706 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 707 "LOOP": TokenType.COMMAND, 708 "MODEL": TokenType.MODEL, 709 "NOT DETERMINISTIC": TokenType.VOLATILE, 710 "RECORD": TokenType.STRUCT, 711 "REPEAT": TokenType.COMMAND, 712 "TIMESTAMP": TokenType.TIMESTAMPTZ, 713 "WHILE": TokenType.COMMAND, 714 } 715 KEYWORDS.pop("DIV") 716 KEYWORDS.pop("VALUES") 717 KEYWORDS.pop("/*+") 718 719 class Parser(parser.Parser): 720 PREFIXED_PIVOT_COLUMNS = True 721 LOG_DEFAULTS_TO_LN = True 722 SUPPORTS_IMPLICIT_UNNEST = True 723 JOINS_HAVE_EQUAL_PRECEDENCE = True 724 725 # BigQuery does not allow ASC/DESC to be used as an identifier 726 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 727 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 728 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 729 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 730 TokenType.ASC, 731 TokenType.DESC, 732 } 733 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 734 735 FUNCTIONS = { 736 **parser.Parser.FUNCTIONS, 737 "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list, 738 "BOOL": exp.JSONBool.from_arg_list, 739 "CONTAINS_SUBSTR": _build_contains_substring, 740 "DATE": _build_date, 741 "DATE_ADD": 
build_date_delta_with_interval(exp.DateAdd), 742 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 743 "DATE_TRUNC": lambda args: exp.DateTrunc( 744 unit=seq_get(args, 1), 745 this=seq_get(args, 0), 746 zone=seq_get(args, 2), 747 ), 748 "DATETIME": _build_datetime, 749 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 750 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 751 "DIV": binary_from_function(exp.IntDiv), 752 "EDIT_DISTANCE": _build_levenshtein, 753 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 754 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 755 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 756 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 757 "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 758 "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list, 759 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 760 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 761 "JSON_STRIP_NULLS": _build_json_strip_nulls, 762 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 763 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 764 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 765 "MD5": exp.MD5Digest.from_arg_list, 766 "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize( 767 this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True 768 ), 769 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 770 "TO_HEX": _build_to_hex, 771 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 772 [seq_get(args, 1), seq_get(args, 0)] 773 ), 774 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 775 [seq_get(args, 1), seq_get(args, 0)] 776 ), 777 "PARSE_TIMESTAMP": _build_parse_timestamp, 778 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 779 [seq_get(args, 1), seq_get(args, 0)] 780 ), 781 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 782 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 783 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 784 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 785 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 786 ), 787 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 788 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 789 "SPLIT": lambda args: exp.Split( 790 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 791 this=seq_get(args, 0), 792 expression=seq_get(args, 1) or exp.Literal.string(","), 793 ), 794 "STRPOS": exp.StrPosition.from_arg_list, 795 "TIME": _build_time, 796 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 797 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 798 "TIMESTAMP": _build_timestamp, 799 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 800 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 801 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 802 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 803 ), 804 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 805 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 806 ), 807 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 808 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 809 "FORMAT_DATETIME": 
_build_format_time(exp.TsOrDsToDatetime), 810 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 811 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 812 "FROM_HEX": exp.Unhex.from_arg_list, 813 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 814 } 815 816 FUNCTION_PARSERS = { 817 **parser.Parser.FUNCTION_PARSERS, 818 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 819 "JSON_ARRAY": lambda self: self.expression( 820 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 821 ), 822 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 823 "PREDICT": lambda self: self._parse_predict(), 824 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 825 "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(), 826 "VECTOR_SEARCH": lambda self: self._parse_vector_search(), 827 } 828 FUNCTION_PARSERS.pop("TRIM") 829 830 NO_PAREN_FUNCTIONS = { 831 **parser.Parser.NO_PAREN_FUNCTIONS, 832 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 833 } 834 835 NESTED_TYPE_TOKENS = { 836 *parser.Parser.NESTED_TYPE_TOKENS, 837 TokenType.TABLE, 838 } 839 840 PROPERTY_PARSERS = { 841 **parser.Parser.PROPERTY_PARSERS, 842 "NOT DETERMINISTIC": lambda self: self.expression( 843 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 844 ), 845 "OPTIONS": lambda self: self._parse_with_property(), 846 } 847 848 CONSTRAINT_PARSERS = { 849 **parser.Parser.CONSTRAINT_PARSERS, 850 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 851 } 852 853 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 854 RANGE_PARSERS.pop(TokenType.OVERLAPS) 855 856 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 857 858 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 859 860 STATEMENT_PARSERS = { 861 **parser.Parser.STATEMENT_PARSERS, 862 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 863 TokenType.END: lambda self: self._parse_as_command(self._prev), 864 TokenType.FOR: lambda self: self._parse_for_in(), 865 TokenType.EXPORT: lambda self: self._parse_export_data(), 866 TokenType.DECLARE: lambda self: self._parse_declare(), 867 } 868 869 BRACKET_OFFSETS = { 870 "OFFSET": (0, False), 871 "ORDINAL": (1, False), 872 "SAFE_OFFSET": (0, True), 873 "SAFE_ORDINAL": (1, True), 874 } 875 876 def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]: 877 index = self._index 878 this = self._parse_range() 879 self._match_text_seq("DO") 880 if self._match(TokenType.COMMAND): 881 self._retreat(index) 882 return self._parse_as_command(self._prev) 883 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 884 885 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 886 this = super()._parse_table_part(schema=schema) or self._parse_number() 887 888 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 889 if isinstance(this, exp.Identifier): 890 table_name = this.name 891 while self._match(TokenType.DASH, advance=False) and self._next: 892 start = self._curr 893 while self._is_connected() and not self._match_set( 894 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 895 ): 896 self._advance() 897 898 if start == self._curr: 899 break 900 901 table_name += self._find_sql(start, self._prev) 902 903 this = exp.Identifier( 904 this=table_name, quoted=this.args.get("quoted") 905 ).update_positions(this) 906 elif isinstance(this, exp.Literal): 907 table_name = this.name 908 909 if 
self._is_connected() and self._parse_var(any_token=True): 910 table_name += self._prev.text 911 912 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 913 914 return this 915 916 def _parse_table_parts( 917 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 918 ) -> exp.Table: 919 table = super()._parse_table_parts( 920 schema=schema, is_db_reference=is_db_reference, wildcard=True 921 ) 922 923 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 924 if not table.catalog: 925 if table.db: 926 previous_db = table.args["db"] 927 parts = table.db.split(".") 928 if len(parts) == 2 and not table.args["db"].quoted: 929 table.set( 930 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 931 ) 932 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 933 else: 934 previous_this = table.this 935 parts = table.name.split(".") 936 if len(parts) == 2 and not table.this.quoted: 937 table.set( 938 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 939 ) 940 table.set( 941 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 942 ) 943 944 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 945 alias = table.this 946 catalog, db, this, *rest = ( 947 exp.to_identifier(p, quoted=True) 948 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 949 ) 950 951 for part in (catalog, db, this): 952 if part: 953 part.update_positions(table.this) 954 955 if rest and this: 956 this = exp.Dot.build([this, *rest]) # type: ignore 957 958 table = exp.Table( 959 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 960 ) 961 table.meta["quoted_table"] = True 962 else: 963 alias = None 964 965 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 966 # dataset, so if the project identifier is omitted we need to fix the ast so that 967 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 968 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 969 # views, because it would seem like the "catalog" part is set, when it'd actually 970 # be the region/dataset. Merging the two identifiers into a single one is done to 971 # avoid producing a 4-part Table reference, which would cause issues in the schema 972 # module, when there are 3-part table names mixed with information schema views. 973 # 974 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 975 table_parts = table.parts 976 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 977 # We need to alias the table here to avoid breaking existing qualified columns. 978 # This is expected to be safe, because if there's an actual alias coming up in 979 # the token stream, it will overwrite this one. If there isn't one, we are only 980 # exposing the name that can be used to reference the view explicitly (a no-op). 
981 exp.alias_( 982 table, 983 t.cast(exp.Identifier, alias or table_parts[-1]), 984 table=True, 985 copy=False, 986 ) 987 988 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 989 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 990 line=table_parts[-2].meta.get("line"), 991 col=table_parts[-1].meta.get("col"), 992 start=table_parts[-2].meta.get("start"), 993 end=table_parts[-1].meta.get("end"), 994 ) 995 table.set("this", new_this) 996 table.set("db", seq_get(table_parts, -3)) 997 table.set("catalog", seq_get(table_parts, -4)) 998 999 return table 1000 1001 def _parse_column(self) -> t.Optional[exp.Expression]: 1002 column = super()._parse_column() 1003 if isinstance(column, exp.Column): 1004 parts = column.parts 1005 if any("." in p.name for p in parts): 1006 catalog, db, table, this, *rest = ( 1007 exp.to_identifier(p, quoted=True) 1008 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 1009 ) 1010 1011 if rest and this: 1012 this = exp.Dot.build([this, *rest]) # type: ignore 1013 1014 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 1015 column.meta["quoted_column"] = True 1016 1017 return column 1018 1019 @t.overload 1020 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 1021 1022 @t.overload 1023 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 1024 1025 def _parse_json_object(self, agg=False): 1026 json_object = super()._parse_json_object() 1027 array_kv_pair = seq_get(json_object.expressions, 0) 1028 1029 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 1030 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 1031 if ( 1032 array_kv_pair 1033 and isinstance(array_kv_pair.this, exp.Array) 1034 and isinstance(array_kv_pair.expression, exp.Array) 1035 ): 1036 keys = array_kv_pair.this.expressions 1037 values = array_kv_pair.expression.expressions 1038 1039 json_object.set( 1040 "expressions", 1041 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 1042 ) 1043 1044 return json_object 1045 1046 def _parse_bracket( 1047 self, this: t.Optional[exp.Expression] = None 1048 ) -> t.Optional[exp.Expression]: 1049 bracket = super()._parse_bracket(this) 1050 1051 if this is bracket: 1052 return bracket 1053 1054 if isinstance(bracket, exp.Bracket): 1055 for expression in bracket.expressions: 1056 name = expression.name.upper() 1057 1058 if name not in self.BRACKET_OFFSETS: 1059 break 1060 1061 offset, safe = self.BRACKET_OFFSETS[name] 1062 bracket.set("offset", offset) 1063 bracket.set("safe", safe) 1064 expression.replace(expression.expressions[0]) 1065 1066 return bracket 1067 1068 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 1069 unnest = super()._parse_unnest(with_alias=with_alias) 1070 1071 if not unnest: 1072 return None 1073 1074 unnest_expr = seq_get(unnest.expressions, 0) 1075 if unnest_expr: 1076 from sqlglot.optimizer.annotate_types import annotate_types 1077 1078 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 1079 1080 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 1081 # in contrast to other dialects such as DuckDB which flattens only the array by default 1082 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 1083 array_elem.is_type(exp.DataType.Type.STRUCT) 1084 for array_elem in unnest_expr._type.expressions 1085 ): 1086 unnest.set("explode_array", True) 1087 
1088 return unnest 1089 1090 def _parse_make_interval(self) -> exp.MakeInterval: 1091 expr = exp.MakeInterval() 1092 1093 for arg_key in expr.arg_types: 1094 value = self._parse_lambda() 1095 1096 if not value: 1097 break 1098 1099 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 1100 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 1101 if isinstance(value, exp.Kwarg): 1102 arg_key = value.this.name 1103 1104 expr.set(arg_key, value) 1105 1106 self._match(TokenType.COMMA) 1107 1108 return expr 1109 1110 def _parse_predict(self) -> exp.Predict: 1111 self._match_text_seq("MODEL") 1112 this = self._parse_table() 1113 1114 self._match(TokenType.COMMA) 1115 self._match_text_seq("TABLE") 1116 1117 return self.expression( 1118 exp.Predict, 1119 this=this, 1120 expression=self._parse_table(), 1121 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 1122 ) 1123 1124 def _parse_generate_embedding(self) -> exp.GenerateEmbedding: 1125 self._match_text_seq("MODEL") 1126 this = self._parse_table() 1127 1128 self._match(TokenType.COMMA) 1129 self._match_text_seq("TABLE") 1130 1131 return self.expression( 1132 exp.GenerateEmbedding, 1133 this=this, 1134 expression=self._parse_table(), 1135 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 1136 ) 1137 1138 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 1139 self._match(TokenType.TABLE) 1140 this = self._parse_table() 1141 1142 expr = self.expression(exp.FeaturesAtTime, this=this) 1143 1144 while self._match(TokenType.COMMA): 1145 arg = self._parse_lambda() 1146 1147 # Get the LHS of the Kwarg and set the arg to that value, e.g 1148 # "num_rows => 1" sets the expr's `num_rows` arg 1149 if arg: 1150 expr.set(arg.this.name, arg) 1151 1152 return expr 1153 1154 def _parse_vector_search(self) -> exp.VectorSearch: 1155 self._match(TokenType.TABLE) 1156 base_table = self._parse_table() 1157 1158 self._match(TokenType.COMMA) 1159 1160 column_to_search = self._parse_bitwise() 1161 self._match(TokenType.COMMA) 1162 1163 self._match(TokenType.TABLE) 1164 query_table = self._parse_table() 1165 1166 expr = self.expression( 1167 exp.VectorSearch, 1168 this=base_table, 1169 column_to_search=column_to_search, 1170 query_table=query_table, 1171 ) 1172 1173 while self._match(TokenType.COMMA): 1174 # query_column_to_search can be named argument or positional 1175 if self._match(TokenType.STRING, advance=False): 1176 query_column = self._parse_string() 1177 expr.set("query_column_to_search", query_column) 1178 else: 1179 arg = self._parse_lambda() 1180 if arg: 1181 expr.set(arg.this.name, arg) 1182 1183 return expr 1184 1185 def _parse_export_data(self) -> exp.Export: 1186 self._match_text_seq("DATA") 1187 1188 return self.expression( 1189 exp.Export, 1190 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1191 options=self._parse_properties(), 1192 this=self._match_text_seq("AS") and self._parse_select(), 1193 ) 1194 1195 class Generator(generator.Generator): 1196 INTERVAL_ALLOWS_PLURAL_FORM = False 1197 JOIN_HINTS = False 1198 QUERY_HINTS = False 1199 TABLE_HINTS = False 1200 LIMIT_FETCH = "LIMIT" 1201 RENAME_TABLE_WITH_DB = False 1202 NVL2_SUPPORTED = False 1203 UNNEST_WITH_ORDINALITY = False 1204 COLLATE_IS_FUNC = True 1205 LIMIT_ONLY_LITERALS = True 1206 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1207 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1208 JSON_KEY_VALUE_PAIR_SEP = "," 1209 NULL_ORDERING_SUPPORTED = False 1210 
IGNORE_NULLS_IN_FUNC = True 1211 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1212 CAN_IMPLEMENT_ARRAY_ANY = True 1213 SUPPORTS_TO_NUMBER = False 1214 NAMED_PLACEHOLDER_TOKEN = "@" 1215 HEX_FUNC = "TO_HEX" 1216 WITH_PROPERTIES_PREFIX = "OPTIONS" 1217 SUPPORTS_EXPLODING_PROJECTIONS = False 1218 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1219 SUPPORTS_UNIX_SECONDS = True 1220 1221 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1222 1223 TS_OR_DS_TYPES = ( 1224 exp.TsOrDsToDatetime, 1225 exp.TsOrDsToTimestamp, 1226 exp.TsOrDsToTime, 1227 exp.TsOrDsToDate, 1228 ) 1229 1230 TRANSFORMS = { 1231 **generator.Generator.TRANSFORMS, 1232 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1233 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1234 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1235 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1236 exp.Array: inline_array_unless_query, 1237 exp.ArrayContains: _array_contains_sql, 1238 exp.ArrayFilter: filter_array_using_unnest, 1239 exp.ArrayRemove: filter_array_using_unnest, 1240 exp.ByteLength: rename_func("BYTE_LENGTH"), 1241 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1242 exp.CollateProperty: lambda self, e: ( 1243 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1244 if e.args.get("default") 1245 else f"COLLATE {self.sql(e, 'this')}" 1246 ), 1247 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1248 exp.CountIf: rename_func("COUNTIF"), 1249 exp.Create: _create_sql, 1250 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1251 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1252 exp.DateDiff: lambda self, e: self.func( 1253 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1254 ), 1255 exp.DateFromParts: rename_func("DATE"), 1256 exp.DateStrToDate: datestrtodate_sql, 1257 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1258 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1259 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1260 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1261 exp.FromTimeZone: lambda self, e: self.func( 1262 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1263 ), 1264 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1265 exp.GroupConcat: lambda self, e: groupconcat_sql( 1266 self, e, func_name="STRING_AGG", within_group=False 1267 ), 1268 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1269 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1270 exp.If: if_sql(false_value="NULL"), 1271 exp.ILike: no_ilike_sql, 1272 exp.IntDiv: rename_func("DIV"), 1273 exp.Int64: rename_func("INT64"), 1274 exp.JSONBool: rename_func("BOOL"), 1275 exp.JSONExtract: _json_extract_sql, 1276 exp.JSONExtractArray: _json_extract_sql, 1277 exp.JSONExtractScalar: _json_extract_sql, 1278 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1279 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1280 exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"), 1281 exp.Levenshtein: _levenshtein_sql, 1282 exp.Max: max_or_greatest, 1283 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1284 exp.MD5Digest: rename_func("MD5"), 1285 exp.Min: min_or_least, 1286 exp.Normalize: lambda self, e: self.func( 1287 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1288 e.this, 1289 e.args.get("form"), 1290 ), 1291 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1292 exp.RegexpExtract: lambda self, e: self.func( 1293 
"REGEXP_EXTRACT", 1294 e.this, 1295 e.expression, 1296 e.args.get("position"), 1297 e.args.get("occurrence"), 1298 ), 1299 exp.RegexpExtractAll: lambda self, e: self.func( 1300 "REGEXP_EXTRACT_ALL", e.this, e.expression 1301 ), 1302 exp.RegexpReplace: regexp_replace_sql, 1303 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1304 exp.ReturnsProperty: _returnsproperty_sql, 1305 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1306 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1307 exp.ParseDatetime: lambda self, e: self.func( 1308 "PARSE_DATETIME", self.format_time(e), e.this 1309 ), 1310 exp.Select: transforms.preprocess( 1311 [ 1312 transforms.explode_projection_to_unnest(), 1313 transforms.unqualify_unnest, 1314 transforms.eliminate_distinct_on, 1315 _alias_ordered_group, 1316 transforms.eliminate_semi_and_anti_joins, 1317 ] 1318 ), 1319 exp.SHA: rename_func("SHA1"), 1320 exp.SHA2: sha256_sql, 1321 exp.StabilityProperty: lambda self, e: ( 1322 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1323 ), 1324 exp.String: rename_func("STRING"), 1325 exp.StrPosition: lambda self, e: ( 1326 strposition_sql( 1327 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1328 ) 1329 ), 1330 exp.StrToDate: _str_to_datetime_sql, 1331 exp.StrToTime: _str_to_datetime_sql, 1332 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1333 exp.TimeFromParts: rename_func("TIME"), 1334 exp.TimestampFromParts: rename_func("DATETIME"), 1335 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1336 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1337 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1338 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1339 exp.TimeStrToTime: timestrtotime_sql, 1340 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1341 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1342 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1343 exp.TsOrDsToTime: rename_func("TIME"), 1344 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1345 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1346 exp.Unhex: rename_func("FROM_HEX"), 1347 exp.UnixDate: rename_func("UNIX_DATE"), 1348 exp.UnixToTime: _unix_to_time_sql, 1349 exp.Uuid: lambda *_: "GENERATE_UUID()", 1350 exp.Values: _derived_table_values_to_unnest, 1351 exp.VariancePop: rename_func("VAR_POP"), 1352 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1353 } 1354 1355 SUPPORTED_JSON_PATH_PARTS = { 1356 exp.JSONPathKey, 1357 exp.JSONPathRoot, 1358 exp.JSONPathSubscript, 1359 } 1360 1361 TYPE_MAPPING = { 1362 **generator.Generator.TYPE_MAPPING, 1363 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1364 exp.DataType.Type.BIGINT: "INT64", 1365 exp.DataType.Type.BINARY: "BYTES", 1366 exp.DataType.Type.BLOB: "BYTES", 1367 exp.DataType.Type.BOOLEAN: "BOOL", 1368 exp.DataType.Type.CHAR: "STRING", 1369 exp.DataType.Type.DECIMAL: "NUMERIC", 1370 exp.DataType.Type.DOUBLE: "FLOAT64", 1371 exp.DataType.Type.FLOAT: "FLOAT64", 1372 exp.DataType.Type.INT: "INT64", 1373 exp.DataType.Type.NCHAR: "STRING", 1374 exp.DataType.Type.NVARCHAR: "STRING", 1375 exp.DataType.Type.SMALLINT: "INT64", 1376 exp.DataType.Type.TEXT: "STRING", 1377 exp.DataType.Type.TIMESTAMP: "DATETIME", 1378 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1379 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1380 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1381 exp.DataType.Type.TINYINT: "INT64", 1382 exp.DataType.Type.ROWVERSION: "BYTES", 1383 exp.DataType.Type.UUID: "STRING", 1384 exp.DataType.Type.VARBINARY: "BYTES", 1385 
exp.DataType.Type.VARCHAR: "STRING", 1386 exp.DataType.Type.VARIANT: "ANY TYPE", 1387 } 1388 1389 PROPERTIES_LOCATION = { 1390 **generator.Generator.PROPERTIES_LOCATION, 1391 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1392 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1393 } 1394 1395 # WINDOW comes after QUALIFY 1396 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1397 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1398 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1399 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1400 } 1401 1402 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1403 RESERVED_KEYWORDS = { 1404 "all", 1405 "and", 1406 "any", 1407 "array", 1408 "as", 1409 "asc", 1410 "assert_rows_modified", 1411 "at", 1412 "between", 1413 "by", 1414 "case", 1415 "cast", 1416 "collate", 1417 "contains", 1418 "create", 1419 "cross", 1420 "cube", 1421 "current", 1422 "default", 1423 "define", 1424 "desc", 1425 "distinct", 1426 "else", 1427 "end", 1428 "enum", 1429 "escape", 1430 "except", 1431 "exclude", 1432 "exists", 1433 "extract", 1434 "false", 1435 "fetch", 1436 "following", 1437 "for", 1438 "from", 1439 "full", 1440 "group", 1441 "grouping", 1442 "groups", 1443 "hash", 1444 "having", 1445 "if", 1446 "ignore", 1447 "in", 1448 "inner", 1449 "intersect", 1450 "interval", 1451 "into", 1452 "is", 1453 "join", 1454 "lateral", 1455 "left", 1456 "like", 1457 "limit", 1458 "lookup", 1459 "merge", 1460 "natural", 1461 "new", 1462 "no", 1463 "not", 1464 "null", 1465 "nulls", 1466 "of", 1467 "on", 1468 "or", 1469 "order", 1470 "outer", 1471 "over", 1472 "partition", 1473 "preceding", 1474 "proto", 1475 "qualify", 1476 "range", 1477 "recursive", 1478 "respect", 1479 "right", 1480 "rollup", 1481 "rows", 1482 "select", 1483 "set", 1484 "some", 1485 "struct", 1486 "tablesample", 1487 "then", 1488 "to", 1489 "treat", 1490 "true", 1491 "unbounded", 1492 "union", 1493 "unnest", 1494 "using", 1495 "when", 1496 "where", 1497 "window", 1498 "with", 1499 "within", 1500 } 1501 1502 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1503 unit = expression.unit 1504 unit_sql = unit.name if unit.is_string else self.sql(unit) 1505 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1506 1507 def mod_sql(self, expression: exp.Mod) -> str: 1508 this = expression.this 1509 expr = expression.expression 1510 return self.func( 1511 "MOD", 1512 this.unnest() if isinstance(this, exp.Paren) else this, 1513 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1514 ) 1515 1516 def column_parts(self, expression: exp.Column) -> str: 1517 if expression.meta.get("quoted_column"): 1518 # If a column reference is of the form `dataset.table`.name, we need 1519 # to preserve the quoted table path, otherwise the reference breaks 1520 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1521 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1522 return f"{table_path}.{self.sql(expression, 'this')}" 1523 1524 return super().column_parts(expression) 1525 1526 def table_parts(self, expression: exp.Table) -> str: 1527 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1528 # we need to make sure the correct quoting is used in each case. 
1529 # 1530 # For example, if there is a CTE x that clashes with a schema name, then the former will 1531 # return the table y in that schema, whereas the latter will return the CTE's y column: 1532 # 1533 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1534 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1535 if expression.meta.get("quoted_table"): 1536 table_parts = ".".join(p.name for p in expression.parts) 1537 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1538 1539 return super().table_parts(expression) 1540 1541 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1542 this = expression.this 1543 if isinstance(this, exp.TsOrDsToDatetime): 1544 func_name = "FORMAT_DATETIME" 1545 elif isinstance(this, exp.TsOrDsToTimestamp): 1546 func_name = "FORMAT_TIMESTAMP" 1547 elif isinstance(this, exp.TsOrDsToTime): 1548 func_name = "FORMAT_TIME" 1549 else: 1550 func_name = "FORMAT_DATE" 1551 1552 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1553 return self.func( 1554 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1555 ) 1556 1557 def eq_sql(self, expression: exp.EQ) -> str: 1558 # Operands of = cannot be NULL in BigQuery 1559 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1560 if not isinstance(expression.parent, exp.Update): 1561 return "NULL" 1562 1563 return self.binary(expression, "=") 1564 1565 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1566 parent = expression.parent 1567 1568 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1569 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1570 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1571 return self.func( 1572 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1573 ) 1574 1575 return super().attimezone_sql(expression) 1576 1577 def trycast_sql(self, expression: exp.TryCast) -> str: 1578 return self.cast_sql(expression, safe_prefix="SAFE_") 1579 1580 def bracket_sql(self, expression: exp.Bracket) -> str: 1581 this = expression.this 1582 expressions = expression.expressions 1583 1584 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1585 arg = expressions[0] 1586 if arg.type is None: 1587 from sqlglot.optimizer.annotate_types import annotate_types 1588 1589 arg = annotate_types(arg, dialect=self.dialect) 1590 1591 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1592 # BQ doesn't support bracket syntax with string values for structs 1593 return f"{self.sql(this)}.{arg.name}" 1594 1595 expressions_sql = self.expressions(expression, flat=True) 1596 offset = expression.args.get("offset") 1597 1598 if offset == 0: 1599 expressions_sql = f"OFFSET({expressions_sql})" 1600 elif offset == 1: 1601 expressions_sql = f"ORDINAL({expressions_sql})" 1602 elif offset is not None: 1603 self.unsupported(f"Unsupported array offset: {offset}") 1604 1605 if expression.args.get("safe"): 1606 expressions_sql = f"SAFE_{expressions_sql}" 1607 1608 return f"{self.sql(this)}[{expressions_sql}]" 1609 1610 def in_unnest_op(self, expression: exp.Unnest) -> str: 1611 return self.sql(expression) 1612 1613 def version_sql(self, expression: exp.Version) -> str: 1614 if expression.name == "TIMESTAMP": 1615 expression.set("this", "SYSTEM_TIME") 1616 return super().version_sql(expression) 1617 1618 def contains_sql(self, expression: exp.Contains) -> str: 1619 this = expression.this 1620 expr = expression.expression 1621 1622 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1623 this = this.this 1624 expr = expr.this 1625 1626 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1627 1628 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1629 this = expression.this 1630 1631 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1632 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1633 # because they aren't literals and so the above syntax is invalid BigQuery. 1634 if isinstance(this, exp.Array): 1635 elem = seq_get(this.expressions, 0) 1636 if not (elem and elem.find(exp.Query)): 1637 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1638 1639 return super().cast_sql(expression, safe_prefix=safe_prefix) 1640 1641 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1642 variables = self.expressions(expression, "this") 1643 default = self.sql(expression, "default") 1644 default = f" DEFAULT {default}" if default else "" 1645 kind = self.sql(expression, "kind") 1646 kind = f" {kind}" if kind else "" 1647 1648 return f"{variables}{kind}{default}"
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
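As a quick illustration (a sketch, not from the library's docs; the values are made up), transpiling a Spark-style call where the base comes first should reorder the arguments for BigQuery:

import sqlglot

# Spark spells this LOG(base, value); BigQuery expects LOG(value, base), so
# the transpiled output is expected to swap the two operands.
print(sqlglot.transpile("SELECT LOG(2, 32)", read="spark", write="bigquery")[0])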
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
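A sketch of how this plays out through the optimizer's qualify pass (the SQL is the example above; the printed output is indicative, not asserted):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data "
    "WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)
# With early alias expansion, GROUP BY and HAVING should resolve to "id",
# while the WHERE reference keeps pointing at data.my_id.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))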
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
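For example (a sketch; the column name j is made up), a BigQuery-to-BigQuery roundtrip should keep the deprecated spelling instead of canonicalizing it:

import sqlglot

# JSON_VALUE and JSON_EXTRACT_SCALAR share an AST node; the metadata added by
# PRESERVE_ORIGINAL_NAMES lets the generator emit the spelling that was parsed.
print(sqlglot.transpile("SELECT JSON_VALUE(j, '$.x')", read="bigquery", write="bigquery")[0])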
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
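One way to observe this is through the type annotator (a sketch; whether the literal annotates exactly as BIGINT is an assumption, the point is integer vs. binary):

import sqlglot
from sqlglot.optimizer.annotate_types import annotate_types

expr = sqlglot.parse_one("SELECT 0xCC AS h", read="bigquery")
# With HEX_STRING_IS_INTEGER_TYPE set, the hex literal should be typed as an
# integer rather than as BYTES/BINARY.
print(annotate_types(expr, dialect="bigquery").selects[0].type)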
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized. Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
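To see the mapping in action (sketch; DuckDB is an arbitrary target and the exact output is not asserted):

import sqlglot

# %E6S ("seconds with 6 fractional digits") maps to the internal %S.%f form,
# which the target dialect then renders in its own format language.
print(sqlglot.transpile("SELECT FORMAT_TIMESTAMP('%E6S', ts)", read="bigquery", write="duckdb")[0])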
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
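A sketch of the effect on star expansion (the schema literal is made up; qualify is sqlglot's public optimizer entry point):

import sqlglot
from sqlglot.optimizer.qualify import qualify

schema = {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
# Even though the schema declares _PARTITIONTIME, it is a pseudo-column and
# should be left out when SELECT * is expanded.
print(qualify(sqlglot.parse_one("SELECT * FROM t", read="bigquery"), schema=schema, dialect="bigquery").sql("bigquery"))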
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
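For example (sketch), generating a bare UNION into BigQuery should make the quantifier explicit:

import sqlglot

# BigQuery requires DISTINCT or ALL on set operations, so the default
# (DISTINCT) is spelled out on the way in.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", write="bigquery")[0])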
639 def normalize_identifier(self, expression: E) -> E: 640 if ( 641 isinstance(expression, exp.Identifier) 642 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 643 ): 644 parent = expression.parent 645 while isinstance(parent, exp.Dot): 646 parent = parent.parent 647 648 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 649 # by default. The following check uses a heuristic to detect tables based on whether 650 # they are qualified. This should generally be correct, because tables in BigQuery 651 # must be qualified with at least a dataset, unless @@dataset_id is set. 652 case_sensitive = ( 653 isinstance(parent, exp.UserDefinedFunction) 654 or ( 655 isinstance(parent, exp.Table) 656 and parent.db 657 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 658 ) 659 or expression.meta.get("is_table") 660 ) 661 if not case_sensitive: 662 expression.set("this", expression.this.lower()) 663 664 return t.cast(E, expression) 665 666 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
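A minimal sketch of the method above (the identifier is made up; a bare, unquoted identifier is not a qualified table, so the case-sensitivity heuristic does not apply and it gets lowered):

from sqlglot import exp
from sqlglot.dialects import BigQuery

dialect = BigQuery()
# "FoO" has no Table/UDF parent, so it is treated as case-insensitive and
# normalized to lowercase.
print(dialect.normalize_identifier(exp.to_identifier("FoO")).name)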
Mapping of an escaped sequence (\\n) to its unescaped version (a literal newline).
668 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 669 VAR_TOKENS = { 670 TokenType.DASH, 671 TokenType.VAR, 672 }
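Since DASH is a VAR token here, hyphens can appear in unquoted JSON path keys (a sketch; the column name and path are made up and the roundtrip output is assumed, not asserted):

import sqlglot

# '$.foo-bar' should parse as a single key rather than as a subtraction
# inside the JSON path.
print(sqlglot.parse_one("SELECT JSON_VALUE(j, '$.foo-bar')", read="bigquery").sql("bigquery"))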
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
674 class Tokenizer(tokens.Tokenizer): 675 QUOTES = ["'", '"', '"""', "'''"] 676 COMMENTS = ["--", "#", ("/*", "*/")] 677 IDENTIFIERS = ["`"] 678 STRING_ESCAPES = ["\\"] 679 680 HEX_STRINGS = [("0x", ""), ("0X", "")] 681 682 BYTE_STRINGS = [ 683 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 684 ] 685 686 RAW_STRINGS = [ 687 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 688 ] 689 690 NESTED_COMMENTS = False 691 692 KEYWORDS = { 693 **tokens.Tokenizer.KEYWORDS, 694 "ANY TYPE": TokenType.VARIANT, 695 "BEGIN": TokenType.COMMAND, 696 "BEGIN TRANSACTION": TokenType.BEGIN, 697 "BYTEINT": TokenType.INT, 698 "BYTES": TokenType.BINARY, 699 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 700 "DATETIME": TokenType.TIMESTAMP, 701 "DECLARE": TokenType.DECLARE, 702 "ELSEIF": TokenType.COMMAND, 703 "EXCEPTION": TokenType.COMMAND, 704 "EXPORT": TokenType.EXPORT, 705 "FLOAT64": TokenType.DOUBLE, 706 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 707 "LOOP": TokenType.COMMAND, 708 "MODEL": TokenType.MODEL, 709 "NOT DETERMINISTIC": TokenType.VOLATILE, 710 "RECORD": TokenType.STRUCT, 711 "REPEAT": TokenType.COMMAND, 712 "TIMESTAMP": TokenType.TIMESTAMPTZ, 713 "WHILE": TokenType.COMMAND, 714 } 715 KEYWORDS.pop("DIV") 716 KEYWORDS.pop("VALUES") 717 KEYWORDS.pop("/*+")
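The keyword overrides above are visible at the token level (a sketch using sqlglot's top-level tokenize helper):

import sqlglot

# BigQuery's TIMESTAMP is timezone-aware, so it tokenizes as TIMESTAMPTZ;
# DATETIME conversely tokenizes as the (timezone-less) TIMESTAMP token.
for token in sqlglot.tokenize("CAST(x AS TIMESTAMP)", read="bigquery"):
    print(token.token_type, token.text)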
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
719 class Parser(parser.Parser): 720 PREFIXED_PIVOT_COLUMNS = True 721 LOG_DEFAULTS_TO_LN = True 722 SUPPORTS_IMPLICIT_UNNEST = True 723 JOINS_HAVE_EQUAL_PRECEDENCE = True 724 725 # BigQuery does not allow ASC/DESC to be used as an identifier 726 ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC} 727 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 728 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 729 COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - { 730 TokenType.ASC, 731 TokenType.DESC, 732 } 733 UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC} 734 735 FUNCTIONS = { 736 **parser.Parser.FUNCTIONS, 737 "APPROX_TOP_COUNT": exp.ApproxTopK.from_arg_list, 738 "BOOL": exp.JSONBool.from_arg_list, 739 "CONTAINS_SUBSTR": _build_contains_substring, 740 "DATE": _build_date, 741 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 742 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 743 "DATE_TRUNC": lambda args: exp.DateTrunc( 744 unit=seq_get(args, 1), 745 this=seq_get(args, 0), 746 zone=seq_get(args, 2), 747 ), 748 "DATETIME": _build_datetime, 749 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 750 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 751 "DIV": binary_from_function(exp.IntDiv), 752 "EDIT_DISTANCE": _build_levenshtein, 753 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 754 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 755 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 756 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 757 "JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 758 "JSON_KEYS": exp.JSONKeysAtDepth.from_arg_list, 759 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 760 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 761 "JSON_STRIP_NULLS": _build_json_strip_nulls, 762 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 763 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 764 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 765 "MD5": exp.MD5Digest.from_arg_list, 766 "NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize( 767 this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True 768 ), 769 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 770 "TO_HEX": _build_to_hex, 771 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 772 [seq_get(args, 1), seq_get(args, 0)] 773 ), 774 "PARSE_TIME": lambda args: build_formatted_time(exp.ParseTime, "bigquery")( 775 [seq_get(args, 1), seq_get(args, 0)] 776 ), 777 "PARSE_TIMESTAMP": _build_parse_timestamp, 778 "PARSE_DATETIME": lambda args: build_formatted_time(exp.ParseDatetime, "bigquery")( 779 [seq_get(args, 1), seq_get(args, 0)] 780 ), 781 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 782 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 783 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 784 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 785 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 786 ), 787 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 788 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 789 "SPLIT": lambda args: exp.Split( 790 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 791 this=seq_get(args, 0), 792 expression=seq_get(args, 1) or exp.Literal.string(","), 793 ), 794 "STRPOS": exp.StrPosition.from_arg_list, 795 "TIME": _build_time, 796 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 797 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 798 "TIMESTAMP": _build_timestamp, 799 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 800 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 801 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 802 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 803 ), 804 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 805 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 806 ), 807 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 808 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 809 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 810 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 811 "FORMAT_TIME": _build_format_time(exp.TsOrDsToTime), 812 "FROM_HEX": exp.Unhex.from_arg_list, 813 "WEEK": lambda args: exp.WeekStart(this=exp.var(seq_get(args, 0))), 814 } 815 816 FUNCTION_PARSERS = { 817 **parser.Parser.FUNCTION_PARSERS, 818 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 819 "JSON_ARRAY": lambda self: self.expression( 820 exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise) 821 ), 822 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 823 "PREDICT": lambda self: self._parse_predict(), 824 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 825 "GENERATE_EMBEDDING": lambda self: self._parse_generate_embedding(), 826 "VECTOR_SEARCH": lambda self: self._parse_vector_search(), 827 } 828 FUNCTION_PARSERS.pop("TRIM") 829 830 NO_PAREN_FUNCTIONS = { 831 **parser.Parser.NO_PAREN_FUNCTIONS, 832 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 833 } 834 835 NESTED_TYPE_TOKENS = { 836 *parser.Parser.NESTED_TYPE_TOKENS, 837 TokenType.TABLE, 838 } 839 840 PROPERTY_PARSERS = { 841 **parser.Parser.PROPERTY_PARSERS, 842 "NOT DETERMINISTIC": lambda self: self.expression( 843 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 844 ), 845 "OPTIONS": lambda self: self._parse_with_property(), 846 } 847 848 CONSTRAINT_PARSERS = { 849 **parser.Parser.CONSTRAINT_PARSERS, 850 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 851 } 852 853 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 854 RANGE_PARSERS.pop(TokenType.OVERLAPS) 855 856 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 857 858 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 859 860 STATEMENT_PARSERS = { 861 **parser.Parser.STATEMENT_PARSERS, 862 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 863 TokenType.END: lambda self: self._parse_as_command(self._prev), 864 TokenType.FOR: lambda self: self._parse_for_in(), 865 TokenType.EXPORT: lambda self: self._parse_export_data(), 866 TokenType.DECLARE: lambda self: self._parse_declare(), 867 } 868 869 BRACKET_OFFSETS = { 870 "OFFSET": (0, False), 871 "ORDINAL": (1, False), 872 "SAFE_OFFSET": (0, True), 873 "SAFE_ORDINAL": (1, True), 874 } 875 876 def _parse_for_in(self) -> t.Union[exp.ForIn, exp.Command]: 877 index = self._index 878 this = self._parse_range() 879 self._match_text_seq("DO") 880 if self._match(TokenType.COMMAND): 881 self._retreat(index) 882 return self._parse_as_command(self._prev) 
883 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 884 885 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 886 this = super()._parse_table_part(schema=schema) or self._parse_number() 887 888 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 889 if isinstance(this, exp.Identifier): 890 table_name = this.name 891 while self._match(TokenType.DASH, advance=False) and self._next: 892 start = self._curr 893 while self._is_connected() and not self._match_set( 894 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 895 ): 896 self._advance() 897 898 if start == self._curr: 899 break 900 901 table_name += self._find_sql(start, self._prev) 902 903 this = exp.Identifier( 904 this=table_name, quoted=this.args.get("quoted") 905 ).update_positions(this) 906 elif isinstance(this, exp.Literal): 907 table_name = this.name 908 909 if self._is_connected() and self._parse_var(any_token=True): 910 table_name += self._prev.text 911 912 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 913 914 return this 915 916 def _parse_table_parts( 917 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 918 ) -> exp.Table: 919 table = super()._parse_table_parts( 920 schema=schema, is_db_reference=is_db_reference, wildcard=True 921 ) 922 923 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 924 if not table.catalog: 925 if table.db: 926 previous_db = table.args["db"] 927 parts = table.db.split(".") 928 if len(parts) == 2 and not table.args["db"].quoted: 929 table.set( 930 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 931 ) 932 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 933 else: 934 previous_this = table.this 935 parts = table.name.split(".") 936 if len(parts) == 2 and not table.this.quoted: 937 table.set( 938 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 939 ) 940 table.set( 941 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 942 ) 943 944 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 945 alias = table.this 946 catalog, db, this, *rest = ( 947 exp.to_identifier(p, quoted=True) 948 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 949 ) 950 951 for part in (catalog, db, this): 952 if part: 953 part.update_positions(table.this) 954 955 if rest and this: 956 this = exp.Dot.build([this, *rest]) # type: ignore 957 958 table = exp.Table( 959 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 960 ) 961 table.meta["quoted_table"] = True 962 else: 963 alias = None 964 965 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 966 # dataset, so if the project identifier is omitted we need to fix the ast so that 967 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 968 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 969 # views, because it would seem like the "catalog" part is set, when it'd actually 970 # be the region/dataset. Merging the two identifiers into a single one is done to 971 # avoid producing a 4-part Table reference, which would cause issues in the schema 972 # module, when there are 3-part table names mixed with information schema views. 
973 # 974 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 975 table_parts = table.parts 976 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 977 # We need to alias the table here to avoid breaking existing qualified columns. 978 # This is expected to be safe, because if there's an actual alias coming up in 979 # the token stream, it will overwrite this one. If there isn't one, we are only 980 # exposing the name that can be used to reference the view explicitly (a no-op). 981 exp.alias_( 982 table, 983 t.cast(exp.Identifier, alias or table_parts[-1]), 984 table=True, 985 copy=False, 986 ) 987 988 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 989 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 990 line=table_parts[-2].meta.get("line"), 991 col=table_parts[-1].meta.get("col"), 992 start=table_parts[-2].meta.get("start"), 993 end=table_parts[-1].meta.get("end"), 994 ) 995 table.set("this", new_this) 996 table.set("db", seq_get(table_parts, -3)) 997 table.set("catalog", seq_get(table_parts, -4)) 998 999 return table 1000 1001 def _parse_column(self) -> t.Optional[exp.Expression]: 1002 column = super()._parse_column() 1003 if isinstance(column, exp.Column): 1004 parts = column.parts 1005 if any("." in p.name for p in parts): 1006 catalog, db, table, this, *rest = ( 1007 exp.to_identifier(p, quoted=True) 1008 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 1009 ) 1010 1011 if rest and this: 1012 this = exp.Dot.build([this, *rest]) # type: ignore 1013 1014 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 1015 column.meta["quoted_column"] = True 1016 1017 return column 1018 1019 @t.overload 1020 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 1021 1022 @t.overload 1023 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
1024 1025 def _parse_json_object(self, agg=False): 1026 json_object = super()._parse_json_object() 1027 array_kv_pair = seq_get(json_object.expressions, 0) 1028 1029 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 1030 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 1031 if ( 1032 array_kv_pair 1033 and isinstance(array_kv_pair.this, exp.Array) 1034 and isinstance(array_kv_pair.expression, exp.Array) 1035 ): 1036 keys = array_kv_pair.this.expressions 1037 values = array_kv_pair.expression.expressions 1038 1039 json_object.set( 1040 "expressions", 1041 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 1042 ) 1043 1044 return json_object 1045 1046 def _parse_bracket( 1047 self, this: t.Optional[exp.Expression] = None 1048 ) -> t.Optional[exp.Expression]: 1049 bracket = super()._parse_bracket(this) 1050 1051 if this is bracket: 1052 return bracket 1053 1054 if isinstance(bracket, exp.Bracket): 1055 for expression in bracket.expressions: 1056 name = expression.name.upper() 1057 1058 if name not in self.BRACKET_OFFSETS: 1059 break 1060 1061 offset, safe = self.BRACKET_OFFSETS[name] 1062 bracket.set("offset", offset) 1063 bracket.set("safe", safe) 1064 expression.replace(expression.expressions[0]) 1065 1066 return bracket 1067 1068 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 1069 unnest = super()._parse_unnest(with_alias=with_alias) 1070 1071 if not unnest: 1072 return None 1073 1074 unnest_expr = seq_get(unnest.expressions, 0) 1075 if unnest_expr: 1076 from sqlglot.optimizer.annotate_types import annotate_types 1077 1078 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 1079 1080 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 1081 # in contrast to other dialects such as DuckDB which flattens only the array by default 1082 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 1083 array_elem.is_type(exp.DataType.Type.STRUCT) 1084 for array_elem in unnest_expr._type.expressions 1085 ): 1086 unnest.set("explode_array", True) 1087 1088 return unnest 1089 1090 def _parse_make_interval(self) -> exp.MakeInterval: 1091 expr = exp.MakeInterval() 1092 1093 for arg_key in expr.arg_types: 1094 value = self._parse_lambda() 1095 1096 if not value: 1097 break 1098 1099 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 1100 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 1101 if isinstance(value, exp.Kwarg): 1102 arg_key = value.this.name 1103 1104 expr.set(arg_key, value) 1105 1106 self._match(TokenType.COMMA) 1107 1108 return expr 1109 1110 def _parse_predict(self) -> exp.Predict: 1111 self._match_text_seq("MODEL") 1112 this = self._parse_table() 1113 1114 self._match(TokenType.COMMA) 1115 self._match_text_seq("TABLE") 1116 1117 return self.expression( 1118 exp.Predict, 1119 this=this, 1120 expression=self._parse_table(), 1121 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 1122 ) 1123 1124 def _parse_generate_embedding(self) -> exp.GenerateEmbedding: 1125 self._match_text_seq("MODEL") 1126 this = self._parse_table() 1127 1128 self._match(TokenType.COMMA) 1129 self._match_text_seq("TABLE") 1130 1131 return self.expression( 1132 exp.GenerateEmbedding, 1133 this=this, 1134 expression=self._parse_table(), 1135 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 1136 ) 1137 1138 def 
_parse_features_at_time(self) -> exp.FeaturesAtTime: 1139 self._match(TokenType.TABLE) 1140 this = self._parse_table() 1141 1142 expr = self.expression(exp.FeaturesAtTime, this=this) 1143 1144 while self._match(TokenType.COMMA): 1145 arg = self._parse_lambda() 1146 1147 # Get the LHS of the Kwarg and set the arg to that value, e.g 1148 # "num_rows => 1" sets the expr's `num_rows` arg 1149 if arg: 1150 expr.set(arg.this.name, arg) 1151 1152 return expr 1153 1154 def _parse_vector_search(self) -> exp.VectorSearch: 1155 self._match(TokenType.TABLE) 1156 base_table = self._parse_table() 1157 1158 self._match(TokenType.COMMA) 1159 1160 column_to_search = self._parse_bitwise() 1161 self._match(TokenType.COMMA) 1162 1163 self._match(TokenType.TABLE) 1164 query_table = self._parse_table() 1165 1166 expr = self.expression( 1167 exp.VectorSearch, 1168 this=base_table, 1169 column_to_search=column_to_search, 1170 query_table=query_table, 1171 ) 1172 1173 while self._match(TokenType.COMMA): 1174 # query_column_to_search can be named argument or positional 1175 if self._match(TokenType.STRING, advance=False): 1176 query_column = self._parse_string() 1177 expr.set("query_column_to_search", query_column) 1178 else: 1179 arg = self._parse_lambda() 1180 if arg: 1181 expr.set(arg.this.name, arg) 1182 1183 return expr 1184 1185 def _parse_export_data(self) -> exp.Export: 1186 self._match_text_seq("DATA") 1187 1188 return self.expression( 1189 exp.Export, 1190 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 1191 options=self._parse_properties(), 1192 this=self._match_text_seq("AS") and self._parse_select(), 1193 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
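A minimal usage sketch (the table, column, and alias names are illustrative): the dashed, unquoted project name and the SAFE_OFFSET bracket are both handled by the BigQuery-specific overrides above.

import sqlglot

# Dashed table names and SAFE_OFFSET brackets are BigQuery-specific;
# both are parsed by this dialect and round-trip back to BigQuery SQL.
ast = sqlglot.parse_one(
    "SELECT arr[SAFE_OFFSET(0)] AS first_elem FROM my-project.dataset.tbl",
    read="bigquery",
)
print(ast.sql(dialect="bigquery"))
# -> SELECT arr[SAFE_OFFSET(0)] AS first_elem FROM my-project.dataset.tbl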
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
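As a sketch of the custom function parsing above (here _parse_make_interval), positional arguments may be followed by named ones; the argument order in the generated SQL may be normalized.

import sqlglot

# MAKE_INTERVAL fills unnamed arguments sequentially, then accepts
# named arguments (exp.Kwarg) in any order.
expr = sqlglot.parse_one("SELECT MAKE_INTERVAL(1, minute => 5, day => 2)", read="bigquery")
print(expr.sql(dialect="bigquery"))  # a MAKE_INTERVAL call; named-arg order may be normalized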
1195 class Generator(generator.Generator): 1196 INTERVAL_ALLOWS_PLURAL_FORM = False 1197 JOIN_HINTS = False 1198 QUERY_HINTS = False 1199 TABLE_HINTS = False 1200 LIMIT_FETCH = "LIMIT" 1201 RENAME_TABLE_WITH_DB = False 1202 NVL2_SUPPORTED = False 1203 UNNEST_WITH_ORDINALITY = False 1204 COLLATE_IS_FUNC = True 1205 LIMIT_ONLY_LITERALS = True 1206 SUPPORTS_TABLE_ALIAS_COLUMNS = False 1207 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 1208 JSON_KEY_VALUE_PAIR_SEP = "," 1209 NULL_ORDERING_SUPPORTED = False 1210 IGNORE_NULLS_IN_FUNC = True 1211 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 1212 CAN_IMPLEMENT_ARRAY_ANY = True 1213 SUPPORTS_TO_NUMBER = False 1214 NAMED_PLACEHOLDER_TOKEN = "@" 1215 HEX_FUNC = "TO_HEX" 1216 WITH_PROPERTIES_PREFIX = "OPTIONS" 1217 SUPPORTS_EXPLODING_PROJECTIONS = False 1218 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1219 SUPPORTS_UNIX_SECONDS = True 1220 1221 SAFE_JSON_PATH_KEY_RE = re.compile(r"^[_\-a-zA-Z][\-\w]*$") 1222 1223 TS_OR_DS_TYPES = ( 1224 exp.TsOrDsToDatetime, 1225 exp.TsOrDsToTimestamp, 1226 exp.TsOrDsToTime, 1227 exp.TsOrDsToDate, 1228 ) 1229 1230 TRANSFORMS = { 1231 **generator.Generator.TRANSFORMS, 1232 exp.ApproxTopK: rename_func("APPROX_TOP_COUNT"), 1233 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1234 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1235 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1236 exp.Array: inline_array_unless_query, 1237 exp.ArrayContains: _array_contains_sql, 1238 exp.ArrayFilter: filter_array_using_unnest, 1239 exp.ArrayRemove: filter_array_using_unnest, 1240 exp.ByteLength: rename_func("BYTE_LENGTH"), 1241 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1242 exp.CollateProperty: lambda self, e: ( 1243 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1244 if e.args.get("default") 1245 else f"COLLATE {self.sql(e, 'this')}" 1246 ), 1247 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1248 exp.CountIf: rename_func("COUNTIF"), 1249 exp.Create: _create_sql, 1250 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1251 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1252 exp.DateDiff: lambda self, e: self.func( 1253 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1254 ), 1255 exp.DateFromParts: rename_func("DATE"), 1256 exp.DateStrToDate: datestrtodate_sql, 1257 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1258 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1259 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1260 exp.DateFromUnixDate: rename_func("DATE_FROM_UNIX_DATE"), 1261 exp.FromTimeZone: lambda self, e: self.func( 1262 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1263 ), 1264 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1265 exp.GroupConcat: lambda self, e: groupconcat_sql( 1266 self, e, func_name="STRING_AGG", within_group=False 1267 ), 1268 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1269 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1270 exp.If: if_sql(false_value="NULL"), 1271 exp.ILike: no_ilike_sql, 1272 exp.IntDiv: rename_func("DIV"), 1273 exp.Int64: rename_func("INT64"), 1274 exp.JSONBool: rename_func("BOOL"), 1275 exp.JSONExtract: _json_extract_sql, 1276 exp.JSONExtractArray: _json_extract_sql, 1277 exp.JSONExtractScalar: _json_extract_sql, 1278 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1279 exp.JSONKeysAtDepth: rename_func("JSON_KEYS"), 1280 exp.JSONValueArray: rename_func("JSON_VALUE_ARRAY"), 1281 exp.Levenshtein: 
_levenshtein_sql, 1282 exp.Max: max_or_greatest, 1283 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1284 exp.MD5Digest: rename_func("MD5"), 1285 exp.Min: min_or_least, 1286 exp.Normalize: lambda self, e: self.func( 1287 "NORMALIZE_AND_CASEFOLD" if e.args.get("is_casefold") else "NORMALIZE", 1288 e.this, 1289 e.args.get("form"), 1290 ), 1291 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1292 exp.RegexpExtract: lambda self, e: self.func( 1293 "REGEXP_EXTRACT", 1294 e.this, 1295 e.expression, 1296 e.args.get("position"), 1297 e.args.get("occurrence"), 1298 ), 1299 exp.RegexpExtractAll: lambda self, e: self.func( 1300 "REGEXP_EXTRACT_ALL", e.this, e.expression 1301 ), 1302 exp.RegexpReplace: regexp_replace_sql, 1303 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1304 exp.ReturnsProperty: _returnsproperty_sql, 1305 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1306 exp.ParseTime: lambda self, e: self.func("PARSE_TIME", self.format_time(e), e.this), 1307 exp.ParseDatetime: lambda self, e: self.func( 1308 "PARSE_DATETIME", self.format_time(e), e.this 1309 ), 1310 exp.Select: transforms.preprocess( 1311 [ 1312 transforms.explode_projection_to_unnest(), 1313 transforms.unqualify_unnest, 1314 transforms.eliminate_distinct_on, 1315 _alias_ordered_group, 1316 transforms.eliminate_semi_and_anti_joins, 1317 ] 1318 ), 1319 exp.SHA: rename_func("SHA1"), 1320 exp.SHA2: sha256_sql, 1321 exp.StabilityProperty: lambda self, e: ( 1322 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1323 ), 1324 exp.String: rename_func("STRING"), 1325 exp.StrPosition: lambda self, e: ( 1326 strposition_sql( 1327 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1328 ) 1329 ), 1330 exp.StrToDate: _str_to_datetime_sql, 1331 exp.StrToTime: _str_to_datetime_sql, 1332 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1333 exp.TimeFromParts: rename_func("TIME"), 1334 exp.TimestampFromParts: rename_func("DATETIME"), 1335 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1336 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1337 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1338 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1339 exp.TimeStrToTime: timestrtotime_sql, 1340 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1341 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1342 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1343 exp.TsOrDsToTime: rename_func("TIME"), 1344 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1345 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1346 exp.Unhex: rename_func("FROM_HEX"), 1347 exp.UnixDate: rename_func("UNIX_DATE"), 1348 exp.UnixToTime: _unix_to_time_sql, 1349 exp.Uuid: lambda *_: "GENERATE_UUID()", 1350 exp.Values: _derived_table_values_to_unnest, 1351 exp.VariancePop: rename_func("VAR_POP"), 1352 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1353 } 1354 1355 SUPPORTED_JSON_PATH_PARTS = { 1356 exp.JSONPathKey, 1357 exp.JSONPathRoot, 1358 exp.JSONPathSubscript, 1359 } 1360 1361 TYPE_MAPPING = { 1362 **generator.Generator.TYPE_MAPPING, 1363 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1364 exp.DataType.Type.BIGINT: "INT64", 1365 exp.DataType.Type.BINARY: "BYTES", 1366 exp.DataType.Type.BLOB: "BYTES", 1367 exp.DataType.Type.BOOLEAN: "BOOL", 1368 exp.DataType.Type.CHAR: "STRING", 1369 exp.DataType.Type.DECIMAL: "NUMERIC", 1370 exp.DataType.Type.DOUBLE: "FLOAT64", 1371 exp.DataType.Type.FLOAT: "FLOAT64", 1372 exp.DataType.Type.INT: "INT64", 1373 exp.DataType.Type.NCHAR: "STRING", 1374 
exp.DataType.Type.NVARCHAR: "STRING", 1375 exp.DataType.Type.SMALLINT: "INT64", 1376 exp.DataType.Type.TEXT: "STRING", 1377 exp.DataType.Type.TIMESTAMP: "DATETIME", 1378 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1379 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1380 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1381 exp.DataType.Type.TINYINT: "INT64", 1382 exp.DataType.Type.ROWVERSION: "BYTES", 1383 exp.DataType.Type.UUID: "STRING", 1384 exp.DataType.Type.VARBINARY: "BYTES", 1385 exp.DataType.Type.VARCHAR: "STRING", 1386 exp.DataType.Type.VARIANT: "ANY TYPE", 1387 } 1388 1389 PROPERTIES_LOCATION = { 1390 **generator.Generator.PROPERTIES_LOCATION, 1391 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1392 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1393 } 1394 1395 # WINDOW comes after QUALIFY 1396 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1397 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1398 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1399 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1400 } 1401 1402 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1403 RESERVED_KEYWORDS = { 1404 "all", 1405 "and", 1406 "any", 1407 "array", 1408 "as", 1409 "asc", 1410 "assert_rows_modified", 1411 "at", 1412 "between", 1413 "by", 1414 "case", 1415 "cast", 1416 "collate", 1417 "contains", 1418 "create", 1419 "cross", 1420 "cube", 1421 "current", 1422 "default", 1423 "define", 1424 "desc", 1425 "distinct", 1426 "else", 1427 "end", 1428 "enum", 1429 "escape", 1430 "except", 1431 "exclude", 1432 "exists", 1433 "extract", 1434 "false", 1435 "fetch", 1436 "following", 1437 "for", 1438 "from", 1439 "full", 1440 "group", 1441 "grouping", 1442 "groups", 1443 "hash", 1444 "having", 1445 "if", 1446 "ignore", 1447 "in", 1448 "inner", 1449 "intersect", 1450 "interval", 1451 "into", 1452 "is", 1453 "join", 1454 "lateral", 1455 "left", 1456 "like", 1457 "limit", 1458 "lookup", 1459 "merge", 1460 "natural", 1461 "new", 1462 "no", 1463 "not", 1464 "null", 1465 "nulls", 1466 "of", 1467 "on", 1468 "or", 1469 "order", 1470 "outer", 1471 "over", 1472 "partition", 1473 "preceding", 1474 "proto", 1475 "qualify", 1476 "range", 1477 "recursive", 1478 "respect", 1479 "right", 1480 "rollup", 1481 "rows", 1482 "select", 1483 "set", 1484 "some", 1485 "struct", 1486 "tablesample", 1487 "then", 1488 "to", 1489 "treat", 1490 "true", 1491 "unbounded", 1492 "union", 1493 "unnest", 1494 "using", 1495 "when", 1496 "where", 1497 "window", 1498 "with", 1499 "within", 1500 } 1501 1502 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1503 unit = expression.unit 1504 unit_sql = unit.name if unit.is_string else self.sql(unit) 1505 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1506 1507 def mod_sql(self, expression: exp.Mod) -> str: 1508 this = expression.this 1509 expr = expression.expression 1510 return self.func( 1511 "MOD", 1512 this.unnest() if isinstance(this, exp.Paren) else this, 1513 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1514 ) 1515 1516 def column_parts(self, expression: exp.Column) -> str: 1517 if expression.meta.get("quoted_column"): 1518 # If a column reference is of the form `dataset.table`.name, we need 1519 # to preserve the quoted table path, otherwise the reference breaks 1520 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1521 table_path = 
self.sql(exp.Identifier(this=table_parts, quoted=True)) 1522 return f"{table_path}.{self.sql(expression, 'this')}" 1523 1524 return super().column_parts(expression) 1525 1526 def table_parts(self, expression: exp.Table) -> str: 1527 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1528 # we need to make sure the correct quoting is used in each case. 1529 # 1530 # For example, if there is a CTE x that clashes with a schema name, then the former will 1531 # return the table y in that schema, whereas the latter will return the CTE's y column: 1532 # 1533 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1534 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1535 if expression.meta.get("quoted_table"): 1536 table_parts = ".".join(p.name for p in expression.parts) 1537 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1538 1539 return super().table_parts(expression) 1540 1541 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1542 this = expression.this 1543 if isinstance(this, exp.TsOrDsToDatetime): 1544 func_name = "FORMAT_DATETIME" 1545 elif isinstance(this, exp.TsOrDsToTimestamp): 1546 func_name = "FORMAT_TIMESTAMP" 1547 elif isinstance(this, exp.TsOrDsToTime): 1548 func_name = "FORMAT_TIME" 1549 else: 1550 func_name = "FORMAT_DATE" 1551 1552 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1553 return self.func( 1554 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1555 ) 1556 1557 def eq_sql(self, expression: exp.EQ) -> str: 1558 # Operands of = cannot be NULL in BigQuery 1559 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1560 if not isinstance(expression.parent, exp.Update): 1561 return "NULL" 1562 1563 return self.binary(expression, "=") 1564 1565 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1566 parent = expression.parent 1567 1568 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1569 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1570 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1571 return self.func( 1572 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1573 ) 1574 1575 return super().attimezone_sql(expression) 1576 1577 def trycast_sql(self, expression: exp.TryCast) -> str: 1578 return self.cast_sql(expression, safe_prefix="SAFE_") 1579 1580 def bracket_sql(self, expression: exp.Bracket) -> str: 1581 this = expression.this 1582 expressions = expression.expressions 1583 1584 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1585 arg = expressions[0] 1586 if arg.type is None: 1587 from sqlglot.optimizer.annotate_types import annotate_types 1588 1589 arg = annotate_types(arg, dialect=self.dialect) 1590 1591 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1592 # BQ doesn't support bracket syntax with string values for structs 1593 return f"{self.sql(this)}.{arg.name}" 1594 1595 expressions_sql = self.expressions(expression, flat=True) 1596 offset = expression.args.get("offset") 1597 1598 if offset == 0: 1599 expressions_sql = f"OFFSET({expressions_sql})" 1600 elif offset == 1: 1601 expressions_sql = f"ORDINAL({expressions_sql})" 1602 elif offset is not None: 1603 self.unsupported(f"Unsupported array offset: {offset}") 1604 1605 if expression.args.get("safe"): 1606 expressions_sql = f"SAFE_{expressions_sql}" 1607 1608 return f"{self.sql(this)}[{expressions_sql}]" 1609 1610 def in_unnest_op(self, expression: exp.Unnest) -> str: 1611 return self.sql(expression) 1612 1613 def version_sql(self, expression: exp.Version) -> str: 1614 if expression.name == "TIMESTAMP": 1615 expression.set("this", "SYSTEM_TIME") 1616 return super().version_sql(expression) 1617 1618 def contains_sql(self, expression: exp.Contains) -> str: 1619 this = expression.this 1620 expr = expression.expression 1621 1622 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1623 this = this.this 1624 expr = expr.this 1625 1626 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope")) 1627 1628 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1629 this = expression.this 1630 1631 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1632 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1633 # because they aren't literals and so the above syntax is invalid BigQuery. 1634 if isinstance(this, exp.Array): 1635 elem = seq_get(this.expressions, 0) 1636 if not (elem and elem.find(exp.Query)): 1637 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1638 1639 return super().cast_sql(expression, safe_prefix=safe_prefix) 1640 1641 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1642 variables = self.expressions(expression, "this") 1643 default = self.sql(expression, "default") 1644 default = f" DEFAULT {default}" if default else "" 1645 kind = self.sql(expression, "kind") 1646 kind = f" {kind}" if kind else "" 1647 1648 return f"{variables}{kind}{default}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
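An indicative example of the generator in use, combining the TRY_CAST -> SAFE_CAST rewrite and the INT -> INT64 type mapping defined above (exact formatting may vary across sqlglot versions):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT TRY_CAST(y AS INT) FROM t",
        read="duckdb",
        write="bigquery",
        pretty=True,
    )[0]
)
# Roughly:
# SELECT
#   SAFE_CAST(y AS INT64)
# FROM t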
1516 def column_parts(self, expression: exp.Column) -> str: 1517 if expression.meta.get("quoted_column"): 1518 # If a column reference is of the form `dataset.table`.name, we need 1519 # to preserve the quoted table path, otherwise the reference breaks 1520 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1521 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1522 return f"{table_path}.{self.sql(expression, 'this')}" 1523 1524 return super().column_parts(expression)
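A round-trip sketch of this behavior (names are illustrative):

import sqlglot

# The parser marks `dataset.table` with `quoted_column` metadata, so the
# two-part quoted prefix is preserved instead of being split apart.
sql = "SELECT `dataset.table`.col FROM `dataset.table`"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# -> SELECT `dataset.table`.col FROM `dataset.table`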
1526 def table_parts(self, expression: exp.Table) -> str: 1527 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1528 # we need to make sure the correct quoting is used in each case. 1529 # 1530 # For example, if there is a CTE x that clashes with a schema name, then the former will 1531 # return the table y in that schema, whereas the latter will return the CTE's y column: 1532 # 1533 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1534 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1535 if expression.meta.get("quoted_table"): 1536 table_parts = ".".join(p.name for p in expression.parts) 1537 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1538 1539 return super().table_parts(expression)
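The two spellings from the comment above can be verified directly (a sketch):

import sqlglot

# `x.y` (one quoted identifier) and `x`.`y` (two identifiers) may resolve
# to different data sources, so each is regenerated exactly as written.
for sql in ("SELECT * FROM `x.y`", "SELECT * FROM `x`.`y`"):
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# -> SELECT * FROM `x.y`
# -> SELECT * FROM `x`.`y`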
1541 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1542 this = expression.this 1543 if isinstance(this, exp.TsOrDsToDatetime): 1544 func_name = "FORMAT_DATETIME" 1545 elif isinstance(this, exp.TsOrDsToTimestamp): 1546 func_name = "FORMAT_TIMESTAMP" 1547 elif isinstance(this, exp.TsOrDsToTime): 1548 func_name = "FORMAT_TIME" 1549 else: 1550 func_name = "FORMAT_DATE" 1551 1552 time_expr = this if isinstance(this, self.TS_OR_DS_TYPES) else expression 1553 return self.func( 1554 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1555 )
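Indicatively, the wrapper expression decides the function name, so a FORMAT_TIMESTAMP call round-trips as such (a sketch; `ts` is illustrative):

import sqlglot

# FORMAT_TIMESTAMP parses into a TimeToStr over TsOrDsToTimestamp, which
# this method maps back to FORMAT_TIMESTAMP on generation.
print(sqlglot.transpile("SELECT FORMAT_TIMESTAMP('%Y', ts)", read="bigquery", write="bigquery")[0])
# -> SELECT FORMAT_TIMESTAMP('%Y', ts)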
1557 def eq_sql(self, expression: exp.EQ) -> str: 1558 # Operands of = cannot be NULL in BigQuery 1559 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1560 if not isinstance(expression.parent, exp.Update): 1561 return "NULL" 1562 1563 return self.binary(expression, "=")
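A quick check of this rule (the fold is skipped inside UPDATE statements):

import sqlglot

# `x = NULL` can never be true in BigQuery, so the comparison is folded
# to a plain NULL outside of UPDATE.
print(sqlglot.transpile("SELECT x = NULL FROM t", write="bigquery")[0])
# -> SELECT NULL FROM t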
1565 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1566 parent = expression.parent 1567 1568 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1569 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1570 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1571 return self.func( 1572 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1573 ) 1574 1575 return super().attimezone_sql(expression)
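For example, a Postgres-style AT TIME ZONE expression is rewritten through DATETIME and TIMESTAMP (a sketch; the column and zone names are illustrative):

import sqlglot

# Outside the CAST(... FORMAT ... AT TIME ZONE ...) case, AT TIME ZONE
# becomes TIMESTAMP(DATETIME(<expr>, <zone>)).
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# -> SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))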
1580 def bracket_sql(self, expression: exp.Bracket) -> str: 1581 this = expression.this 1582 expressions = expression.expressions 1583 1584 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1585 arg = expressions[0] 1586 if arg.type is None: 1587 from sqlglot.optimizer.annotate_types import annotate_types 1588 1589 arg = annotate_types(arg, dialect=self.dialect) 1590 1591 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1592 # BQ doesn't support bracket syntax with string values for structs 1593 return f"{self.sql(this)}.{arg.name}" 1594 1595 expressions_sql = self.expressions(expression, flat=True) 1596 offset = expression.args.get("offset") 1597 1598 if offset == 0: 1599 expressions_sql = f"OFFSET({expressions_sql})" 1600 elif offset == 1: 1601 expressions_sql = f"ORDINAL({expressions_sql})" 1602 elif offset is not None: 1603 self.unsupported(f"Unsupported array offset: {offset}") 1604 1605 if expression.args.get("safe"): 1606 expressions_sql = f"SAFE_{expressions_sql}" 1607 1608 return f"{self.sql(this)}[{expressions_sql}]"
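The OFFSET/ORDINAL wrappers (and their SAFE_ variants) survive a round trip (a sketch):

import sqlglot

# ORDINAL(1) is parsed into a Bracket with offset=1 and regenerated verbatim.
print(sqlglot.transpile("SELECT arr[ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])
# -> SELECT arr[ORDINAL(1)] FROM t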
1618 def contains_sql(self, expression: exp.Contains) -> str: 1619 this = expression.this 1620 expr = expression.expression 1621 1622 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1623 this = this.this 1624 expr = expr.this 1625 1626 return self.func("CONTAINS_SUBSTR", this, expr, expression.args.get("json_scope"))
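A round-trip sketch; paired LOWER() wrappers, which sqlglot may add internally to model CONTAINS_SUBSTR's case-insensitivity, are unwrapped by this method:

import sqlglot

print(sqlglot.transpile("SELECT CONTAINS_SUBSTR(a, 'needle')", read="bigquery", write="bigquery")[0])
# -> SELECT CONTAINS_SUBSTR(a, 'needle')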
1628 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1629 this = expression.this 1630 1631 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1632 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1633 # because they aren't literals and so the above syntax is invalid BigQuery. 1634 if isinstance(this, exp.Array): 1635 elem = seq_get(this.expressions, 0) 1636 if not (elem and elem.find(exp.Query)): 1637 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1638 1639 return super().cast_sql(expression, safe_prefix=safe_prefix)
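As the comment notes, inline type-annotated array literals round-trip unchanged (a sketch):

import sqlglot

# The Cast-over-Array node is emitted as ARRAY<INT64>[...] rather than
# CAST([...] AS ARRAY<INT64>), matching BigQuery's literal syntax.
print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])
# -> SELECT ARRAY<INT64>[1, 2, 3]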
1641 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1642 variables = self.expressions(expression, "this") 1643 default = self.sql(expression, "default") 1644 default = f" DEFAULT {default}" if default else "" 1645 kind = self.sql(expression, "kind") 1646 kind = f" {kind}" if kind else "" 1647 1648 return f"{variables}{kind}{default}"
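A final sketch: a DECLARE item renders as its variables, then the optional type, then the optional DEFAULT clause.

import sqlglot

print(sqlglot.transpile("DECLARE x, y INT64 DEFAULT 0", read="bigquery", write="bigquery")[0])
# -> DECLARE x, y INT64 DEFAULT 0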
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql