sqlglot.dialects.clickhouse — the ClickHouse dialect module for sqlglot, defining the dialect's tokenizer, parser, and SQL generator.
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, 
expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 
datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": 
TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 
235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 
327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 "COLUMNS": lambda self: self._parse_columns(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 
NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = 
super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            # EXTRACT(unit FROM expr) keeps standard semantics; the comma form is
            # ClickHouse's extract(haystack, pattern) string function instead.
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            # Extends the base rule with ClickHouse's ternary `cond ? a : b`,
            # which is parsed into an exp.If node (PLACEHOLDER token is `?`).
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            # `is_global` flags ClickHouse's GLOBAL IN (set via RANGE_PARSERS).
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            # NOTE(review): `parse_partition` is accepted but not forwarded to
            # super()._parse_table — confirm this is intentional.
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            # ClickHouse allows a trailing FINAL modifier on table references.
            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position() always takes the haystack first.
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            # Returns (global, side, kind); ClickHouse permits the join kind to
            # appear before the side (e.g. "ANY LEFT JOIN").
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                # The GLOBAL token is captured as the join "method" by the base
                # parser; re-home it under the ClickHouse-specific "global" arg.
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    # Non-empty suffix: a combined aggregate like sumIf/uniqArray.
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            # Parses the second argument list of a parameterized aggregate,
            # i.e. the "(params)" in quantile(0.5)(x).
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            # quantile(q)(x) -> Quantile(this=x, quantile=q); bare quantile(x)
            # defaults the level to the median (0.5).
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            # ClickHouse treats the wrapping parentheses as always optional.
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            # In property position the key list may appear without parentheses.
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            # ON CLUSTER <id> — backtrack if no identifier follows CLUSTER.
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            # ALTER TABLE ... REPLACE PARTITION <partition> FROM <table>
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
747 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 748 ) 749 750 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 751 if not self._match_text_seq("PROJECTION"): 752 return None 753 754 return self.expression( 755 exp.ProjectionDef, 756 this=self._parse_id_var(), 757 expression=self._parse_wrapped(self._parse_statement), 758 ) 759 760 def _parse_constraint(self) -> t.Optional[exp.Expression]: 761 return super()._parse_constraint() or self._parse_projection_def() 762 763 def _parse_alias( 764 self, this: t.Optional[exp.Expression], explicit: bool = False 765 ) -> t.Optional[exp.Expression]: 766 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 767 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 768 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 769 return this 770 771 return super()._parse_alias(this=this, explicit=explicit) 772 773 def _parse_expression(self) -> t.Optional[exp.Expression]: 774 this = super()._parse_expression() 775 776 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 777 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 778 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 779 self._match(TokenType.R_PAREN) 780 781 return this 782 783 def _parse_columns(self) -> exp.Expression: 784 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 785 786 while self._next and self._match_text_seq(")", "APPLY", "("): 787 self._match(TokenType.R_PAREN) 788 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 789 return this 790 791 class Generator(generator.Generator): 792 QUERY_HINTS = False 793 STRUCT_DELIMITER = ("(", ")") 794 NVL2_SUPPORTED = False 795 TABLESAMPLE_REQUIRES_PARENS = False 796 TABLESAMPLE_SIZE_IS_ROWS = False 797 TABLESAMPLE_KEYWORDS = "SAMPLE" 798 LAST_DAY_SUPPORTS_DATE_PART = False 799 CAN_IMPLEMENT_ARRAY_ANY = True 
800 SUPPORTS_TO_NUMBER = False 801 JOIN_HINTS = False 802 TABLE_HINTS = False 803 GROUPINGS_SEP = "" 804 SET_OP_MODIFIERS = False 805 SUPPORTS_TABLE_ALIAS_COLUMNS = False 806 VALUES_AS_TABLE = False 807 808 STRING_TYPE_MAPPING = { 809 exp.DataType.Type.CHAR: "String", 810 exp.DataType.Type.LONGBLOB: "String", 811 exp.DataType.Type.LONGTEXT: "String", 812 exp.DataType.Type.MEDIUMBLOB: "String", 813 exp.DataType.Type.MEDIUMTEXT: "String", 814 exp.DataType.Type.TINYBLOB: "String", 815 exp.DataType.Type.TINYTEXT: "String", 816 exp.DataType.Type.TEXT: "String", 817 exp.DataType.Type.VARBINARY: "String", 818 exp.DataType.Type.VARCHAR: "String", 819 } 820 821 SUPPORTED_JSON_PATH_PARTS = { 822 exp.JSONPathKey, 823 exp.JSONPathRoot, 824 exp.JSONPathSubscript, 825 } 826 827 TYPE_MAPPING = { 828 **generator.Generator.TYPE_MAPPING, 829 **STRING_TYPE_MAPPING, 830 exp.DataType.Type.ARRAY: "Array", 831 exp.DataType.Type.BOOLEAN: "Bool", 832 exp.DataType.Type.BIGINT: "Int64", 833 exp.DataType.Type.DATE32: "Date32", 834 exp.DataType.Type.DATETIME: "DateTime", 835 exp.DataType.Type.DATETIME64: "DateTime64", 836 exp.DataType.Type.TIMESTAMP: "DateTime", 837 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 838 exp.DataType.Type.DOUBLE: "Float64", 839 exp.DataType.Type.ENUM: "Enum", 840 exp.DataType.Type.ENUM8: "Enum8", 841 exp.DataType.Type.ENUM16: "Enum16", 842 exp.DataType.Type.FIXEDSTRING: "FixedString", 843 exp.DataType.Type.FLOAT: "Float32", 844 exp.DataType.Type.INT: "Int32", 845 exp.DataType.Type.MEDIUMINT: "Int32", 846 exp.DataType.Type.INT128: "Int128", 847 exp.DataType.Type.INT256: "Int256", 848 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 849 exp.DataType.Type.MAP: "Map", 850 exp.DataType.Type.NESTED: "Nested", 851 exp.DataType.Type.SMALLINT: "Int16", 852 exp.DataType.Type.STRUCT: "Tuple", 853 exp.DataType.Type.TINYINT: "Int8", 854 exp.DataType.Type.UBIGINT: "UInt64", 855 exp.DataType.Type.UINT: "UInt32", 856 exp.DataType.Type.UINT128: "UInt128", 857 
exp.DataType.Type.UINT256: "UInt256", 858 exp.DataType.Type.USMALLINT: "UInt16", 859 exp.DataType.Type.UTINYINT: "UInt8", 860 exp.DataType.Type.IPV4: "IPv4", 861 exp.DataType.Type.IPV6: "IPv6", 862 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 863 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 864 } 865 866 TRANSFORMS = { 867 **generator.Generator.TRANSFORMS, 868 exp.AnyValue: rename_func("any"), 869 exp.ApproxDistinct: rename_func("uniq"), 870 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 871 exp.ArraySize: rename_func("LENGTH"), 872 exp.ArraySum: rename_func("arraySum"), 873 exp.ArgMax: arg_max_or_min_no_count("argMax"), 874 exp.ArgMin: arg_max_or_min_no_count("argMin"), 875 exp.Array: inline_array_sql, 876 exp.CastToStrType: rename_func("CAST"), 877 exp.CountIf: rename_func("countIf"), 878 exp.CompressColumnConstraint: lambda self, 879 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 880 exp.ComputedColumnConstraint: lambda self, 881 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 882 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 883 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 884 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 885 exp.DateStrToDate: rename_func("toDate"), 886 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 887 exp.Explode: rename_func("arrayJoin"), 888 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 889 exp.IsNan: rename_func("isNaN"), 890 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 891 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 892 exp.JSONPathKey: json_path_key_only_name, 893 exp.JSONPathRoot: lambda *_: "", 894 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 895 exp.Nullif: rename_func("nullIf"), 896 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 897 exp.Pivot: no_pivot_sql, 
898 exp.Quantile: _quantile_sql, 899 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 900 exp.Rand: rename_func("randCanonical"), 901 exp.StartsWith: rename_func("startsWith"), 902 exp.StrPosition: lambda self, e: self.func( 903 "position", e.this, e.args.get("substr"), e.args.get("position") 904 ), 905 exp.TimeToStr: lambda self, e: self.func( 906 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 907 ), 908 exp.TimeStrToTime: _timestrtotime_sql, 909 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 910 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 911 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 912 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 913 exp.MD5Digest: rename_func("MD5"), 914 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 915 exp.SHA: rename_func("SHA1"), 916 exp.SHA2: sha256_sql, 917 exp.UnixToTime: _unix_to_time_sql, 918 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 919 exp.Trim: trim_sql, 920 exp.Variance: rename_func("varSamp"), 921 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 922 exp.Stddev: rename_func("stddevSamp"), 923 exp.Chr: rename_func("CHAR"), 924 exp.Lag: lambda self, e: self.func( 925 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 926 ), 927 exp.Lead: lambda self, e: self.func( 928 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 929 ), 930 } 931 932 PROPERTIES_LOCATION = { 933 **generator.Generator.PROPERTIES_LOCATION, 934 exp.OnCluster: exp.Properties.Location.POST_NAME, 935 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 936 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 937 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 938 } 939 940 # There's no list in docs, but it can be found in Clickhouse code 941 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 942 ON_CLUSTER_TARGETS = { 943 
"SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 944 "DATABASE", 945 "TABLE", 946 "VIEW", 947 "DICTIONARY", 948 "INDEX", 949 "FUNCTION", 950 "NAMED COLLECTION", 951 } 952 953 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 954 NON_NULLABLE_TYPES = { 955 exp.DataType.Type.ARRAY, 956 exp.DataType.Type.MAP, 957 exp.DataType.Type.STRUCT, 958 } 959 960 def strtodate_sql(self, expression: exp.StrToDate) -> str: 961 strtodate_sql = self.function_fallback_sql(expression) 962 963 if not isinstance(expression.parent, exp.Cast): 964 # StrToDate returns DATEs in other dialects (eg. postgres), so 965 # this branch aims to improve the transpilation to clickhouse 966 return f"CAST({strtodate_sql} AS DATE)" 967 968 return strtodate_sql 969 970 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 971 this = expression.this 972 973 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 974 return self.sql(this) 975 976 return super().cast_sql(expression, safe_prefix=safe_prefix) 977 978 def trycast_sql(self, expression: exp.TryCast) -> str: 979 dtype = expression.to 980 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 981 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 982 dtype.set("nullable", True) 983 984 return super().cast_sql(expression) 985 986 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 987 this = self.json_path_part(expression.this) 988 return str(int(this) + 1) if is_int(this) else this 989 990 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 991 return f"AS {self.sql(expression, 'this')}" 992 993 def _any_to_has( 994 self, 995 expression: exp.EQ | exp.NEQ, 996 default: t.Callable[[t.Any], str], 997 prefix: str = "", 998 ) -> str: 999 if isinstance(expression.left, exp.Any): 1000 arr = expression.left 1001 this = expression.right 1002 elif isinstance(expression.right, 
def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
    """Render a case-insensitive regex match via ClickHouse's match().

    match() itself is case-sensitive, so the inline (?i) flag is prepended
    to the pattern to emulate ILIKE-style matching.
    """
    haystack = expression.this
    pattern = self.func("CONCAT", "'(?i)'", expression.expression)
    return self.func("match", haystack, pattern)
def placeholder_sql(self, expression: exp.Placeholder) -> str:
    """Render a ClickHouse query parameter, e.g. ``{abc: UInt32}``."""
    kind = self.sql(expression, "kind")
    return "{" + f"{expression.name}: {kind}" + "}"
def create_sql(self, expression: exp.Create) -> str:
    """Render CREATE, re-appending any COMMENT property after a CTAS query.

    In ClickHouse the comment property must come last in CTAS statements,
    i.e. after the SELECT, so it is detached before rendering and glued
    back onto the end of the statement.
    """
    comment_prop = None
    query = expression.expression

    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
        # Parenthesize the query so the trailing comment binds unambiguously.
        query.replace(exp.paren(query))

    base_sql = super().create_sql(expression)
    trailing = self.sql(comment_prop)

    return f"{base_sql} {trailing}" if trailing else base_sql
def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
    """Render ALTER TABLE's ``REPLACE PARTITION ... FROM ...`` clause."""
    partition = self.sql(expression.expression)
    source = self.sql(expression, "source")
    return f"REPLACE {partition} FROM {source}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 
"PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": 
lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 
"categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: 
self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and 
dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
def _parse_assignment(self) -> t.Optional[exp.Expression]:
    """Parse an assignment, additionally handling the ternary ``cond ? a : b``.

    The ``?`` is tokenized as PLACEHOLDER (the conflicting PLACEHOLDER column
    operator was popped from COLUMN_OPERATORS), so matching it here signals
    a ternary, which maps onto an If expression.
    """
    this = super()._parse_assignment()

    if not self._match(TokenType.PLACEHOLDER):
        return this

    true_branch = self._parse_assignment()
    # The ":" separates the branches; its absence leaves `false` falsy.
    false_branch = self._match(TokenType.COLON) and self._parse_assignment()

    return self.expression(exp.If, this=this, true=true_branch, false=false_branch)
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    # Return the (global, side, kind) tokens of a join prefix.
    #
    # ClickHouse accepts an optional leading GLOBAL and allows the join
    # kind to appear before the side (e.g. "ANY LEFT JOIN"), so the kind
    # is probed first without advancing; only then is the usual
    # side-before-kind order attempted. Each `_match*` call advances the
    # token stream, so the order of these checks is significant.
    is_global = self._match(TokenType.GLOBAL) and self._prev
    kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

    if kind_pre:
        # Kind came first: consume it, then an optional side.
        kind = self._match_set(self.JOIN_KINDS) and self._prev
        side = self._match_set(self.JOIN_SIDES) and self._prev
        return is_global, side, kind

    # Standard order: optional side, then optional kind.
    return (
        is_global,
        self._match_set(self.JOIN_SIDES) and self._prev,
        self._match_set(self.JOIN_KINDS) and self._prev,
    )
def _parse_quantile(self) -> exp.Quantile:
    """Parse ``quantile(level)(expr)``; a bare ``quantile(expr)`` defaults
    to the median (level 0.5)."""
    level = self._parse_lambda()
    params = self._parse_func_params()

    if not params:
        # No parameter list: the parsed argument is the operand itself.
        return self.expression(
            exp.Quantile, this=level, quantile=exp.Literal.number(0.5)
        )

    return self.expression(exp.Quantile, this=params[0], quantile=level)
def _parse_on_property(self) -> t.Optional[exp.Expression]:
    """Parse the ``ON CLUSTER <id>`` property; rewind on a partial match."""
    start = self._index

    if self._match_text_seq("CLUSTER"):
        cluster = self._parse_id_var()
        if cluster:
            return self.expression(exp.OnCluster, this=cluster)
        # "CLUSTER" without an identifier isn't this property — back out.
        self._retreat(start)

    return None
def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    """Parse an alias, unless an ``APPLY(`` modifier follows.

    In ClickHouse "SELECT <expr> APPLY(...)" is a query modifier, so APPLY
    must not be consumed as <expr>'s alias; a bare trailing "apply" (no
    parenthesis) is still a valid alias.
    """
    followed_by_apply_call = self._match_pair(
        TokenType.APPLY, TokenType.L_PAREN, advance=False
    )
    if followed_by_apply_call:
        return this

    return super()._parse_alias(this=this, explicit=explicit)
801 SUPPORTS_TO_NUMBER = False 802 JOIN_HINTS = False 803 TABLE_HINTS = False 804 GROUPINGS_SEP = "" 805 SET_OP_MODIFIERS = False 806 SUPPORTS_TABLE_ALIAS_COLUMNS = False 807 VALUES_AS_TABLE = False 808 809 STRING_TYPE_MAPPING = { 810 exp.DataType.Type.CHAR: "String", 811 exp.DataType.Type.LONGBLOB: "String", 812 exp.DataType.Type.LONGTEXT: "String", 813 exp.DataType.Type.MEDIUMBLOB: "String", 814 exp.DataType.Type.MEDIUMTEXT: "String", 815 exp.DataType.Type.TINYBLOB: "String", 816 exp.DataType.Type.TINYTEXT: "String", 817 exp.DataType.Type.TEXT: "String", 818 exp.DataType.Type.VARBINARY: "String", 819 exp.DataType.Type.VARCHAR: "String", 820 } 821 822 SUPPORTED_JSON_PATH_PARTS = { 823 exp.JSONPathKey, 824 exp.JSONPathRoot, 825 exp.JSONPathSubscript, 826 } 827 828 TYPE_MAPPING = { 829 **generator.Generator.TYPE_MAPPING, 830 **STRING_TYPE_MAPPING, 831 exp.DataType.Type.ARRAY: "Array", 832 exp.DataType.Type.BOOLEAN: "Bool", 833 exp.DataType.Type.BIGINT: "Int64", 834 exp.DataType.Type.DATE32: "Date32", 835 exp.DataType.Type.DATETIME: "DateTime", 836 exp.DataType.Type.DATETIME64: "DateTime64", 837 exp.DataType.Type.TIMESTAMP: "DateTime", 838 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 839 exp.DataType.Type.DOUBLE: "Float64", 840 exp.DataType.Type.ENUM: "Enum", 841 exp.DataType.Type.ENUM8: "Enum8", 842 exp.DataType.Type.ENUM16: "Enum16", 843 exp.DataType.Type.FIXEDSTRING: "FixedString", 844 exp.DataType.Type.FLOAT: "Float32", 845 exp.DataType.Type.INT: "Int32", 846 exp.DataType.Type.MEDIUMINT: "Int32", 847 exp.DataType.Type.INT128: "Int128", 848 exp.DataType.Type.INT256: "Int256", 849 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 850 exp.DataType.Type.MAP: "Map", 851 exp.DataType.Type.NESTED: "Nested", 852 exp.DataType.Type.SMALLINT: "Int16", 853 exp.DataType.Type.STRUCT: "Tuple", 854 exp.DataType.Type.TINYINT: "Int8", 855 exp.DataType.Type.UBIGINT: "UInt64", 856 exp.DataType.Type.UINT: "UInt32", 857 exp.DataType.Type.UINT128: "UInt128", 858 
exp.DataType.Type.UINT256: "UInt256", 859 exp.DataType.Type.USMALLINT: "UInt16", 860 exp.DataType.Type.UTINYINT: "UInt8", 861 exp.DataType.Type.IPV4: "IPv4", 862 exp.DataType.Type.IPV6: "IPv6", 863 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 864 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 865 } 866 867 TRANSFORMS = { 868 **generator.Generator.TRANSFORMS, 869 exp.AnyValue: rename_func("any"), 870 exp.ApproxDistinct: rename_func("uniq"), 871 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 872 exp.ArraySize: rename_func("LENGTH"), 873 exp.ArraySum: rename_func("arraySum"), 874 exp.ArgMax: arg_max_or_min_no_count("argMax"), 875 exp.ArgMin: arg_max_or_min_no_count("argMin"), 876 exp.Array: inline_array_sql, 877 exp.CastToStrType: rename_func("CAST"), 878 exp.CountIf: rename_func("countIf"), 879 exp.CompressColumnConstraint: lambda self, 880 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 881 exp.ComputedColumnConstraint: lambda self, 882 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 883 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 884 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 885 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 886 exp.DateStrToDate: rename_func("toDate"), 887 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 888 exp.Explode: rename_func("arrayJoin"), 889 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 890 exp.IsNan: rename_func("isNaN"), 891 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 892 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 893 exp.JSONPathKey: json_path_key_only_name, 894 exp.JSONPathRoot: lambda *_: "", 895 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 896 exp.Nullif: rename_func("nullIf"), 897 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 898 exp.Pivot: no_pivot_sql, 
899 exp.Quantile: _quantile_sql, 900 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 901 exp.Rand: rename_func("randCanonical"), 902 exp.StartsWith: rename_func("startsWith"), 903 exp.StrPosition: lambda self, e: self.func( 904 "position", e.this, e.args.get("substr"), e.args.get("position") 905 ), 906 exp.TimeToStr: lambda self, e: self.func( 907 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 908 ), 909 exp.TimeStrToTime: _timestrtotime_sql, 910 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 911 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 912 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 913 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 914 exp.MD5Digest: rename_func("MD5"), 915 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 916 exp.SHA: rename_func("SHA1"), 917 exp.SHA2: sha256_sql, 918 exp.UnixToTime: _unix_to_time_sql, 919 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 920 exp.Trim: trim_sql, 921 exp.Variance: rename_func("varSamp"), 922 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 923 exp.Stddev: rename_func("stddevSamp"), 924 exp.Chr: rename_func("CHAR"), 925 exp.Lag: lambda self, e: self.func( 926 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 927 ), 928 exp.Lead: lambda self, e: self.func( 929 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 930 ), 931 } 932 933 PROPERTIES_LOCATION = { 934 **generator.Generator.PROPERTIES_LOCATION, 935 exp.OnCluster: exp.Properties.Location.POST_NAME, 936 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 937 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 938 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 939 } 940 941 # There's no list in docs, but it can be found in Clickhouse code 942 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 943 ON_CLUSTER_TARGETS = { 944 
"SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 945 "DATABASE", 946 "TABLE", 947 "VIEW", 948 "DICTIONARY", 949 "INDEX", 950 "FUNCTION", 951 "NAMED COLLECTION", 952 } 953 954 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 955 NON_NULLABLE_TYPES = { 956 exp.DataType.Type.ARRAY, 957 exp.DataType.Type.MAP, 958 exp.DataType.Type.STRUCT, 959 } 960 961 def strtodate_sql(self, expression: exp.StrToDate) -> str: 962 strtodate_sql = self.function_fallback_sql(expression) 963 964 if not isinstance(expression.parent, exp.Cast): 965 # StrToDate returns DATEs in other dialects (eg. postgres), so 966 # this branch aims to improve the transpilation to clickhouse 967 return f"CAST({strtodate_sql} AS DATE)" 968 969 return strtodate_sql 970 971 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 972 this = expression.this 973 974 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 975 return self.sql(this) 976 977 return super().cast_sql(expression, safe_prefix=safe_prefix) 978 979 def trycast_sql(self, expression: exp.TryCast) -> str: 980 dtype = expression.to 981 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 982 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 983 dtype.set("nullable", True) 984 985 return super().cast_sql(expression) 986 987 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 988 this = self.json_path_part(expression.this) 989 return str(int(this) + 1) if is_int(this) else this 990 991 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 992 return f"AS {self.sql(expression, 'this')}" 993 994 def _any_to_has( 995 self, 996 expression: exp.EQ | exp.NEQ, 997 default: t.Callable[[t.Any], str], 998 prefix: str = "", 999 ) -> str: 1000 if isinstance(expression.left, exp.Any): 1001 arr = expression.left 1002 this = expression.right 1003 elif isinstance(expression.right, 
exp.Any): 1004 arr = expression.right 1005 this = expression.left 1006 else: 1007 return default(expression) 1008 1009 return prefix + self.func("has", arr.this.unnest(), this) 1010 1011 def eq_sql(self, expression: exp.EQ) -> str: 1012 return self._any_to_has(expression, super().eq_sql) 1013 1014 def neq_sql(self, expression: exp.NEQ) -> str: 1015 return self._any_to_has(expression, super().neq_sql, "NOT ") 1016 1017 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1018 # Manually add a flag to make the search case-insensitive 1019 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1020 return self.func("match", expression.this, regex) 1021 1022 def datatype_sql(self, expression: exp.DataType) -> str: 1023 # String is the standard ClickHouse type, every other variant is just an alias. 1024 # Additionally, any supplied length parameter will be ignored. 1025 # 1026 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1027 if expression.this in self.STRING_TYPE_MAPPING: 1028 dtype = "String" 1029 else: 1030 dtype = super().datatype_sql(expression) 1031 1032 # This section changes the type to `Nullable(...)` if the following conditions hold: 1033 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1034 # and change their semantics 1035 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1036 # constraint: "Type of Map key must be a type, that can be represented by integer or 1037 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1038 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1039 parent = expression.parent 1040 nullable = expression.args.get("nullable") 1041 if nullable is True or ( 1042 nullable is None 1043 and not ( 1044 isinstance(parent, exp.DataType) 1045 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1046 and expression.index in (None, 0) 1047 ) 1048 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1049 ): 1050 dtype = f"Nullable({dtype})" 1051 1052 return dtype 1053 1054 def cte_sql(self, expression: exp.CTE) -> str: 1055 if expression.args.get("scalar"): 1056 this = self.sql(expression, "this") 1057 alias = self.sql(expression, "alias") 1058 return f"{this} AS {alias}" 1059 1060 return super().cte_sql(expression) 1061 1062 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1063 return super().after_limit_modifiers(expression) + [ 1064 ( 1065 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1066 if expression.args.get("settings") 1067 else "" 1068 ), 1069 ( 1070 self.seg("FORMAT ") + self.sql(expression, "format") 1071 if expression.args.get("format") 1072 else "" 1073 ), 1074 ] 1075 1076 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1077 params = self.expressions(expression, key="params", flat=True) 1078 return self.func(expression.name, *expression.expressions) + f"({params})" 1079 1080 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1081 return self.func(expression.name, *expression.expressions) 1082 1083 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1084 return self.anonymousaggfunc_sql(expression) 1085 1086 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1087 return self.parameterizedagg_sql(expression) 1088 1089 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1090 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1091 1092 def 
oncluster_sql(self, expression: exp.OnCluster) -> str: 1093 return f"ON CLUSTER {self.sql(expression, 'this')}" 1094 1095 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1096 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1097 exp.Properties.Location.POST_NAME 1098 ): 1099 this_name = self.sql( 1100 expression.this if isinstance(expression.this, exp.Schema) else expression, 1101 "this", 1102 ) 1103 this_properties = " ".join( 1104 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1105 ) 1106 this_schema = self.schema_columns_sql(expression.this) 1107 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1108 1109 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1110 1111 return super().createable_sql(expression, locations) 1112 1113 def create_sql(self, expression: exp.Create) -> str: 1114 # The comment property comes last in CTAS statements, i.e. after the query 1115 query = expression.expression 1116 if isinstance(query, exp.Query): 1117 comment_prop = expression.find(exp.SchemaCommentProperty) 1118 if comment_prop: 1119 comment_prop.pop() 1120 query.replace(exp.paren(query)) 1121 else: 1122 comment_prop = None 1123 1124 create_sql = super().create_sql(expression) 1125 1126 comment_sql = self.sql(comment_prop) 1127 comment_sql = f" {comment_sql}" if comment_sql else "" 1128 1129 return f"{create_sql}{comment_sql}" 1130 1131 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1132 this = self.indent(self.sql(expression, "this")) 1133 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1134 1135 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1136 this = self.sql(expression, "this") 1137 this = f" {this}" if this else "" 1138 expr = self.sql(expression, "expression") 1139 expr = f" {expr}" if expr else "" 1140 index_type = self.sql(expression, "index_type") 1141 index_type = f" TYPE {index_type}" if index_type else "" 1142 
granularity = self.sql(expression, "granularity") 1143 granularity = f" GRANULARITY {granularity}" if granularity else "" 1144 1145 return f"INDEX{this}{expr}{index_type}{granularity}" 1146 1147 def partition_sql(self, expression: exp.Partition) -> str: 1148 return f"PARTITION {self.expressions(expression, flat=True)}" 1149 1150 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1151 return f"ID {self.sql(expression.this)}" 1152 1153 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1154 return ( 1155 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1156 ) 1157 1158 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1159 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. the two-character string "\\n") to its unescaped version (the corresponding literal character, e.g. a newline).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 
"any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 
"quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific 
JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. 
This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 501 self._match(TokenType.COMMA) 502 return self.expression( 503 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 504 ) 505 506 def _parse_assignment(self) -> t.Optional[exp.Expression]: 507 this = super()._parse_assignment() 508 509 if self._match(TokenType.PLACEHOLDER): 510 return self.expression( 511 exp.If, 512 this=this, 513 true=self._parse_assignment(), 514 false=self._match(TokenType.COLON) and self._parse_assignment(), 515 ) 516 517 return this 518 519 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 520 """ 521 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 522 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 523 """ 524 this = self._parse_id_var() 525 self._match(TokenType.COLON) 526 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 527 self._match_text_seq("IDENTIFIER") and "Identifier" 528 ) 529 530 if not kind: 531 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 532 elif 
not self._match(TokenType.R_BRACE): 533 self.raise_error("Expecting }") 534 535 return self.expression(exp.Placeholder, this=this, kind=kind) 536 537 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 538 this = super()._parse_in(this) 539 this.set("is_global", is_global) 540 return this 541 542 def _parse_table( 543 self, 544 schema: bool = False, 545 joins: bool = False, 546 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 547 parse_bracket: bool = False, 548 is_db_reference: bool = False, 549 parse_partition: bool = False, 550 ) -> t.Optional[exp.Expression]: 551 this = super()._parse_table( 552 schema=schema, 553 joins=joins, 554 alias_tokens=alias_tokens, 555 parse_bracket=parse_bracket, 556 is_db_reference=is_db_reference, 557 ) 558 559 if self._match(TokenType.FINAL): 560 this = self.expression(exp.Final, this=this) 561 562 return this 563 564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 565 return super()._parse_position(haystack_first=True) 566 567 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 568 def _parse_cte(self) -> exp.CTE: 569 # WITH <identifier> AS <subquery expression> 570 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 571 572 if not cte: 573 # WITH <expression> AS <identifier> 574 cte = self.expression( 575 exp.CTE, 576 this=self._parse_assignment(), 577 alias=self._parse_table_alias(), 578 scalar=True, 579 ) 580 581 return cte 582 583 def _parse_join_parts( 584 self, 585 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 586 is_global = self._match(TokenType.GLOBAL) and self._prev 587 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 588 589 if kind_pre: 590 kind = self._match_set(self.JOIN_KINDS) and self._prev 591 side = self._match_set(self.JOIN_SIDES) and self._prev 592 return is_global, side, kind 593 594 return ( 595 is_global, 596 self._match_set(self.JOIN_SIDES) and 
self._prev, 597 self._match_set(self.JOIN_KINDS) and self._prev, 598 ) 599 600 def _parse_join( 601 self, skip_join_token: bool = False, parse_bracket: bool = False 602 ) -> t.Optional[exp.Join]: 603 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 604 if join: 605 join.set("global", join.args.pop("method", None)) 606 607 return join 608 609 def _parse_function( 610 self, 611 functions: t.Optional[t.Dict[str, t.Callable]] = None, 612 anonymous: bool = False, 613 optional_parens: bool = True, 614 any_token: bool = False, 615 ) -> t.Optional[exp.Expression]: 616 expr = super()._parse_function( 617 functions=functions, 618 anonymous=anonymous, 619 optional_parens=optional_parens, 620 any_token=any_token, 621 ) 622 623 func = expr.this if isinstance(expr, exp.Window) else expr 624 625 # Aggregate functions can be split in 2 parts: <func_name><suffix> 626 parts = ( 627 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 628 ) 629 630 if parts: 631 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 632 params = self._parse_func_params(anon_func) 633 634 kwargs = { 635 "this": anon_func.this, 636 "expressions": anon_func.expressions, 637 } 638 if parts[1]: 639 kwargs["parts"] = parts 640 exp_class: t.Type[exp.Expression] = ( 641 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 642 ) 643 else: 644 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 645 646 kwargs["exp_class"] = exp_class 647 if params: 648 kwargs["params"] = params 649 650 func = self.expression(**kwargs) 651 652 if isinstance(expr, exp.Window): 653 # The window's func was parsed as Anonymous in base parser, fix its 654 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 655 expr.set("this", func) 656 elif params: 657 # Params have blocked super()._parse_function() from parsing the following window 658 # (if that exists) as they're standing between the function call and the window spec 659 
expr = self._parse_window(func) 660 else: 661 expr = func 662 663 return expr 664 665 def _parse_func_params( 666 self, this: t.Optional[exp.Func] = None 667 ) -> t.Optional[t.List[exp.Expression]]: 668 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 669 return self._parse_csv(self._parse_lambda) 670 671 if self._match(TokenType.L_PAREN): 672 params = self._parse_csv(self._parse_lambda) 673 self._match_r_paren(this) 674 return params 675 676 return None 677 678 def _parse_quantile(self) -> exp.Quantile: 679 this = self._parse_lambda() 680 params = self._parse_func_params() 681 if params: 682 return self.expression(exp.Quantile, this=params[0], quantile=this) 683 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 684 685 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 686 return super()._parse_wrapped_id_vars(optional=True) 687 688 def _parse_primary_key( 689 self, wrapped_optional: bool = False, in_props: bool = False 690 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 691 return super()._parse_primary_key( 692 wrapped_optional=wrapped_optional or in_props, in_props=in_props 693 ) 694 695 def _parse_on_property(self) -> t.Optional[exp.Expression]: 696 index = self._index 697 if self._match_text_seq("CLUSTER"): 698 this = self._parse_id_var() 699 if this: 700 return self.expression(exp.OnCluster, this=this) 701 else: 702 self._retreat(index) 703 return None 704 705 def _parse_index_constraint( 706 self, kind: t.Optional[str] = None 707 ) -> exp.IndexColumnConstraint: 708 # INDEX name1 expr TYPE type1(args) GRANULARITY value 709 this = self._parse_id_var() 710 expression = self._parse_assignment() 711 712 index_type = self._match_text_seq("TYPE") and ( 713 self._parse_function() or self._parse_var() 714 ) 715 716 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 717 718 return self.expression( 719 exp.IndexColumnConstraint, 720 this=this, 721 
expression=expression, 722 index_type=index_type, 723 granularity=granularity, 724 ) 725 726 def _parse_partition(self) -> t.Optional[exp.Partition]: 727 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 728 if not self._match(TokenType.PARTITION): 729 return None 730 731 if self._match_text_seq("ID"): 732 # Corresponds to the PARTITION ID <string_value> syntax 733 expressions: t.List[exp.Expression] = [ 734 self.expression(exp.PartitionId, this=self._parse_string()) 735 ] 736 else: 737 expressions = self._parse_expressions() 738 739 return self.expression(exp.Partition, expressions=expressions) 740 741 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 742 partition = self._parse_partition() 743 744 if not partition or not self._match(TokenType.FROM): 745 return None 746 747 return self.expression( 748 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 749 ) 750 751 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 752 if not self._match_text_seq("PROJECTION"): 753 return None 754 755 return self.expression( 756 exp.ProjectionDef, 757 this=self._parse_id_var(), 758 expression=self._parse_wrapped(self._parse_statement), 759 ) 760 761 def _parse_constraint(self) -> t.Optional[exp.Expression]: 762 return super()._parse_constraint() or self._parse_projection_def() 763 764 def _parse_alias( 765 self, this: t.Optional[exp.Expression], explicit: bool = False 766 ) -> t.Optional[exp.Expression]: 767 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 768 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 769 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 770 return this 771 772 return super()._parse_alias(this=this, explicit=explicit) 773 774 def _parse_expression(self) -> t.Optional[exp.Expression]: 775 this = super()._parse_expression() 776 777 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 778 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 779 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 780 self._match(TokenType.R_PAREN) 781 782 return this 783 784 def _parse_columns(self) -> exp.Expression: 785 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 786 787 while self._next and self._match_text_seq(")", "APPLY", "("): 788 self._match(TokenType.R_PAREN) 789 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 790 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates ClickHouse SQL strings from sqlglot syntax trees."""

    # Feature flags describing ClickHouse's SQL surface.
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    VALUES_AS_TABLE = False

    # Text/binary type variants that all collapse to ClickHouse's single
    # "String" type (see datatype_sql below).
    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    # JSON path parts ClickHouse's JSONExtract* functions can represent.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # Maps sqlglot types to ClickHouse type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    # Per-expression SQL generation overrides for ClickHouse.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        # arrayFilter takes the lambda first, then the array.
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        # MATERIALIZED columns are stored, ALIAS columns are computed on read.
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        # ClickHouse's MD5 returns binary; wrap in HEX + LOWER for the usual hex digest.
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: trim_sql,
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # Composite types that cannot be wrapped in Nullable(...).
    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        """Render StrToDate; wrap in CAST(... AS DATE) unless a cast already encloses it."""
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        """Render a cast, dropping redundant CAST(StrToDate(...) AS DATETIME) wrappers."""
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        """Render TRY_CAST as a cast to a Nullable(...) target type."""
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        # ClickHouse JSON array indices are 1-based, so shift integer subscripts.
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        """Render LIKE <table> in CREATE as ClickHouse's AS <table>."""
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        """Rewrite `x = ANY(arr)` / `x <> ANY(arr)` as [NOT] has(arr, x).

        Falls back to `default` when neither operand is an ANY expression.
        """
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type, collapsing text aliases and adding Nullable(...)."""
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        """Render a CTE; scalar CTEs use ClickHouse's `<expr> AS <alias>` form."""
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        """Append SETTINGS and FORMAT clauses after LIMIT, when present."""
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        # Renders func(args)(params), ClickHouse's parameterized aggregate syntax.
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        # ClickHouse query parameter syntax: {name: Type}
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        """Place POST_NAME properties (e.g. ON CLUSTER) between name and schema."""
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        """Render CREATE; in CTAS the COMMENT property is emitted after the query."""
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
            query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        """Render INDEX <name> <expr> TYPE <type> GRANULARITY <n>, parts optional."""
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    """Render StrToDate; wrap in CAST(... AS DATE) unless a cast already encloses it."""
    rendered = self.function_fallback_sql(expression)

    if isinstance(expression.parent, exp.Cast):
        return rendered

    # StrToDate returns DATEs in other dialects (eg. postgres), so casting
    # explicitly improves the transpilation to clickhouse
    return f"CAST({rendered} AS DATE)"
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    """Render a cast, dropping redundant CAST(StrToDate(...) AS DATETIME) wrappers."""
    inner = expression.this

    is_strtodate_to_datetime = isinstance(inner, exp.StrToDate) and expression.to == exp.DataType.build(
        "datetime"
    )
    if is_strtodate_to_datetime:
        return self.sql(inner)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST as a cast to a Nullable(...) target type."""
    target = expression.to
    # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
    if not target.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        target.set("nullable", True)

    return super().cast_sql(expression)
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, collapsing text aliases and adding Nullable(...).

    String is the standard ClickHouse type; every other text variant is just
    an alias, and any supplied length parameter is ignored.
    https://clickhouse.com/docs/en/sql-reference/data-types/string
    """
    if expression.this in self.STRING_TYPE_MAPPING:
        rendered = "String"
    else:
        rendered = super().datatype_sql(expression)

    parent = expression.parent
    nullable = expression.args.get("nullable")

    # A Map's key type cannot be Nullable: ClickHouse enforces "Type of Map key
    # must be a type, that can be represented by integer or String or
    # FixedString (possibly LowCardinality) or UUID or IPv6".
    is_map_key = (
        isinstance(parent, exp.DataType)
        and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
        and expression.index in (None, 0)
    )

    # Wrap in `Nullable(...)` when explicitly marked nullable (never wrap
    # unmarked ClickHouse types and change their semantics), or when unmarked
    # and it's neither a Map key nor a composite type - `Nullable(Array(...))`
    # is not a valid type.
    if nullable is True or (
        nullable is None
        and not is_map_key
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        rendered = f"Nullable({rendered})"

    return rendered
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    """Append SETTINGS and FORMAT clauses after LIMIT, when present."""
    base = super().after_limit_modifiers(expression)

    settings_sql = ""
    if expression.args.get("settings"):
        settings_sql = self.seg("SETTINGS ") + self.expressions(
            expression, key="settings", flat=True
        )

    format_sql = ""
    if expression.args.get("format"):
        format_sql = self.seg("FORMAT ") + self.sql(expression, "format")

    return base + [settings_sql, format_sql]
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Place POST_NAME properties (e.g. ON CLUSTER) between name and schema."""
    post_name_props = locations.get(exp.Properties.Location.POST_NAME)

    if expression.kind in self.ON_CLUSTER_TARGETS and post_name_props:
        named = expression.this if isinstance(expression.this, exp.Schema) else expression
        name_sql = self.sql(named, "this")
        props_sql = " ".join(self.sql(prop) for prop in post_name_props)
        schema_sql = self.schema_columns_sql(expression.this)
        schema_sql = f"{self.sep()}{schema_sql}" if schema_sql else ""

        return f"{name_sql}{self.sep()}{props_sql}{schema_sql}"

    return super().createable_sql(expression, locations)
def create_sql(self, expression: exp.Create) -> str:
    """Render CREATE; in CTAS the COMMENT property is emitted after the query."""
    comment_prop = None
    query = expression.expression

    if isinstance(query, exp.Query):
        # Detach the comment so it can be re-appended after the (parenthesized) query
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
        query.replace(exp.paren(query))

    base_sql = super().create_sql(expression)
    comment_sql = self.sql(comment_prop)

    return f"{base_sql} {comment_sql}" if comment_sql else base_sql
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    """Render INDEX <name> <expr> TYPE <type> GRANULARITY <n>; each part optional."""

    def _part(sql: str, prefix: str = " ") -> str:
        # Prefix a rendered fragment, or collapse to "" when absent.
        return f"{prefix}{sql}" if sql else ""

    name = _part(self.sql(expression, "this"))
    expr = _part(self.sql(expression, "expression"))
    index_type = _part(self.sql(expression, "index_type"), " TYPE ")
    granularity = _part(self.sql(expression, "granularity"), " GRANULARITY ")

    return f"INDEX{name}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql