sqlglot.dialects.clickhouse
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot.dialects.dialect import ( 6 Dialect, 7 NormalizationStrategy, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 no_pivot_sql, 15 build_json_extract_path, 16 rename_func, 17 sha256_sql, 18 var_map_sql, 19 timestamptrunc_sql, 20 unit_to_var, 21 trim_sql, 22) 23from sqlglot.generator import Generator 24from sqlglot.helper import is_int, seq_get 25from sqlglot.tokens import Token, TokenType 26from sqlglot.generator import unsupported_args 27 28DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 29 30 31def _build_date_format(args: t.List) -> exp.TimeToStr: 32 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 33 34 timezone = seq_get(args, 2) 35 if timezone: 36 expr.set("zone", timezone) 37 38 return expr 39 40 41def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 42 scale = expression.args.get("scale") 43 timestamp = expression.this 44 45 if scale in (None, exp.UnixToTime.SECONDS): 46 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 47 if scale == exp.UnixToTime.MILLIS: 48 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 49 if scale == exp.UnixToTime.MICROS: 50 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 51 if scale == exp.UnixToTime.NANOS: 52 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 53 54 return self.func( 55 "fromUnixTimestamp", 56 exp.cast( 57 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 58 ), 59 ) 60 61 62def _lower_func(sql: str) -> str: 63 index = sql.index("(") 64 return sql[:index].lower() + sql[index:] 65 66 67def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 68 quantile = expression.args["quantile"] 69 args = f"({self.sql(expression, 'this')})" 70 71 if isinstance(quantile, exp.Array): 72 func = self.func("quantiles", *quantile) 73 else: 74 func = self.func("quantile", quantile) 75 76 return func + args 77 78 79def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 80 if len(args) == 1: 81 return exp.CountIf(this=seq_get(args, 0)) 82 83 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 84 85 86def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 87 if len(args) == 3: 88 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 89 90 strtodate = exp.StrToDate.from_arg_list(args) 91 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 92 93 94def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 95 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 96 if not expression.unit: 97 return rename_func(name)(self, expression) 98 99 return self.func( 100 name, 101 unit_to_var(expression), 102 expression.expression, 103 expression.this, 104 ) 105 106 return _delta_sql 107 108 109def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 110 ts = expression.this 111 112 tz = expression.args.get("zone") 113 if tz and isinstance(ts, exp.Literal): 114 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 115 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 116 # 117 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 118 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 119 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 120 ts_string = ts.name.strip() 121 122 # separate [date and time] from [fractional seconds and UTC offset] 123 ts_parts = ts_string.split(".") 124 if len(ts_parts) == 2: 125 # separate fractional seconds and UTC offset 126 offset_sep = "+" if "+" in ts_parts[1] else "-" 127 ts_frac_parts = ts_parts[1].split(offset_sep) 128 num_frac_parts = len(ts_frac_parts) 129 130 # pad to 6 digits if fractional seconds present 131 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 132 ts_string = "".join( 133 [ 134 ts_parts[0], # date and time 135 ".", 136 ts_frac_parts[0], # fractional seconds 137 offset_sep if num_frac_parts > 1 else "", 138 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 139 ] 140 ) 141 142 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 143 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 144 # it's part of the timestamp string 145 ts_without_tz = ( 146 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 147 ) 148 ts = exp.Literal.string(ts_without_tz) 149 150 # Non-nullable DateTime64 with microsecond precision 151 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 152 datatype = exp.DataType.build( 153 exp.DataType.Type.DATETIME64, 154 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 155 nullable=False, 156 ) 157 158 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 159 160 161class ClickHouse(Dialect): 162 NORMALIZE_FUNCTIONS: bool | str = False 163 NULL_ORDERING = "nulls_are_last" 164 SUPPORTS_USER_DEFINED_TYPES = False 165 SAFE_DIVISION = True 166 LOG_BASE_FIRST: t.Optional[bool] = None 167 FORCE_EARLY_ALIAS_REF_EXPANSION = True 168 PRESERVE_ORIGINAL_NAMES = True 169 170 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 171 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 172 173 UNESCAPED_SEQUENCES = { 174 "\\0": "\0", 175 } 176 177 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 178 179 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 180 exp.Except: False, 181 exp.Intersect: False, 182 exp.Union: None, 183 } 184 185 class Tokenizer(tokens.Tokenizer): 186 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 187 IDENTIFIERS = ['"', "`"] 188 IDENTIFIER_ESCAPES = ["\\"] 189 STRING_ESCAPES = ["'", "\\"] 190 BIT_STRINGS = [("0b", "")] 191 HEX_STRINGS = [("0x", ""), ("0X", "")] 192 HEREDOC_STRINGS = ["$"] 193 194 KEYWORDS = { 195 **tokens.Tokenizer.KEYWORDS, 196 "ATTACH": TokenType.COMMAND, 197 "DATE32": TokenType.DATE32, 198 "DATETIME64": TokenType.DATETIME64, 199 "DICTIONARY": TokenType.DICTIONARY, 200 "ENUM8": TokenType.ENUM8, 201 "ENUM16": TokenType.ENUM16, 202 "FINAL": TokenType.FINAL, 203 "FIXEDSTRING": TokenType.FIXEDSTRING, 204 "FLOAT32": TokenType.FLOAT, 205 "FLOAT64": TokenType.DOUBLE, 206 "GLOBAL": TokenType.GLOBAL, 207 "INT256": TokenType.INT256, 208 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 209 "MAP": TokenType.MAP, 210 "NESTED": TokenType.NESTED, 211 "SAMPLE": TokenType.TABLE_SAMPLE, 212 "TUPLE": TokenType.STRUCT, 213 "UINT128": TokenType.UINT128, 214 "UINT16": TokenType.USMALLINT, 215 "UINT256": TokenType.UINT256, 216 "UINT32": TokenType.UINT, 217 "UINT64": TokenType.UBIGINT, 218 "UINT8": TokenType.UTINYINT, 219 "IPV4": TokenType.IPV4, 220 "IPV6": TokenType.IPV6, 221 "POINT": TokenType.POINT, 222 "RING": TokenType.RING, 223 "LINESTRING": TokenType.LINESTRING, 224 "MULTILINESTRING": TokenType.MULTILINESTRING, 225 "POLYGON": TokenType.POLYGON, 226 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 227 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 228 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 229 "SYSTEM": TokenType.COMMAND, 230 "PREWHERE": TokenType.PREWHERE, 231 } 232 KEYWORDS.pop("/*+") 233 234 SINGLE_TOKENS = { 235 **tokens.Tokenizer.SINGLE_TOKENS, 236 "$": TokenType.HEREDOC_STRING, 237 } 238 239 class Parser(parser.Parser): 240 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 241 # * select x from t1 union all select x from t2 limit 1; 242 # * select x from t1 union all (select x from t2 limit 1); 243 MODIFIERS_ATTACHED_TO_SET_OP = False 244 INTERVAL_SPANS = False 245 246 FUNCTIONS = { 247 **parser.Parser.FUNCTIONS, 248 "ANY": exp.AnyValue.from_arg_list, 249 "ARRAYSUM": exp.ArraySum.from_arg_list, 250 "COUNTIF": _build_count_if, 251 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATE_FORMAT": _build_date_format, 256 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 257 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 258 "FORMATDATETIME": _build_date_format, 259 "JSONEXTRACTSTRING": build_json_extract_path( 260 exp.JSONExtractScalar, zero_based_indexing=False 261 ), 262 "MAP": parser.build_var_map, 263 "MATCH": exp.RegexpLike.from_arg_list, 264 "RANDCANONICAL": exp.Rand.from_arg_list, 265 "STR_TO_DATE": _build_str_to_date, 266 "TUPLE": exp.Struct.from_arg_list, 267 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "UNIQ": exp.ApproxDistinct.from_arg_list, 272 "XOR": lambda args: exp.Xor(expressions=args), 273 "MD5": exp.MD5Digest.from_arg_list, 274 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 275 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 276 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 277 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 278 } 279 FUNCTIONS.pop("TRANSFORM") 280 281 AGG_FUNCTIONS = { 282 "count", 283 "min", 284 "max", 285 "sum", 286 "avg", 287 "any", 288 "stddevPop", 289 "stddevSamp", 290 "varPop", 291 "varSamp", 292 "corr", 293 "covarPop", 294 "covarSamp", 295 "entropy", 296 "exponentialMovingAverage", 297 "intervalLengthSum", 298 "kolmogorovSmirnovTest", 299 "mannWhitneyUTest", 300 "median", 301 "rankCorr", 302 "sumKahan", 303 "studentTTest", 304 "welchTTest", 305 "anyHeavy", 306 "anyLast", 307 "boundingRatio", 308 "first_value", 309 "last_value", 310 "argMin", 311 "argMax", 312 "avgWeighted", 313 "topK", 314 "topKWeighted", 315 "deltaSum", 316 "deltaSumTimestamp", 317 "groupArray", 318 "groupArrayLast", 319 "groupUniqArray", 320 "groupArrayInsertAt", 321 "groupArrayMovingAvg", 322 "groupArrayMovingSum", 323 "groupArraySample", 324 "groupBitAnd", 325 "groupBitOr", 326 "groupBitXor", 327 "groupBitmap", 328 "groupBitmapAnd", 329 "groupBitmapOr", 330 "groupBitmapXor", 331 "sumWithOverflow", 332 "sumMap", 333 "minMap", 334 "maxMap", 335 "skewSamp", 336 "skewPop", 337 "kurtSamp", 338 "kurtPop", 339 "uniq", 340 "uniqExact", 341 "uniqCombined", 342 "uniqCombined64", 343 "uniqHLL12", 344 "uniqTheta", 345 "quantile", 346 "quantiles", 347 "quantileExact", 348 "quantilesExact", 349 "quantileExactLow", 350 "quantilesExactLow", 351 "quantileExactHigh", 352 "quantilesExactHigh", 353 "quantileExactWeighted", 354 "quantilesExactWeighted", 355 "quantileTiming", 356 "quantilesTiming", 357 "quantileTimingWeighted", 358 "quantilesTimingWeighted", 359 "quantileDeterministic", 360 "quantilesDeterministic", 361 "quantileTDigest", 362 "quantilesTDigest", 363 "quantileTDigestWeighted", 364 "quantilesTDigestWeighted", 365 "quantileBFloat16", 366 "quantilesBFloat16", 367 "quantileBFloat16Weighted", 368 "quantilesBFloat16Weighted", 369 "simpleLinearRegression", 370 "stochasticLinearRegression", 371 "stochasticLogisticRegression", 372 "categoricalInformationValue", 373 "contingency", 374 "cramersV", 375 "cramersVBiasCorrected", 376 "theilsU", 377 "maxIntersections", 378 "maxIntersectionsPosition", 379 "meanZTest", 380 "quantileInterpolatedWeighted", 381 "quantilesInterpolatedWeighted", 382 "quantileGK", 383 "quantilesGK", 384 "sparkBar", 385 "sumCount", 386 "largestTriangleThreeBuckets", 387 "histogram", 388 "sequenceMatch", 389 "sequenceCount", 390 "windowFunnel", 391 "retention", 392 "uniqUpTo", 393 "sequenceNextNode", 394 "exponentialTimeDecayedAvg", 395 } 396 397 AGG_FUNCTIONS_SUFFIXES = [ 398 "If", 399 "Array", 400 "ArrayIf", 401 "Map", 402 "SimpleState", 403 "State", 404 "Merge", 405 "MergeState", 406 "ForEach", 407 "Distinct", 408 "OrDefault", 409 "OrNull", 410 "Resample", 411 "ArgMin", 412 "ArgMax", 413 ] 414 415 FUNC_TOKENS = { 416 *parser.Parser.FUNC_TOKENS, 417 TokenType.SET, 418 } 419 420 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 421 422 ID_VAR_TOKENS = { 423 *parser.Parser.ID_VAR_TOKENS, 424 TokenType.LIKE, 425 } 426 427 AGG_FUNC_MAPPING = ( 428 lambda functions, suffixes: { 429 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 430 } 431 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 432 433 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 434 435 FUNCTION_PARSERS = { 436 **parser.Parser.FUNCTION_PARSERS, 437 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 438 "QUANTILE": lambda self: self._parse_quantile(), 439 "MEDIAN": lambda self: self._parse_quantile(), 440 "COLUMNS": lambda self: self._parse_columns(), 441 } 442 443 FUNCTION_PARSERS.pop("MATCH") 444 445 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 446 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 447 448 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 449 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 450 451 RANGE_PARSERS = { 452 **parser.Parser.RANGE_PARSERS, 453 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 454 and self._parse_in(this, is_global=True), 455 } 456 457 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 458 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 459 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 460 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 461 462 JOIN_KINDS = { 463 *parser.Parser.JOIN_KINDS, 464 TokenType.ANY, 465 TokenType.ASOF, 466 TokenType.ARRAY, 467 } 468 469 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 470 TokenType.ANY, 471 TokenType.ARRAY, 472 TokenType.FINAL, 473 TokenType.FORMAT, 474 TokenType.SETTINGS, 475 } 476 477 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 478 TokenType.FORMAT, 479 } 480 481 LOG_DEFAULTS_TO_LN = True 482 483 QUERY_MODIFIER_PARSERS = { 484 **parser.Parser.QUERY_MODIFIER_PARSERS, 485 TokenType.SETTINGS: lambda self: ( 486 "settings", 487 self._advance() or self._parse_csv(self._parse_assignment), 488 ), 489 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 490 } 491 492 CONSTRAINT_PARSERS = { 493 **parser.Parser.CONSTRAINT_PARSERS, 494 "INDEX": lambda self: self._parse_index_constraint(), 495 "CODEC": lambda self: self._parse_compress(), 496 } 497 498 ALTER_PARSERS = { 499 **parser.Parser.ALTER_PARSERS, 500 "REPLACE": lambda self: self._parse_alter_table_replace(), 501 } 502 503 SCHEMA_UNNAMED_CONSTRAINTS = { 504 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 505 "INDEX", 506 } 507 508 PLACEHOLDER_PARSERS = { 509 **parser.Parser.PLACEHOLDER_PARSERS, 510 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 511 } 512 513 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 514 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 515 return self._parse_lambda() 516 517 def _parse_types( 518 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 519 ) -> t.Optional[exp.Expression]: 520 dtype = super()._parse_types( 521 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 522 ) 523 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 524 # Mark every type as non-nullable which is ClickHouse's default, unless it's 525 # already marked as nullable. This marker helps us transpile types from other 526 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 527 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 528 # fail in ClickHouse without the `Nullable` type constructor. 529 dtype.set("nullable", False) 530 531 return dtype 532 533 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 534 index = self._index 535 this = self._parse_bitwise() 536 if self._match(TokenType.FROM): 537 self._retreat(index) 538 return super()._parse_extract() 539 540 # We return Anonymous here because extract and regexpExtract have different semantics, 541 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 542 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 543 # 544 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 545 self._match(TokenType.COMMA) 546 return self.expression( 547 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 548 ) 549 550 def _parse_assignment(self) -> t.Optional[exp.Expression]: 551 this = super()._parse_assignment() 552 553 if self._match(TokenType.PLACEHOLDER): 554 return self.expression( 555 exp.If, 556 this=this, 557 true=self._parse_assignment(), 558 false=self._match(TokenType.COLON) and self._parse_assignment(), 559 ) 560 561 return this 562 563 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 564 """ 565 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 566 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 567 """ 568 this = self._parse_id_var() 569 self._match(TokenType.COLON) 570 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 571 self._match_text_seq("IDENTIFIER") and "Identifier" 572 ) 573 574 if not kind: 575 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 576 elif not self._match(TokenType.R_BRACE): 577 self.raise_error("Expecting }") 578 579 return self.expression(exp.Placeholder, this=this, kind=kind) 580 581 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 582 this = super()._parse_in(this) 583 this.set("is_global", is_global) 584 return this 585 586 def _parse_table( 587 self, 588 schema: bool = False, 589 joins: bool = False, 590 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 591 parse_bracket: bool = False, 592 is_db_reference: bool = False, 593 parse_partition: bool = False, 594 ) -> t.Optional[exp.Expression]: 595 this = super()._parse_table( 596 schema=schema, 597 joins=joins, 598 alias_tokens=alias_tokens, 599 parse_bracket=parse_bracket, 600 is_db_reference=is_db_reference, 601 ) 602 603 if self._match(TokenType.FINAL): 604 this = self.expression(exp.Final, this=this) 605 606 return this 607 608 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 609 return super()._parse_position(haystack_first=True) 610 611 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 612 def _parse_cte(self) -> exp.CTE: 613 # WITH <identifier> AS <subquery expression> 614 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 615 616 if not cte: 617 # WITH <expression> AS <identifier> 618 cte = self.expression( 619 exp.CTE, 620 this=self._parse_assignment(), 621 alias=self._parse_table_alias(), 622 scalar=True, 623 ) 624 625 return cte 626 627 def _parse_join_parts( 628 self, 629 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 630 is_global = self._match(TokenType.GLOBAL) and self._prev 631 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 632 633 if kind_pre: 634 kind = self._match_set(self.JOIN_KINDS) and self._prev 635 side = self._match_set(self.JOIN_SIDES) and self._prev 636 return is_global, side, kind 637 638 return ( 639 is_global, 640 self._match_set(self.JOIN_SIDES) and self._prev, 641 self._match_set(self.JOIN_KINDS) and self._prev, 642 ) 643 644 def _parse_join( 645 self, skip_join_token: bool = False, parse_bracket: bool = False 646 ) -> t.Optional[exp.Join]: 647 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 648 if join: 649 join.set("global", join.args.pop("method", None)) 650 651 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 652 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 653 if join.kind == "ARRAY": 654 for table in join.find_all(exp.Table): 655 table.replace(table.to_column()) 656 657 return join 658 659 def _parse_function( 660 self, 661 functions: t.Optional[t.Dict[str, t.Callable]] = None, 662 anonymous: bool = False, 663 optional_parens: bool = True, 664 any_token: bool = False, 665 ) -> t.Optional[exp.Expression]: 666 expr = super()._parse_function( 667 functions=functions, 668 anonymous=anonymous, 669 optional_parens=optional_parens, 670 any_token=any_token, 671 ) 672 673 func = expr.this if isinstance(expr, exp.Window) else expr 674 675 # Aggregate functions can be split in 2 parts: <func_name><suffix> 676 parts = ( 677 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 678 ) 679 680 if parts: 681 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 682 params = self._parse_func_params(anon_func) 683 684 kwargs = { 685 "this": anon_func.this, 686 "expressions": anon_func.expressions, 687 } 688 if parts[1]: 689 kwargs["parts"] = parts 690 exp_class: t.Type[exp.Expression] = ( 691 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 692 ) 693 else: 694 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 695 696 kwargs["exp_class"] = exp_class 697 if params: 698 kwargs["params"] = params 699 700 func = self.expression(**kwargs) 701 702 if isinstance(expr, exp.Window): 703 # The window's func was parsed as Anonymous in base parser, fix its 704 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 705 expr.set("this", func) 706 elif params: 707 # Params have blocked super()._parse_function() from parsing the following window 708 # (if that exists) as they're standing between the function call and the window spec 709 expr = self._parse_window(func) 710 else: 711 expr = func 712 713 return expr 714 715 def _parse_func_params( 716 self, this: t.Optional[exp.Func] = None 717 ) -> t.Optional[t.List[exp.Expression]]: 718 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 719 return self._parse_csv(self._parse_lambda) 720 721 if self._match(TokenType.L_PAREN): 722 params = self._parse_csv(self._parse_lambda) 723 self._match_r_paren(this) 724 return params 725 726 return None 727 728 def _parse_quantile(self) -> exp.Quantile: 729 this = self._parse_lambda() 730 params = self._parse_func_params() 731 if params: 732 return self.expression(exp.Quantile, this=params[0], quantile=this) 733 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 734 735 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 736 return super()._parse_wrapped_id_vars(optional=True) 737 738 def _parse_primary_key( 739 self, wrapped_optional: bool = False, in_props: bool = False 740 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 741 return super()._parse_primary_key( 742 wrapped_optional=wrapped_optional or in_props, in_props=in_props 743 ) 744 745 def _parse_on_property(self) -> t.Optional[exp.Expression]: 746 index = self._index 747 if self._match_text_seq("CLUSTER"): 748 this = self._parse_id_var() 749 if this: 750 return self.expression(exp.OnCluster, this=this) 751 else: 752 self._retreat(index) 753 return None 754 755 def _parse_index_constraint( 756 self, kind: t.Optional[str] = None 757 ) -> exp.IndexColumnConstraint: 758 # INDEX name1 expr TYPE type1(args) GRANULARITY value 759 this = self._parse_id_var() 760 expression = self._parse_assignment() 761 762 index_type = self._match_text_seq("TYPE") and ( 763 self._parse_function() or self._parse_var() 764 ) 765 766 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 767 768 return self.expression( 769 exp.IndexColumnConstraint, 770 this=this, 771 expression=expression, 772 index_type=index_type, 773 granularity=granularity, 774 ) 775 776 def _parse_partition(self) -> t.Optional[exp.Partition]: 777 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 778 if not self._match(TokenType.PARTITION): 779 return None 780 781 if self._match_text_seq("ID"): 782 # Corresponds to the PARTITION ID <string_value> syntax 783 expressions: t.List[exp.Expression] = [ 784 self.expression(exp.PartitionId, this=self._parse_string()) 785 ] 786 else: 787 expressions = self._parse_expressions() 788 789 return self.expression(exp.Partition, expressions=expressions) 790 791 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 792 partition = self._parse_partition() 793 794 if not partition or not self._match(TokenType.FROM): 795 return None 796 797 return self.expression( 798 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 799 ) 800 801 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 802 if not self._match_text_seq("PROJECTION"): 803 return None 804 805 return self.expression( 806 exp.ProjectionDef, 807 this=self._parse_id_var(), 808 expression=self._parse_wrapped(self._parse_statement), 809 ) 810 811 def _parse_constraint(self) -> t.Optional[exp.Expression]: 812 return super()._parse_constraint() or self._parse_projection_def() 813 814 def _parse_alias( 815 self, this: t.Optional[exp.Expression], explicit: bool = False 816 ) -> t.Optional[exp.Expression]: 817 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 818 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 819 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 820 return this 821 822 return super()._parse_alias(this=this, explicit=explicit) 823 824 def _parse_expression(self) -> t.Optional[exp.Expression]: 825 this = super()._parse_expression() 826 827 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 828 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 829 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 830 self._match(TokenType.R_PAREN) 831 832 return this 833 834 def _parse_columns(self) -> exp.Expression: 835 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 836 837 while self._next and self._match_text_seq(")", "APPLY", "("): 838 self._match(TokenType.R_PAREN) 839 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 840 return this 841 842 class Generator(generator.Generator): 843 QUERY_HINTS = False 844 STRUCT_DELIMITER = ("(", ")") 845 NVL2_SUPPORTED = False 846 TABLESAMPLE_REQUIRES_PARENS = False 847 TABLESAMPLE_SIZE_IS_ROWS = False 848 TABLESAMPLE_KEYWORDS = "SAMPLE" 849 LAST_DAY_SUPPORTS_DATE_PART = False 850 CAN_IMPLEMENT_ARRAY_ANY = True 851 SUPPORTS_TO_NUMBER = False 852 JOIN_HINTS = False 853 TABLE_HINTS = False 854 GROUPINGS_SEP = "" 855 SET_OP_MODIFIERS = False 856 SUPPORTS_TABLE_ALIAS_COLUMNS = False 857 VALUES_AS_TABLE = False 858 ARRAY_SIZE_NAME = "LENGTH" 859 860 STRING_TYPE_MAPPING = { 861 exp.DataType.Type.CHAR: "String", 862 exp.DataType.Type.LONGBLOB: "String", 863 exp.DataType.Type.LONGTEXT: "String", 864 exp.DataType.Type.MEDIUMBLOB: "String", 865 exp.DataType.Type.MEDIUMTEXT: "String", 866 exp.DataType.Type.TINYBLOB: "String", 867 exp.DataType.Type.TINYTEXT: "String", 868 exp.DataType.Type.TEXT: "String", 869 exp.DataType.Type.VARBINARY: "String", 870 exp.DataType.Type.VARCHAR: "String", 871 } 872 873 SUPPORTED_JSON_PATH_PARTS = { 874 exp.JSONPathKey, 875 exp.JSONPathRoot, 876 exp.JSONPathSubscript, 877 } 878 879 TYPE_MAPPING = { 880 **generator.Generator.TYPE_MAPPING, 881 **STRING_TYPE_MAPPING, 882 exp.DataType.Type.ARRAY: "Array", 883 exp.DataType.Type.BOOLEAN: "Bool", 884 exp.DataType.Type.BIGINT: "Int64", 885 exp.DataType.Type.DATE32: "Date32", 886 exp.DataType.Type.DATETIME: "DateTime", 887 exp.DataType.Type.DATETIME64: "DateTime64", 888 exp.DataType.Type.DECIMAL: "Decimal", 889 exp.DataType.Type.DECIMAL32: "Decimal32", 890 exp.DataType.Type.DECIMAL64: "Decimal64", 891 exp.DataType.Type.DECIMAL128: "Decimal128", 892 exp.DataType.Type.DECIMAL256: "Decimal256", 893 exp.DataType.Type.TIMESTAMP: "DateTime", 894 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 895 exp.DataType.Type.DOUBLE: "Float64", 896 exp.DataType.Type.ENUM: "Enum", 897 exp.DataType.Type.ENUM8: "Enum8", 898 exp.DataType.Type.ENUM16: "Enum16", 899 exp.DataType.Type.FIXEDSTRING: "FixedString", 900 exp.DataType.Type.FLOAT: "Float32", 901 exp.DataType.Type.INT: "Int32", 902 exp.DataType.Type.MEDIUMINT: "Int32", 903 exp.DataType.Type.INT128: "Int128", 904 exp.DataType.Type.INT256: "Int256", 905 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 906 exp.DataType.Type.MAP: "Map", 907 exp.DataType.Type.NESTED: "Nested", 908 exp.DataType.Type.SMALLINT: "Int16", 909 exp.DataType.Type.STRUCT: "Tuple", 910 exp.DataType.Type.TINYINT: "Int8", 911 exp.DataType.Type.UBIGINT: "UInt64", 912 exp.DataType.Type.UINT: "UInt32", 913 exp.DataType.Type.UINT128: "UInt128", 914 exp.DataType.Type.UINT256: "UInt256", 915 exp.DataType.Type.USMALLINT: "UInt16", 916 exp.DataType.Type.UTINYINT: "UInt8", 917 exp.DataType.Type.IPV4: "IPv4", 918 exp.DataType.Type.IPV6: "IPv6", 919 exp.DataType.Type.POINT: "Point", 920 exp.DataType.Type.RING: "Ring", 921 exp.DataType.Type.LINESTRING: "LineString", 922 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 923 exp.DataType.Type.POLYGON: "Polygon", 924 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 925 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 926 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 927 } 928 929 TRANSFORMS = { 930 **generator.Generator.TRANSFORMS, 931 exp.AnyValue: rename_func("any"), 932 exp.ApproxDistinct: rename_func("uniq"), 933 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 934 exp.ArraySum: rename_func("arraySum"), 935 exp.ArgMax: arg_max_or_min_no_count("argMax"), 936 exp.ArgMin: arg_max_or_min_no_count("argMin"), 937 exp.Array: inline_array_sql, 938 exp.CastToStrType: rename_func("CAST"), 939 exp.CountIf: rename_func("countIf"), 940 exp.CompressColumnConstraint: lambda self, 941 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 942 exp.ComputedColumnConstraint: lambda self, 943 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 944 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 945 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 946 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 947 exp.DateStrToDate: rename_func("toDate"), 948 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 949 exp.Explode: rename_func("arrayJoin"), 950 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 951 exp.IsNan: rename_func("isNaN"), 952 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 953 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 954 exp.JSONPathKey: json_path_key_only_name, 955 exp.JSONPathRoot: lambda *_: "", 956 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 957 exp.Median: rename_func("median"), 958 exp.Nullif: rename_func("nullIf"), 959 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 960 exp.Pivot: no_pivot_sql, 961 exp.Quantile: _quantile_sql, 962 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 963 exp.Rand: rename_func("randCanonical"), 964 exp.StartsWith: rename_func("startsWith"), 965 exp.StrPosition: lambda self, e: self.func( 966 "position", e.this, e.args.get("substr"), e.args.get("position") 967 ), 968 exp.TimeToStr: lambda self, e: self.func( 969 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 970 ), 971 exp.TimeStrToTime: _timestrtotime_sql, 972 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 973 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 974 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 975 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 976 exp.MD5Digest: rename_func("MD5"), 977 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 978 exp.SHA: rename_func("SHA1"), 979 exp.SHA2: sha256_sql, 980 exp.UnixToTime: _unix_to_time_sql, 981 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 982 exp.Trim: trim_sql, 983 exp.Variance: rename_func("varSamp"), 984 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 985 exp.Stddev: rename_func("stddevSamp"), 986 exp.Chr: rename_func("CHAR"), 987 exp.Lag: lambda self, e: self.func( 988 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 989 ), 990 exp.Lead: lambda self, e: self.func( 991 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 992 ), 993 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 994 rename_func("editDistance") 995 ), 996 } 997 998 PROPERTIES_LOCATION = { 999 **generator.Generator.PROPERTIES_LOCATION, 1000 exp.OnCluster: exp.Properties.Location.POST_NAME, 1001 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1002 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1003 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1004 } 1005 1006 # There's no list in docs, but it can be found in Clickhouse code 1007 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1008 ON_CLUSTER_TARGETS = { 1009 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1010 "DATABASE", 1011 "TABLE", 1012 "VIEW", 1013 "DICTIONARY", 1014 "INDEX", 1015 "FUNCTION", 1016 "NAMED COLLECTION", 1017 } 1018 1019 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1020 NON_NULLABLE_TYPES = { 1021 exp.DataType.Type.ARRAY, 1022 exp.DataType.Type.MAP, 1023 exp.DataType.Type.STRUCT, 1024 exp.DataType.Type.POINT, 1025 exp.DataType.Type.RING, 1026 exp.DataType.Type.LINESTRING, 1027 exp.DataType.Type.MULTILINESTRING, 1028 exp.DataType.Type.POLYGON, 1029 exp.DataType.Type.MULTIPOLYGON, 1030 } 1031 1032 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1033 strtodate_sql = self.function_fallback_sql(expression) 1034 1035 if not isinstance(expression.parent, exp.Cast): 1036 # StrToDate returns DATEs in other dialects (eg. postgres), so 1037 # this branch aims to improve the transpilation to clickhouse 1038 return f"CAST({strtodate_sql} AS DATE)" 1039 1040 return strtodate_sql 1041 1042 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1043 this = expression.this 1044 1045 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1046 return self.sql(this) 1047 1048 return super().cast_sql(expression, safe_prefix=safe_prefix) 1049 1050 def trycast_sql(self, expression: exp.TryCast) -> str: 1051 dtype = expression.to 1052 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1053 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1054 dtype.set("nullable", True) 1055 1056 return super().cast_sql(expression) 1057 1058 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1059 this = self.json_path_part(expression.this) 1060 return str(int(this) + 1) if is_int(this) else this 1061 1062 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1063 return f"AS {self.sql(expression, 'this')}" 1064 1065 def _any_to_has( 1066 self, 1067 expression: exp.EQ | exp.NEQ, 1068 default: t.Callable[[t.Any], str], 1069 prefix: str = "", 1070 ) -> str: 1071 if isinstance(expression.left, exp.Any): 1072 arr = expression.left 1073 this = expression.right 1074 elif isinstance(expression.right, exp.Any): 1075 arr = expression.right 1076 this = expression.left 1077 else: 1078 return default(expression) 1079 1080 return prefix + self.func("has", arr.this.unnest(), this) 1081 1082 def eq_sql(self, expression: exp.EQ) -> str: 1083 return self._any_to_has(expression, super().eq_sql) 1084 1085 def neq_sql(self, expression: exp.NEQ) -> str: 1086 return self._any_to_has(expression, super().neq_sql, "NOT ") 1087 1088 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1089 # Manually add a flag to make the search case-insensitive 1090 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1091 return self.func("match", expression.this, regex) 1092 1093 def datatype_sql(self, expression: exp.DataType) -> str: 1094 # String is the standard ClickHouse type, every other variant is just an alias. 1095 # Additionally, any supplied length parameter will be ignored. 1096 # 1097 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1098 if expression.this in self.STRING_TYPE_MAPPING: 1099 dtype = "String" 1100 else: 1101 dtype = super().datatype_sql(expression) 1102 1103 # This section changes the type to `Nullable(...)` if the following conditions hold: 1104 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1105 # and change their semantics 1106 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1107 # constraint: "Type of Map key must be a type, that can be represented by integer or 1108 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1109 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1110 parent = expression.parent 1111 nullable = expression.args.get("nullable") 1112 if nullable is True or ( 1113 nullable is None 1114 and not ( 1115 isinstance(parent, exp.DataType) 1116 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1117 and expression.index in (None, 0) 1118 ) 1119 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1120 ): 1121 dtype = f"Nullable({dtype})" 1122 1123 return dtype 1124 1125 def cte_sql(self, expression: exp.CTE) -> str: 1126 if expression.args.get("scalar"): 1127 this = self.sql(expression, "this") 1128 alias = self.sql(expression, "alias") 1129 return f"{this} AS {alias}" 1130 1131 return super().cte_sql(expression) 1132 1133 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1134 return super().after_limit_modifiers(expression) + [ 1135 ( 1136 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1137 if expression.args.get("settings") 1138 else "" 1139 ), 1140 ( 1141 self.seg("FORMAT ") + self.sql(expression, "format") 1142 if expression.args.get("format") 1143 else "" 1144 ), 1145 ] 1146 1147 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1148 params = self.expressions(expression, key="params", flat=True) 1149 return self.func(expression.name, *expression.expressions) + f"({params})" 1150 1151 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1152 return self.func(expression.name, *expression.expressions) 1153 1154 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1155 return self.anonymousaggfunc_sql(expression) 1156 1157 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1158 return self.parameterizedagg_sql(expression) 1159 1160 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1161 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1162 1163 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1164 return f"ON CLUSTER {self.sql(expression, 'this')}" 1165 1166 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1167 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1168 exp.Properties.Location.POST_NAME 1169 ): 1170 this_name = self.sql( 1171 expression.this if isinstance(expression.this, exp.Schema) else expression, 1172 "this", 1173 ) 1174 this_properties = " ".join( 1175 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1176 ) 1177 this_schema = self.schema_columns_sql(expression.this) 1178 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1179 1180 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1181 1182 return super().createable_sql(expression, locations) 1183 1184 def create_sql(self, expression: exp.Create) -> str: 1185 # The comment property comes last in CTAS statements, i.e. after the query 1186 query = expression.expression 1187 if isinstance(query, exp.Query): 1188 comment_prop = expression.find(exp.SchemaCommentProperty) 1189 if comment_prop: 1190 comment_prop.pop() 1191 query.replace(exp.paren(query)) 1192 else: 1193 comment_prop = None 1194 1195 create_sql = super().create_sql(expression) 1196 1197 comment_sql = self.sql(comment_prop) 1198 comment_sql = f" {comment_sql}" if comment_sql else "" 1199 1200 return f"{create_sql}{comment_sql}" 1201 1202 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1203 this = self.indent(self.sql(expression, "this")) 1204 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1205 1206 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1207 this = self.sql(expression, "this") 1208 this = f" {this}" if this else "" 1209 expr = self.sql(expression, "expression") 1210 expr = f" {expr}" if expr else "" 1211 index_type = self.sql(expression, "index_type") 1212 index_type = f" TYPE {index_type}" if index_type else "" 1213 granularity = self.sql(expression, "granularity") 1214 granularity = f" GRANULARITY {granularity}" if granularity else "" 1215 1216 return f"INDEX{this}{expr}{index_type}{granularity}" 1217 1218 def partition_sql(self, expression: exp.Partition) -> str: 1219 return f"PARTITION {self.expressions(expression, flat=True)}" 1220 1221 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1222 return f"ID {self.sql(expression.this)}" 1223 1224 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1225 return ( 1226 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1227 ) 1228 1229 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1230 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1231 1232 def is_sql(self, expression: exp.Is) -> str: 1233 is_sql = super().is_sql(expression) 1234 1235 if isinstance(expression.parent, exp.Not): 1236 # value IS NOT NULL -> NOT (value IS NULL) 1237 is_sql = self.wrap(is_sql) 1238 1239 return is_sql
162class ClickHouse(Dialect): 163 NORMALIZE_FUNCTIONS: bool | str = False 164 NULL_ORDERING = "nulls_are_last" 165 SUPPORTS_USER_DEFINED_TYPES = False 166 SAFE_DIVISION = True 167 LOG_BASE_FIRST: t.Optional[bool] = None 168 FORCE_EARLY_ALIAS_REF_EXPANSION = True 169 PRESERVE_ORIGINAL_NAMES = True 170 171 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 172 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 173 174 UNESCAPED_SEQUENCES = { 175 "\\0": "\0", 176 } 177 178 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 179 180 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 181 exp.Except: False, 182 exp.Intersect: False, 183 exp.Union: None, 184 } 185 186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 } 239 240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 this = self._parse_id_var() 570 self._match(TokenType.COLON) 571 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 572 self._match_text_seq("IDENTIFIER") and "Identifier" 573 ) 574 575 if not kind: 576 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 577 elif not self._match(TokenType.R_BRACE): 578 self.raise_error("Expecting }") 579 580 return self.expression(exp.Placeholder, this=this, kind=kind) 581 582 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 583 this = super()._parse_in(this) 584 this.set("is_global", is_global) 585 return this 586 587 def _parse_table( 588 self, 589 schema: bool = False, 590 joins: bool = False, 591 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 592 parse_bracket: bool = False, 593 is_db_reference: bool = False, 594 parse_partition: bool = False, 595 ) -> t.Optional[exp.Expression]: 596 this = super()._parse_table( 597 schema=schema, 598 joins=joins, 599 alias_tokens=alias_tokens, 600 parse_bracket=parse_bracket, 601 is_db_reference=is_db_reference, 602 ) 603 604 if self._match(TokenType.FINAL): 605 this = self.expression(exp.Final, this=this) 606 607 return this 608 609 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 610 return super()._parse_position(haystack_first=True) 611 612 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 613 def _parse_cte(self) -> exp.CTE: 614 # WITH <identifier> AS <subquery expression> 615 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 616 617 if not cte: 618 # WITH <expression> AS <identifier> 619 cte = self.expression( 620 exp.CTE, 621 this=self._parse_assignment(), 622 alias=self._parse_table_alias(), 623 scalar=True, 624 ) 625 626 return cte 627 628 def _parse_join_parts( 629 self, 630 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 631 is_global = self._match(TokenType.GLOBAL) and self._prev 632 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 633 634 if kind_pre: 635 kind = self._match_set(self.JOIN_KINDS) and self._prev 636 side = self._match_set(self.JOIN_SIDES) and self._prev 637 return is_global, side, kind 638 639 return ( 640 is_global, 641 self._match_set(self.JOIN_SIDES) and self._prev, 642 self._match_set(self.JOIN_KINDS) and self._prev, 643 ) 644 645 def _parse_join( 646 self, skip_join_token: bool = False, parse_bracket: bool = False 647 ) -> t.Optional[exp.Join]: 648 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 649 if join: 650 join.set("global", join.args.pop("method", None)) 651 652 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 653 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 654 if join.kind == "ARRAY": 655 for table in join.find_all(exp.Table): 656 table.replace(table.to_column()) 657 658 return join 659 660 def _parse_function( 661 self, 662 functions: t.Optional[t.Dict[str, t.Callable]] = None, 663 anonymous: bool = False, 664 optional_parens: bool = True, 665 any_token: bool = False, 666 ) -> t.Optional[exp.Expression]: 667 expr = super()._parse_function( 668 functions=functions, 669 anonymous=anonymous, 670 optional_parens=optional_parens, 671 any_token=any_token, 672 ) 673 674 func = expr.this if isinstance(expr, exp.Window) else expr 675 676 # Aggregate functions can be split in 2 parts: <func_name><suffix> 677 parts = ( 678 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 679 ) 680 681 if parts: 682 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 683 params = self._parse_func_params(anon_func) 684 685 kwargs = { 686 "this": anon_func.this, 687 "expressions": anon_func.expressions, 688 } 689 if parts[1]: 690 kwargs["parts"] = parts 691 exp_class: t.Type[exp.Expression] = ( 692 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 693 ) 694 else: 695 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 696 697 kwargs["exp_class"] = exp_class 698 if params: 699 kwargs["params"] = params 700 701 func = self.expression(**kwargs) 702 703 if isinstance(expr, exp.Window): 704 # The window's func was parsed as Anonymous in base parser, fix its 705 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 706 expr.set("this", func) 707 elif params: 708 # Params have blocked super()._parse_function() from parsing the following window 709 # (if that exists) as they're standing between the function call and the window spec 710 expr = self._parse_window(func) 711 else: 712 expr = func 713 714 return expr 715 716 def _parse_func_params( 717 self, this: t.Optional[exp.Func] = None 718 ) -> t.Optional[t.List[exp.Expression]]: 719 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 720 return self._parse_csv(self._parse_lambda) 721 722 if self._match(TokenType.L_PAREN): 723 params = self._parse_csv(self._parse_lambda) 724 self._match_r_paren(this) 725 return params 726 727 return None 728 729 def _parse_quantile(self) -> exp.Quantile: 730 this = self._parse_lambda() 731 params = self._parse_func_params() 732 if params: 733 return self.expression(exp.Quantile, this=params[0], quantile=this) 734 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 735 736 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 737 return super()._parse_wrapped_id_vars(optional=True) 738 739 def _parse_primary_key( 740 self, wrapped_optional: bool = False, in_props: bool = False 741 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 742 return super()._parse_primary_key( 743 wrapped_optional=wrapped_optional or in_props, in_props=in_props 744 ) 745 746 def _parse_on_property(self) -> t.Optional[exp.Expression]: 747 index = self._index 748 if self._match_text_seq("CLUSTER"): 749 this = self._parse_id_var() 750 if this: 751 return self.expression(exp.OnCluster, this=this) 752 else: 753 self._retreat(index) 754 return None 755 756 def _parse_index_constraint( 757 self, kind: t.Optional[str] = None 758 ) -> exp.IndexColumnConstraint: 759 # INDEX name1 expr TYPE type1(args) GRANULARITY value 760 this = self._parse_id_var() 761 expression = self._parse_assignment() 762 763 index_type = self._match_text_seq("TYPE") and ( 764 self._parse_function() or self._parse_var() 765 ) 766 767 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 768 769 return self.expression( 770 exp.IndexColumnConstraint, 771 this=this, 772 expression=expression, 773 index_type=index_type, 774 granularity=granularity, 775 ) 776 777 def _parse_partition(self) -> t.Optional[exp.Partition]: 778 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 779 if not self._match(TokenType.PARTITION): 780 return None 781 782 if self._match_text_seq("ID"): 783 # Corresponds to the PARTITION ID <string_value> syntax 784 expressions: t.List[exp.Expression] = [ 785 self.expression(exp.PartitionId, this=self._parse_string()) 786 ] 787 else: 788 expressions = self._parse_expressions() 789 790 return self.expression(exp.Partition, expressions=expressions) 791 792 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 793 partition = self._parse_partition() 794 795 if not partition or not self._match(TokenType.FROM): 796 return None 797 798 return self.expression( 799 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 800 ) 801 802 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 803 if not self._match_text_seq("PROJECTION"): 804 return None 805 806 return self.expression( 807 exp.ProjectionDef, 808 this=self._parse_id_var(), 809 expression=self._parse_wrapped(self._parse_statement), 810 ) 811 812 def _parse_constraint(self) -> t.Optional[exp.Expression]: 813 return super()._parse_constraint() or self._parse_projection_def() 814 815 def _parse_alias( 816 self, this: t.Optional[exp.Expression], explicit: bool = False 817 ) -> t.Optional[exp.Expression]: 818 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 819 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 820 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 821 return this 822 823 return super()._parse_alias(this=this, explicit=explicit) 824 825 def _parse_expression(self) -> t.Optional[exp.Expression]: 826 this = super()._parse_expression() 827 828 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 829 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 830 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 831 self._match(TokenType.R_PAREN) 832 833 return this 834 835 def _parse_columns(self) -> exp.Expression: 836 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 837 838 while self._next and self._match_text_seq(")", "APPLY", "("): 839 self._match(TokenType.R_PAREN) 840 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 841 return this 842 843 class Generator(generator.Generator): 844 QUERY_HINTS = False 845 STRUCT_DELIMITER = ("(", ")") 846 NVL2_SUPPORTED = False 847 TABLESAMPLE_REQUIRES_PARENS = False 848 TABLESAMPLE_SIZE_IS_ROWS = False 849 TABLESAMPLE_KEYWORDS = "SAMPLE" 850 LAST_DAY_SUPPORTS_DATE_PART = False 851 CAN_IMPLEMENT_ARRAY_ANY = True 852 SUPPORTS_TO_NUMBER = False 853 JOIN_HINTS = False 854 TABLE_HINTS = False 855 GROUPINGS_SEP = "" 856 SET_OP_MODIFIERS = False 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 VALUES_AS_TABLE = False 859 ARRAY_SIZE_NAME = "LENGTH" 860 861 STRING_TYPE_MAPPING = { 862 exp.DataType.Type.CHAR: "String", 863 exp.DataType.Type.LONGBLOB: "String", 864 exp.DataType.Type.LONGTEXT: "String", 865 exp.DataType.Type.MEDIUMBLOB: "String", 866 exp.DataType.Type.MEDIUMTEXT: "String", 867 exp.DataType.Type.TINYBLOB: "String", 868 exp.DataType.Type.TINYTEXT: "String", 869 exp.DataType.Type.TEXT: "String", 870 exp.DataType.Type.VARBINARY: "String", 871 exp.DataType.Type.VARCHAR: "String", 872 } 873 874 SUPPORTED_JSON_PATH_PARTS = { 875 exp.JSONPathKey, 876 exp.JSONPathRoot, 877 exp.JSONPathSubscript, 878 } 879 880 TYPE_MAPPING = { 881 **generator.Generator.TYPE_MAPPING, 882 **STRING_TYPE_MAPPING, 883 exp.DataType.Type.ARRAY: "Array", 884 exp.DataType.Type.BOOLEAN: "Bool", 885 exp.DataType.Type.BIGINT: "Int64", 886 exp.DataType.Type.DATE32: "Date32", 887 exp.DataType.Type.DATETIME: "DateTime", 888 exp.DataType.Type.DATETIME64: "DateTime64", 889 exp.DataType.Type.DECIMAL: "Decimal", 890 exp.DataType.Type.DECIMAL32: "Decimal32", 891 exp.DataType.Type.DECIMAL64: "Decimal64", 892 exp.DataType.Type.DECIMAL128: "Decimal128", 893 exp.DataType.Type.DECIMAL256: "Decimal256", 894 exp.DataType.Type.TIMESTAMP: "DateTime", 895 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 896 exp.DataType.Type.DOUBLE: "Float64", 897 exp.DataType.Type.ENUM: "Enum", 898 exp.DataType.Type.ENUM8: "Enum8", 899 exp.DataType.Type.ENUM16: "Enum16", 900 exp.DataType.Type.FIXEDSTRING: "FixedString", 901 exp.DataType.Type.FLOAT: "Float32", 902 exp.DataType.Type.INT: "Int32", 903 exp.DataType.Type.MEDIUMINT: "Int32", 904 exp.DataType.Type.INT128: "Int128", 905 exp.DataType.Type.INT256: "Int256", 906 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 907 exp.DataType.Type.MAP: "Map", 908 exp.DataType.Type.NESTED: "Nested", 909 exp.DataType.Type.SMALLINT: "Int16", 910 exp.DataType.Type.STRUCT: "Tuple", 911 exp.DataType.Type.TINYINT: "Int8", 912 exp.DataType.Type.UBIGINT: "UInt64", 913 exp.DataType.Type.UINT: "UInt32", 914 exp.DataType.Type.UINT128: "UInt128", 915 exp.DataType.Type.UINT256: "UInt256", 916 exp.DataType.Type.USMALLINT: "UInt16", 917 exp.DataType.Type.UTINYINT: "UInt8", 918 exp.DataType.Type.IPV4: "IPv4", 919 exp.DataType.Type.IPV6: "IPv6", 920 exp.DataType.Type.POINT: "Point", 921 exp.DataType.Type.RING: "Ring", 922 exp.DataType.Type.LINESTRING: "LineString", 923 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 924 exp.DataType.Type.POLYGON: "Polygon", 925 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 926 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 927 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 928 } 929 930 TRANSFORMS = { 931 **generator.Generator.TRANSFORMS, 932 exp.AnyValue: rename_func("any"), 933 exp.ApproxDistinct: rename_func("uniq"), 934 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 935 exp.ArraySum: rename_func("arraySum"), 936 exp.ArgMax: arg_max_or_min_no_count("argMax"), 937 exp.ArgMin: arg_max_or_min_no_count("argMin"), 938 exp.Array: inline_array_sql, 939 exp.CastToStrType: rename_func("CAST"), 940 exp.CountIf: rename_func("countIf"), 941 exp.CompressColumnConstraint: lambda self, 942 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 943 exp.ComputedColumnConstraint: lambda self, 944 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 945 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 946 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 947 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 948 exp.DateStrToDate: rename_func("toDate"), 949 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 950 exp.Explode: rename_func("arrayJoin"), 951 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 952 exp.IsNan: rename_func("isNaN"), 953 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 954 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 955 exp.JSONPathKey: json_path_key_only_name, 956 exp.JSONPathRoot: lambda *_: "", 957 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 958 exp.Median: rename_func("median"), 959 exp.Nullif: rename_func("nullIf"), 960 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 961 exp.Pivot: no_pivot_sql, 962 exp.Quantile: _quantile_sql, 963 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 964 exp.Rand: rename_func("randCanonical"), 965 exp.StartsWith: rename_func("startsWith"), 966 exp.StrPosition: lambda self, e: self.func( 967 "position", e.this, e.args.get("substr"), e.args.get("position") 968 ), 969 exp.TimeToStr: lambda self, e: self.func( 970 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 971 ), 972 exp.TimeStrToTime: _timestrtotime_sql, 973 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 974 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 975 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 976 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 977 exp.MD5Digest: rename_func("MD5"), 978 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 979 exp.SHA: rename_func("SHA1"), 980 exp.SHA2: sha256_sql, 981 exp.UnixToTime: _unix_to_time_sql, 982 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 983 exp.Trim: trim_sql, 984 exp.Variance: rename_func("varSamp"), 985 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 986 exp.Stddev: rename_func("stddevSamp"), 987 exp.Chr: rename_func("CHAR"), 988 exp.Lag: lambda self, e: self.func( 989 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 990 ), 991 exp.Lead: lambda self, e: self.func( 992 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 993 ), 994 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 995 rename_func("editDistance") 996 ), 997 } 998 999 PROPERTIES_LOCATION = { 1000 **generator.Generator.PROPERTIES_LOCATION, 1001 exp.OnCluster: exp.Properties.Location.POST_NAME, 1002 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1003 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1004 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1005 } 1006 1007 # There's no list in docs, but it can be found in Clickhouse code 1008 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1009 ON_CLUSTER_TARGETS = { 1010 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1011 "DATABASE", 1012 "TABLE", 1013 "VIEW", 1014 "DICTIONARY", 1015 "INDEX", 1016 "FUNCTION", 1017 "NAMED COLLECTION", 1018 } 1019 1020 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1021 NON_NULLABLE_TYPES = { 1022 exp.DataType.Type.ARRAY, 1023 exp.DataType.Type.MAP, 1024 exp.DataType.Type.STRUCT, 1025 exp.DataType.Type.POINT, 1026 exp.DataType.Type.RING, 1027 exp.DataType.Type.LINESTRING, 1028 exp.DataType.Type.MULTILINESTRING, 1029 exp.DataType.Type.POLYGON, 1030 exp.DataType.Type.MULTIPOLYGON, 1031 } 1032 1033 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1034 strtodate_sql = self.function_fallback_sql(expression) 1035 1036 if not isinstance(expression.parent, exp.Cast): 1037 # StrToDate returns DATEs in other dialects (eg. postgres), so 1038 # this branch aims to improve the transpilation to clickhouse 1039 return f"CAST({strtodate_sql} AS DATE)" 1040 1041 return strtodate_sql 1042 1043 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1044 this = expression.this 1045 1046 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1047 return self.sql(this) 1048 1049 return super().cast_sql(expression, safe_prefix=safe_prefix) 1050 1051 def trycast_sql(self, expression: exp.TryCast) -> str: 1052 dtype = expression.to 1053 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1054 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1055 dtype.set("nullable", True) 1056 1057 return super().cast_sql(expression) 1058 1059 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1060 this = self.json_path_part(expression.this) 1061 return str(int(this) + 1) if is_int(this) else this 1062 1063 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1064 return f"AS {self.sql(expression, 'this')}" 1065 1066 def _any_to_has( 1067 self, 1068 expression: exp.EQ | exp.NEQ, 1069 default: t.Callable[[t.Any], str], 1070 prefix: str = "", 1071 ) -> str: 1072 if isinstance(expression.left, exp.Any): 1073 arr = expression.left 1074 this = expression.right 1075 elif isinstance(expression.right, exp.Any): 1076 arr = expression.right 1077 this = expression.left 1078 else: 1079 return default(expression) 1080 1081 return prefix + self.func("has", arr.this.unnest(), this) 1082 1083 def eq_sql(self, expression: exp.EQ) -> str: 1084 return self._any_to_has(expression, super().eq_sql) 1085 1086 def neq_sql(self, expression: exp.NEQ) -> str: 1087 return self._any_to_has(expression, super().neq_sql, "NOT ") 1088 1089 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1090 # Manually add a flag to make the search case-insensitive 1091 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1092 return self.func("match", expression.this, regex) 1093 1094 def datatype_sql(self, expression: exp.DataType) -> str: 1095 # String is the standard ClickHouse type, every other variant is just an alias. 1096 # Additionally, any supplied length parameter will be ignored. 1097 # 1098 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1099 if expression.this in self.STRING_TYPE_MAPPING: 1100 dtype = "String" 1101 else: 1102 dtype = super().datatype_sql(expression) 1103 1104 # This section changes the type to `Nullable(...)` if the following conditions hold: 1105 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1106 # and change their semantics 1107 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1108 # constraint: "Type of Map key must be a type, that can be represented by integer or 1109 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1110 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1111 parent = expression.parent 1112 nullable = expression.args.get("nullable") 1113 if nullable is True or ( 1114 nullable is None 1115 and not ( 1116 isinstance(parent, exp.DataType) 1117 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1118 and expression.index in (None, 0) 1119 ) 1120 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1121 ): 1122 dtype = f"Nullable({dtype})" 1123 1124 return dtype 1125 1126 def cte_sql(self, expression: exp.CTE) -> str: 1127 if expression.args.get("scalar"): 1128 this = self.sql(expression, "this") 1129 alias = self.sql(expression, "alias") 1130 return f"{this} AS {alias}" 1131 1132 return super().cte_sql(expression) 1133 1134 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1135 return super().after_limit_modifiers(expression) + [ 1136 ( 1137 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1138 if expression.args.get("settings") 1139 else "" 1140 ), 1141 ( 1142 self.seg("FORMAT ") + self.sql(expression, "format") 1143 if expression.args.get("format") 1144 else "" 1145 ), 1146 ] 1147 1148 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1149 params = self.expressions(expression, key="params", flat=True) 1150 return self.func(expression.name, *expression.expressions) + f"({params})" 1151 1152 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1153 return self.func(expression.name, *expression.expressions) 1154 1155 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1156 return self.anonymousaggfunc_sql(expression) 1157 1158 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1159 return self.parameterizedagg_sql(expression) 1160 1161 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1162 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1163 1164 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1165 return f"ON CLUSTER {self.sql(expression, 'this')}" 1166 1167 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1168 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1169 exp.Properties.Location.POST_NAME 1170 ): 1171 this_name = self.sql( 1172 expression.this if isinstance(expression.this, exp.Schema) else expression, 1173 "this", 1174 ) 1175 this_properties = " ".join( 1176 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1177 ) 1178 this_schema = self.schema_columns_sql(expression.this) 1179 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1180 1181 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1182 1183 return super().createable_sql(expression, locations) 1184 1185 def create_sql(self, expression: exp.Create) -> str: 1186 # The comment property comes last in CTAS statements, i.e. after the query 1187 query = expression.expression 1188 if isinstance(query, exp.Query): 1189 comment_prop = expression.find(exp.SchemaCommentProperty) 1190 if comment_prop: 1191 comment_prop.pop() 1192 query.replace(exp.paren(query)) 1193 else: 1194 comment_prop = None 1195 1196 create_sql = super().create_sql(expression) 1197 1198 comment_sql = self.sql(comment_prop) 1199 comment_sql = f" {comment_sql}" if comment_sql else "" 1200 1201 return f"{create_sql}{comment_sql}" 1202 1203 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1204 this = self.indent(self.sql(expression, "this")) 1205 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1206 1207 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1208 this = self.sql(expression, "this") 1209 this = f" {this}" if this else "" 1210 expr = self.sql(expression, "expression") 1211 expr = f" {expr}" if expr else "" 1212 index_type = self.sql(expression, "index_type") 1213 index_type = f" TYPE {index_type}" if index_type else "" 1214 granularity = self.sql(expression, "granularity") 1215 granularity = f" GRANULARITY {granularity}" if granularity else "" 1216 1217 return f"INDEX{this}{expr}{index_type}{granularity}" 1218 1219 def partition_sql(self, expression: exp.Partition) -> str: 1220 return f"PARTITION {self.expressions(expression, flat=True)}" 1221 1222 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1223 return f"ID {self.sql(expression.this)}" 1224 1225 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1226 return ( 1227 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1228 ) 1229 1230 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1231 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1232 1233 def is_sql(self, expression: exp.Is) -> str: 1234 is_sql = super().is_sql(expression) 1235 1236 if isinstance(expression.parent, exp.Not): 1237 # value IS NOT NULL -> NOT (value IS NULL) 1238 is_sql = self.wrap(is_sql) 1239 1240 return is_sql
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 this = self._parse_id_var() 570 self._match(TokenType.COLON) 571 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 572 self._match_text_seq("IDENTIFIER") and "Identifier" 573 ) 574 575 if not kind: 576 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 577 elif not self._match(TokenType.R_BRACE): 578 self.raise_error("Expecting }") 579 580 return self.expression(exp.Placeholder, this=this, kind=kind) 581 582 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 583 this = super()._parse_in(this) 584 this.set("is_global", is_global) 585 return this 586 587 def _parse_table( 588 self, 589 schema: bool = False, 590 joins: bool = False, 591 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 592 parse_bracket: bool = False, 593 is_db_reference: bool = False, 594 parse_partition: bool = False, 595 ) -> t.Optional[exp.Expression]: 596 this = super()._parse_table( 597 schema=schema, 598 joins=joins, 599 alias_tokens=alias_tokens, 600 parse_bracket=parse_bracket, 601 is_db_reference=is_db_reference, 602 ) 603 604 if self._match(TokenType.FINAL): 605 this = self.expression(exp.Final, this=this) 606 607 return this 608 609 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 610 return super()._parse_position(haystack_first=True) 611 612 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 613 def _parse_cte(self) -> exp.CTE: 614 # WITH <identifier> AS <subquery expression> 615 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 616 617 if not cte: 618 # WITH <expression> AS <identifier> 619 cte = self.expression( 620 exp.CTE, 621 this=self._parse_assignment(), 622 alias=self._parse_table_alias(), 623 scalar=True, 624 ) 625 626 return cte 627 628 def _parse_join_parts( 629 self, 630 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 631 is_global = self._match(TokenType.GLOBAL) and self._prev 632 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 633 634 if kind_pre: 635 kind = self._match_set(self.JOIN_KINDS) and self._prev 636 side = self._match_set(self.JOIN_SIDES) and self._prev 637 return is_global, side, kind 638 639 return ( 640 is_global, 641 self._match_set(self.JOIN_SIDES) and self._prev, 642 self._match_set(self.JOIN_KINDS) and self._prev, 643 ) 644 645 def _parse_join( 646 self, skip_join_token: bool = False, parse_bracket: bool = False 647 ) -> t.Optional[exp.Join]: 648 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 649 if join: 650 join.set("global", join.args.pop("method", None)) 651 652 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 653 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 654 if join.kind == "ARRAY": 655 for table in join.find_all(exp.Table): 656 table.replace(table.to_column()) 657 658 return join 659 660 def _parse_function( 661 self, 662 functions: t.Optional[t.Dict[str, t.Callable]] = None, 663 anonymous: bool = False, 664 optional_parens: bool = True, 665 any_token: bool = False, 666 ) -> t.Optional[exp.Expression]: 667 expr = super()._parse_function( 668 functions=functions, 669 anonymous=anonymous, 670 optional_parens=optional_parens, 671 any_token=any_token, 672 ) 673 674 func = expr.this if isinstance(expr, exp.Window) else expr 675 676 # Aggregate functions can be split in 2 parts: <func_name><suffix> 677 parts = ( 678 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 679 ) 680 681 if parts: 682 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 683 params = self._parse_func_params(anon_func) 684 685 kwargs = { 686 "this": anon_func.this, 687 "expressions": anon_func.expressions, 688 } 689 if parts[1]: 690 kwargs["parts"] = parts 691 exp_class: t.Type[exp.Expression] = ( 692 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 693 ) 694 else: 695 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 696 697 kwargs["exp_class"] = exp_class 698 if params: 699 kwargs["params"] = params 700 701 func = self.expression(**kwargs) 702 703 if isinstance(expr, exp.Window): 704 # The window's func was parsed as Anonymous in base parser, fix its 705 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 706 expr.set("this", func) 707 elif params: 708 # Params have blocked super()._parse_function() from parsing the following window 709 # (if that exists) as they're standing between the function call and the window spec 710 expr = self._parse_window(func) 711 else: 712 expr = func 713 714 return expr 715 716 def _parse_func_params( 717 self, this: t.Optional[exp.Func] = None 718 ) -> t.Optional[t.List[exp.Expression]]: 719 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 720 return self._parse_csv(self._parse_lambda) 721 722 if self._match(TokenType.L_PAREN): 723 params = self._parse_csv(self._parse_lambda) 724 self._match_r_paren(this) 725 return params 726 727 return None 728 729 def _parse_quantile(self) -> exp.Quantile: 730 this = self._parse_lambda() 731 params = self._parse_func_params() 732 if params: 733 return self.expression(exp.Quantile, this=params[0], quantile=this) 734 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 735 736 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 737 return super()._parse_wrapped_id_vars(optional=True) 738 739 def _parse_primary_key( 740 self, wrapped_optional: bool = False, in_props: bool = False 741 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 742 return super()._parse_primary_key( 743 wrapped_optional=wrapped_optional or in_props, in_props=in_props 744 ) 745 746 def _parse_on_property(self) -> t.Optional[exp.Expression]: 747 index = self._index 748 if self._match_text_seq("CLUSTER"): 749 this = self._parse_id_var() 750 if this: 751 return self.expression(exp.OnCluster, this=this) 752 else: 753 self._retreat(index) 754 return None 755 756 def _parse_index_constraint( 757 self, kind: t.Optional[str] = None 758 ) -> exp.IndexColumnConstraint: 759 # INDEX name1 expr TYPE type1(args) GRANULARITY value 760 this = self._parse_id_var() 761 expression = self._parse_assignment() 762 763 index_type = self._match_text_seq("TYPE") and ( 764 self._parse_function() or self._parse_var() 765 ) 766 767 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 768 769 return self.expression( 770 exp.IndexColumnConstraint, 771 this=this, 772 expression=expression, 773 index_type=index_type, 774 granularity=granularity, 775 ) 776 777 def _parse_partition(self) -> t.Optional[exp.Partition]: 778 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 779 if not self._match(TokenType.PARTITION): 780 return None 781 782 if self._match_text_seq("ID"): 783 # Corresponds to the PARTITION ID <string_value> syntax 784 expressions: t.List[exp.Expression] = [ 785 self.expression(exp.PartitionId, this=self._parse_string()) 786 ] 787 else: 788 expressions = self._parse_expressions() 789 790 return self.expression(exp.Partition, expressions=expressions) 791 792 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 793 partition = self._parse_partition() 794 795 if not partition or not self._match(TokenType.FROM): 796 return None 797 798 return self.expression( 799 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 800 ) 801 802 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 803 if not self._match_text_seq("PROJECTION"): 804 return None 805 806 return self.expression( 807 exp.ProjectionDef, 808 this=self._parse_id_var(), 809 expression=self._parse_wrapped(self._parse_statement), 810 ) 811 812 def _parse_constraint(self) -> t.Optional[exp.Expression]: 813 return super()._parse_constraint() or self._parse_projection_def() 814 815 def _parse_alias( 816 self, this: t.Optional[exp.Expression], explicit: bool = False 817 ) -> t.Optional[exp.Expression]: 818 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 819 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 820 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 821 return this 822 823 return super()._parse_alias(this=this, explicit=explicit) 824 825 def _parse_expression(self) -> t.Optional[exp.Expression]: 826 this = super()._parse_expression() 827 828 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 829 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 830 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 831 self._match(TokenType.R_PAREN) 832 833 return this 834 835 def _parse_columns(self) -> exp.Expression: 836 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 837 838 while self._next and self._match_text_seq(")", "APPLY", "("): 839 self._match(TokenType.R_PAREN) 840 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 841 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
843 class Generator(generator.Generator): 844 QUERY_HINTS = False 845 STRUCT_DELIMITER = ("(", ")") 846 NVL2_SUPPORTED = False 847 TABLESAMPLE_REQUIRES_PARENS = False 848 TABLESAMPLE_SIZE_IS_ROWS = False 849 TABLESAMPLE_KEYWORDS = "SAMPLE" 850 LAST_DAY_SUPPORTS_DATE_PART = False 851 CAN_IMPLEMENT_ARRAY_ANY = True 852 SUPPORTS_TO_NUMBER = False 853 JOIN_HINTS = False 854 TABLE_HINTS = False 855 GROUPINGS_SEP = "" 856 SET_OP_MODIFIERS = False 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 VALUES_AS_TABLE = False 859 ARRAY_SIZE_NAME = "LENGTH" 860 861 STRING_TYPE_MAPPING = { 862 exp.DataType.Type.CHAR: "String", 863 exp.DataType.Type.LONGBLOB: "String", 864 exp.DataType.Type.LONGTEXT: "String", 865 exp.DataType.Type.MEDIUMBLOB: "String", 866 exp.DataType.Type.MEDIUMTEXT: "String", 867 exp.DataType.Type.TINYBLOB: "String", 868 exp.DataType.Type.TINYTEXT: "String", 869 exp.DataType.Type.TEXT: "String", 870 exp.DataType.Type.VARBINARY: "String", 871 exp.DataType.Type.VARCHAR: "String", 872 } 873 874 SUPPORTED_JSON_PATH_PARTS = { 875 exp.JSONPathKey, 876 exp.JSONPathRoot, 877 exp.JSONPathSubscript, 878 } 879 880 TYPE_MAPPING = { 881 **generator.Generator.TYPE_MAPPING, 882 **STRING_TYPE_MAPPING, 883 exp.DataType.Type.ARRAY: "Array", 884 exp.DataType.Type.BOOLEAN: "Bool", 885 exp.DataType.Type.BIGINT: "Int64", 886 exp.DataType.Type.DATE32: "Date32", 887 exp.DataType.Type.DATETIME: "DateTime", 888 exp.DataType.Type.DATETIME64: "DateTime64", 889 exp.DataType.Type.DECIMAL: "Decimal", 890 exp.DataType.Type.DECIMAL32: "Decimal32", 891 exp.DataType.Type.DECIMAL64: "Decimal64", 892 exp.DataType.Type.DECIMAL128: "Decimal128", 893 exp.DataType.Type.DECIMAL256: "Decimal256", 894 exp.DataType.Type.TIMESTAMP: "DateTime", 895 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 896 exp.DataType.Type.DOUBLE: "Float64", 897 exp.DataType.Type.ENUM: "Enum", 898 exp.DataType.Type.ENUM8: "Enum8", 899 exp.DataType.Type.ENUM16: "Enum16", 900 exp.DataType.Type.FIXEDSTRING: "FixedString", 901 exp.DataType.Type.FLOAT: "Float32", 902 exp.DataType.Type.INT: "Int32", 903 exp.DataType.Type.MEDIUMINT: "Int32", 904 exp.DataType.Type.INT128: "Int128", 905 exp.DataType.Type.INT256: "Int256", 906 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 907 exp.DataType.Type.MAP: "Map", 908 exp.DataType.Type.NESTED: "Nested", 909 exp.DataType.Type.SMALLINT: "Int16", 910 exp.DataType.Type.STRUCT: "Tuple", 911 exp.DataType.Type.TINYINT: "Int8", 912 exp.DataType.Type.UBIGINT: "UInt64", 913 exp.DataType.Type.UINT: "UInt32", 914 exp.DataType.Type.UINT128: "UInt128", 915 exp.DataType.Type.UINT256: "UInt256", 916 exp.DataType.Type.USMALLINT: "UInt16", 917 exp.DataType.Type.UTINYINT: "UInt8", 918 exp.DataType.Type.IPV4: "IPv4", 919 exp.DataType.Type.IPV6: "IPv6", 920 exp.DataType.Type.POINT: "Point", 921 exp.DataType.Type.RING: "Ring", 922 exp.DataType.Type.LINESTRING: "LineString", 923 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 924 exp.DataType.Type.POLYGON: "Polygon", 925 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 926 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 927 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 928 } 929 930 TRANSFORMS = { 931 **generator.Generator.TRANSFORMS, 932 exp.AnyValue: rename_func("any"), 933 exp.ApproxDistinct: rename_func("uniq"), 934 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 935 exp.ArraySum: rename_func("arraySum"), 936 exp.ArgMax: arg_max_or_min_no_count("argMax"), 937 exp.ArgMin: arg_max_or_min_no_count("argMin"), 938 exp.Array: inline_array_sql, 939 exp.CastToStrType: rename_func("CAST"), 940 exp.CountIf: rename_func("countIf"), 941 exp.CompressColumnConstraint: lambda self, 942 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 943 exp.ComputedColumnConstraint: lambda self, 944 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 945 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 946 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 947 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 948 exp.DateStrToDate: rename_func("toDate"), 949 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 950 exp.Explode: rename_func("arrayJoin"), 951 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 952 exp.IsNan: rename_func("isNaN"), 953 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 954 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 955 exp.JSONPathKey: json_path_key_only_name, 956 exp.JSONPathRoot: lambda *_: "", 957 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 958 exp.Median: rename_func("median"), 959 exp.Nullif: rename_func("nullIf"), 960 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 961 exp.Pivot: no_pivot_sql, 962 exp.Quantile: _quantile_sql, 963 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 964 exp.Rand: rename_func("randCanonical"), 965 exp.StartsWith: rename_func("startsWith"), 966 exp.StrPosition: lambda self, e: self.func( 967 "position", e.this, e.args.get("substr"), e.args.get("position") 968 ), 969 exp.TimeToStr: lambda self, e: self.func( 970 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 971 ), 972 exp.TimeStrToTime: _timestrtotime_sql, 973 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 974 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 975 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 976 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 977 exp.MD5Digest: rename_func("MD5"), 978 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 979 exp.SHA: rename_func("SHA1"), 980 exp.SHA2: sha256_sql, 981 exp.UnixToTime: _unix_to_time_sql, 982 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 983 exp.Trim: trim_sql, 984 exp.Variance: rename_func("varSamp"), 985 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 986 exp.Stddev: rename_func("stddevSamp"), 987 exp.Chr: rename_func("CHAR"), 988 exp.Lag: lambda self, e: self.func( 989 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 990 ), 991 exp.Lead: lambda self, e: self.func( 992 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 993 ), 994 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 995 rename_func("editDistance") 996 ), 997 } 998 999 PROPERTIES_LOCATION = { 1000 **generator.Generator.PROPERTIES_LOCATION, 1001 exp.OnCluster: exp.Properties.Location.POST_NAME, 1002 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1003 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1004 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1005 } 1006 1007 # There's no list in docs, but it can be found in Clickhouse code 1008 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1009 ON_CLUSTER_TARGETS = { 1010 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1011 "DATABASE", 1012 "TABLE", 1013 "VIEW", 1014 "DICTIONARY", 1015 "INDEX", 1016 "FUNCTION", 1017 "NAMED COLLECTION", 1018 } 1019 1020 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1021 NON_NULLABLE_TYPES = { 1022 exp.DataType.Type.ARRAY, 1023 exp.DataType.Type.MAP, 1024 exp.DataType.Type.STRUCT, 1025 exp.DataType.Type.POINT, 1026 exp.DataType.Type.RING, 1027 exp.DataType.Type.LINESTRING, 1028 exp.DataType.Type.MULTILINESTRING, 1029 exp.DataType.Type.POLYGON, 1030 exp.DataType.Type.MULTIPOLYGON, 1031 } 1032 1033 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1034 strtodate_sql = self.function_fallback_sql(expression) 1035 1036 if not isinstance(expression.parent, exp.Cast): 1037 # StrToDate returns DATEs in other dialects (eg. postgres), so 1038 # this branch aims to improve the transpilation to clickhouse 1039 return f"CAST({strtodate_sql} AS DATE)" 1040 1041 return strtodate_sql 1042 1043 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1044 this = expression.this 1045 1046 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1047 return self.sql(this) 1048 1049 return super().cast_sql(expression, safe_prefix=safe_prefix) 1050 1051 def trycast_sql(self, expression: exp.TryCast) -> str: 1052 dtype = expression.to 1053 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1054 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1055 dtype.set("nullable", True) 1056 1057 return super().cast_sql(expression) 1058 1059 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1060 this = self.json_path_part(expression.this) 1061 return str(int(this) + 1) if is_int(this) else this 1062 1063 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1064 return f"AS {self.sql(expression, 'this')}" 1065 1066 def _any_to_has( 1067 self, 1068 expression: exp.EQ | exp.NEQ, 1069 default: t.Callable[[t.Any], str], 1070 prefix: str = "", 1071 ) -> str: 1072 if isinstance(expression.left, exp.Any): 1073 arr = expression.left 1074 this = expression.right 1075 elif isinstance(expression.right, exp.Any): 1076 arr = expression.right 1077 this = expression.left 1078 else: 1079 return default(expression) 1080 1081 return prefix + self.func("has", arr.this.unnest(), this) 1082 1083 def eq_sql(self, expression: exp.EQ) -> str: 1084 return self._any_to_has(expression, super().eq_sql) 1085 1086 def neq_sql(self, expression: exp.NEQ) -> str: 1087 return self._any_to_has(expression, super().neq_sql, "NOT ") 1088 1089 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1090 # Manually add a flag to make the search case-insensitive 1091 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1092 return self.func("match", expression.this, regex) 1093 1094 def datatype_sql(self, expression: exp.DataType) -> str: 1095 # String is the standard ClickHouse type, every other variant is just an alias. 1096 # Additionally, any supplied length parameter will be ignored. 1097 # 1098 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1099 if expression.this in self.STRING_TYPE_MAPPING: 1100 dtype = "String" 1101 else: 1102 dtype = super().datatype_sql(expression) 1103 1104 # This section changes the type to `Nullable(...)` if the following conditions hold: 1105 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1106 # and change their semantics 1107 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1108 # constraint: "Type of Map key must be a type, that can be represented by integer or 1109 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1110 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1111 parent = expression.parent 1112 nullable = expression.args.get("nullable") 1113 if nullable is True or ( 1114 nullable is None 1115 and not ( 1116 isinstance(parent, exp.DataType) 1117 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1118 and expression.index in (None, 0) 1119 ) 1120 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1121 ): 1122 dtype = f"Nullable({dtype})" 1123 1124 return dtype 1125 1126 def cte_sql(self, expression: exp.CTE) -> str: 1127 if expression.args.get("scalar"): 1128 this = self.sql(expression, "this") 1129 alias = self.sql(expression, "alias") 1130 return f"{this} AS {alias}" 1131 1132 return super().cte_sql(expression) 1133 1134 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1135 return super().after_limit_modifiers(expression) + [ 1136 ( 1137 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1138 if expression.args.get("settings") 1139 else "" 1140 ), 1141 ( 1142 self.seg("FORMAT ") + self.sql(expression, "format") 1143 if expression.args.get("format") 1144 else "" 1145 ), 1146 ] 1147 1148 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1149 params = self.expressions(expression, key="params", flat=True) 1150 return self.func(expression.name, *expression.expressions) + f"({params})" 1151 1152 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1153 return self.func(expression.name, *expression.expressions) 1154 1155 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1156 return self.anonymousaggfunc_sql(expression) 1157 1158 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1159 return self.parameterizedagg_sql(expression) 1160 1161 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1162 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1163 1164 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1165 return f"ON CLUSTER {self.sql(expression, 'this')}" 1166 1167 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1168 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1169 exp.Properties.Location.POST_NAME 1170 ): 1171 this_name = self.sql( 1172 expression.this if isinstance(expression.this, exp.Schema) else expression, 1173 "this", 1174 ) 1175 this_properties = " ".join( 1176 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1177 ) 1178 this_schema = self.schema_columns_sql(expression.this) 1179 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1180 1181 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1182 1183 return super().createable_sql(expression, locations) 1184 1185 def create_sql(self, expression: exp.Create) -> str: 1186 # The comment property comes last in CTAS statements, i.e. after the query 1187 query = expression.expression 1188 if isinstance(query, exp.Query): 1189 comment_prop = expression.find(exp.SchemaCommentProperty) 1190 if comment_prop: 1191 comment_prop.pop() 1192 query.replace(exp.paren(query)) 1193 else: 1194 comment_prop = None 1195 1196 create_sql = super().create_sql(expression) 1197 1198 comment_sql = self.sql(comment_prop) 1199 comment_sql = f" {comment_sql}" if comment_sql else "" 1200 1201 return f"{create_sql}{comment_sql}" 1202 1203 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1204 this = self.indent(self.sql(expression, "this")) 1205 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1206 1207 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1208 this = self.sql(expression, "this") 1209 this = f" {this}" if this else "" 1210 expr = self.sql(expression, "expression") 1211 expr = f" {expr}" if expr else "" 1212 index_type = self.sql(expression, "index_type") 1213 index_type = f" TYPE {index_type}" if index_type else "" 1214 granularity = self.sql(expression, "granularity") 1215 granularity = f" GRANULARITY {granularity}" if granularity else "" 1216 1217 return f"INDEX{this}{expr}{index_type}{granularity}" 1218 1219 def partition_sql(self, expression: exp.Partition) -> str: 1220 return f"PARTITION {self.expressions(expression, flat=True)}" 1221 1222 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1223 return f"ID {self.sql(expression.this)}" 1224 1225 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1226 return ( 1227 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1228 ) 1229 1230 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1231 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1232 1233 def is_sql(self, expression: exp.Is) -> str: 1234 is_sql = super().is_sql(expression) 1235 1236 if isinstance(expression.parent, exp.Not): 1237 # value IS NOT NULL -> NOT (value IS NULL) 1238 is_sql = self.wrap(is_sql) 1239 1240 return is_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1033 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1034 strtodate_sql = self.function_fallback_sql(expression) 1035 1036 if not isinstance(expression.parent, exp.Cast): 1037 # StrToDate returns DATEs in other dialects (eg. postgres), so 1038 # this branch aims to improve the transpilation to clickhouse 1039 return f"CAST({strtodate_sql} AS DATE)" 1040 1041 return strtodate_sql
1043 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1044 this = expression.this 1045 1046 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1047 return self.sql(this) 1048 1049 return super().cast_sql(expression, safe_prefix=safe_prefix)
1051 def trycast_sql(self, expression: exp.TryCast) -> str: 1052 dtype = expression.to 1053 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1054 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1055 dtype.set("nullable", True) 1056 1057 return super().cast_sql(expression)
1094 def datatype_sql(self, expression: exp.DataType) -> str: 1095 # String is the standard ClickHouse type, every other variant is just an alias. 1096 # Additionally, any supplied length parameter will be ignored. 1097 # 1098 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1099 if expression.this in self.STRING_TYPE_MAPPING: 1100 dtype = "String" 1101 else: 1102 dtype = super().datatype_sql(expression) 1103 1104 # This section changes the type to `Nullable(...)` if the following conditions hold: 1105 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1106 # and change their semantics 1107 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1108 # constraint: "Type of Map key must be a type, that can be represented by integer or 1109 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1110 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1111 parent = expression.parent 1112 nullable = expression.args.get("nullable") 1113 if nullable is True or ( 1114 nullable is None 1115 and not ( 1116 isinstance(parent, exp.DataType) 1117 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1118 and expression.index in (None, 0) 1119 ) 1120 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1121 ): 1122 dtype = f"Nullable({dtype})" 1123 1124 return dtype
1134 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1135 return super().after_limit_modifiers(expression) + [ 1136 ( 1137 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1138 if expression.args.get("settings") 1139 else "" 1140 ), 1141 ( 1142 self.seg("FORMAT ") + self.sql(expression, "format") 1143 if expression.args.get("format") 1144 else "" 1145 ), 1146 ]
1167 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1168 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1169 exp.Properties.Location.POST_NAME 1170 ): 1171 this_name = self.sql( 1172 expression.this if isinstance(expression.this, exp.Schema) else expression, 1173 "this", 1174 ) 1175 this_properties = " ".join( 1176 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1177 ) 1178 this_schema = self.schema_columns_sql(expression.this) 1179 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1180 1181 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1182 1183 return super().createable_sql(expression, locations)
1185 def create_sql(self, expression: exp.Create) -> str: 1186 # The comment property comes last in CTAS statements, i.e. after the query 1187 query = expression.expression 1188 if isinstance(query, exp.Query): 1189 comment_prop = expression.find(exp.SchemaCommentProperty) 1190 if comment_prop: 1191 comment_prop.pop() 1192 query.replace(exp.paren(query)) 1193 else: 1194 comment_prop = None 1195 1196 create_sql = super().create_sql(expression) 1197 1198 comment_sql = self.sql(comment_prop) 1199 comment_sql = f" {comment_sql}" if comment_sql else "" 1200 1201 return f"{create_sql}{comment_sql}"
1207 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1208 this = self.sql(expression, "this") 1209 this = f" {this}" if this else "" 1210 expr = self.sql(expression, "expression") 1211 expr = f" {expr}" if expr else "" 1212 index_type = self.sql(expression, "index_type") 1213 index_type = f" TYPE {index_type}" if index_type else "" 1214 granularity = self.sql(expression, "granularity") 1215 granularity = f" GRANULARITY {granularity}" if granularity else "" 1216 1217 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql