sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    remove_from_array_using_filter,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_datetime_format(
    expr_type: t.Type[E],
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        expr = build_formatted_time(expr_type, "clickhouse")(args)

        timezone = seq_get(args, 2)
        if timezone:
            expr.set("zone", timezone)

        return expr

    return _builder


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql
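# Usage sketch (illustrative, not part of the module): how `_unix_to_time_sql`
# above picks a fromUnixTimestamp* variant based on the `scale` argument.
# Assumes sqlglot is installed; the exact cast rendering (e.g. the Nullable(...)
# wrapper added by `datatype_sql` below) may vary between sqlglot versions.
#
#     >>> from sqlglot import exp
#     >>> node = exp.UnixToTime(this=exp.column("ts"), scale=exp.UnixToTime.MILLIS)
#     >>> node.sql("clickhouse")
#     'fromUnixTimestamp64Milli(CAST(ts AS Nullable(Int64)))'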
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove it.
        # The first step in removing it is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds are present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return a literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into
        # '2020-01-01 12:13:14'. This is because Clickhouse encodes the timezone as a data type
        # parameter and throws an error if it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"
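# Worked sketch (illustrative): the padding and offset stripping performed by
# `_timestrtotime_sql` above, using only the standard library. A 3-digit
# fraction is padded to 6 digits so pre-3.11 `fromisoformat` accepts it, then
# the offset is dropped:
#
#     >>> import datetime
#     >>> date_part, rest = "2020-01-01 12:13:14.123-08:00".split(".")
#     >>> frac, offset = rest.split("-")
#     >>> padded = f"{date_part}.{frac.ljust(6, '0')}-{offset}"
#     >>> datetime.datetime.fromisoformat(padded).replace(tzinfo=None).isoformat(sep=" ")
#     '2020-01-01 12:13:14.123000'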
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure, e.g.:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name, e.g.:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases
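    # Sketch (illustrative): the two aliasing paths above. An embedded structure
    # string yields the declared names; otherwise ClickHouse-style defaults
    # "c1", "c2", ... are generated:
    #
    #     >>> structure = "person String, place String"
    #     >>> [coldef.strip().split(" ")[0] for coldef in structure.split(",")]
    #     ['person', 'place']
    #     >>> [f"c{i + 1}" for i in range(3)]
    #     ['c1', 'c2', 'c3']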
316 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 317 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 318 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 319 "DATE_FORMAT": _build_datetime_format(exp.TimeToStr), 320 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 321 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 322 "FORMATDATETIME": _build_datetime_format(exp.TimeToStr), 323 "JSONEXTRACTSTRING": build_json_extract_path( 324 exp.JSONExtractScalar, zero_based_indexing=False 325 ), 326 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 327 "MAP": parser.build_var_map, 328 "MATCH": exp.RegexpLike.from_arg_list, 329 "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime), 330 "RANDCANONICAL": exp.Rand.from_arg_list, 331 "STR_TO_DATE": _build_str_to_date, 332 "TUPLE": exp.Struct.from_arg_list, 333 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 334 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 335 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 336 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 337 "UNIQ": exp.ApproxDistinct.from_arg_list, 338 "XOR": lambda args: exp.Xor(expressions=args), 339 "MD5": exp.MD5Digest.from_arg_list, 340 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 341 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 342 "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list, 343 "TOTYPENAME": exp.Typeof.from_arg_list, 344 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 345 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 346 } 347 FUNCTIONS.pop("TRANSFORM") 348 349 AGG_FUNCTIONS = { 350 "count", 351 "min", 352 "max", 353 "sum", 354 "avg", 355 "any", 356 "stddevPop", 357 "stddevSamp", 358 "varPop", 359 "varSamp", 360 "corr", 361 "covarPop", 362 "covarSamp", 363 "entropy", 364 "exponentialMovingAverage", 365 "intervalLengthSum", 366 "kolmogorovSmirnovTest", 367 "mannWhitneyUTest", 368 "median", 369 "rankCorr", 370 "sumKahan", 371 "studentTTest", 372 "welchTTest", 373 "anyHeavy", 374 "anyLast", 375 "boundingRatio", 376 "first_value", 377 "last_value", 378 "argMin", 379 "argMax", 380 "avgWeighted", 381 "topK", 382 "topKWeighted", 383 "deltaSum", 384 "deltaSumTimestamp", 385 "groupArray", 386 "groupArrayLast", 387 "groupUniqArray", 388 "groupArrayInsertAt", 389 "groupArrayMovingAvg", 390 "groupArrayMovingSum", 391 "groupArraySample", 392 "groupBitAnd", 393 "groupBitOr", 394 "groupBitXor", 395 "groupBitmap", 396 "groupBitmapAnd", 397 "groupBitmapOr", 398 "groupBitmapXor", 399 "sumWithOverflow", 400 "sumMap", 401 "minMap", 402 "maxMap", 403 "skewSamp", 404 "skewPop", 405 "kurtSamp", 406 "kurtPop", 407 "uniq", 408 "uniqExact", 409 "uniqCombined", 410 "uniqCombined64", 411 "uniqHLL12", 412 "uniqTheta", 413 "quantile", 414 "quantiles", 415 "quantileExact", 416 "quantilesExact", 417 "quantileExactLow", 418 "quantilesExactLow", 419 "quantileExactHigh", 420 "quantilesExactHigh", 421 "quantileExactWeighted", 422 "quantilesExactWeighted", 423 "quantileTiming", 424 "quantilesTiming", 425 "quantileTimingWeighted", 426 "quantilesTimingWeighted", 427 "quantileDeterministic", 428 "quantilesDeterministic", 429 "quantileTDigest", 430 "quantilesTDigest", 431 "quantileTDigestWeighted", 432 "quantilesTDigestWeighted", 433 "quantileBFloat16", 434 "quantilesBFloat16", 435 
"quantileBFloat16Weighted", 436 "quantilesBFloat16Weighted", 437 "simpleLinearRegression", 438 "stochasticLinearRegression", 439 "stochasticLogisticRegression", 440 "categoricalInformationValue", 441 "contingency", 442 "cramersV", 443 "cramersVBiasCorrected", 444 "theilsU", 445 "maxIntersections", 446 "maxIntersectionsPosition", 447 "meanZTest", 448 "quantileInterpolatedWeighted", 449 "quantilesInterpolatedWeighted", 450 "quantileGK", 451 "quantilesGK", 452 "sparkBar", 453 "sumCount", 454 "largestTriangleThreeBuckets", 455 "histogram", 456 "sequenceMatch", 457 "sequenceCount", 458 "windowFunnel", 459 "retention", 460 "uniqUpTo", 461 "sequenceNextNode", 462 "exponentialTimeDecayedAvg", 463 } 464 465 AGG_FUNCTIONS_SUFFIXES = [ 466 "If", 467 "Array", 468 "ArrayIf", 469 "Map", 470 "SimpleState", 471 "State", 472 "Merge", 473 "MergeState", 474 "ForEach", 475 "Distinct", 476 "OrDefault", 477 "OrNull", 478 "Resample", 479 "ArgMin", 480 "ArgMax", 481 ] 482 483 FUNC_TOKENS = { 484 *parser.Parser.FUNC_TOKENS, 485 TokenType.AND, 486 TokenType.OR, 487 TokenType.SET, 488 } 489 490 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 491 492 ID_VAR_TOKENS = { 493 *parser.Parser.ID_VAR_TOKENS, 494 TokenType.LIKE, 495 } 496 497 AGG_FUNC_MAPPING = ( 498 lambda functions, suffixes: { 499 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 500 } 501 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 502 503 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 504 505 FUNCTION_PARSERS = { 506 **parser.Parser.FUNCTION_PARSERS, 507 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 508 "QUANTILE": lambda self: self._parse_quantile(), 509 "MEDIAN": lambda self: self._parse_quantile(), 510 "COLUMNS": lambda self: self._parse_columns(), 511 } 512 513 FUNCTION_PARSERS.pop("MATCH") 514 515 PROPERTY_PARSERS = { 516 **parser.Parser.PROPERTY_PARSERS, 517 "ENGINE": lambda self: self._parse_engine_property(), 518 } 519 PROPERTY_PARSERS.pop("DYNAMIC") 520 521 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 522 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 523 524 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 525 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 526 527 RANGE_PARSERS = { 528 **parser.Parser.RANGE_PARSERS, 529 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 530 } 531 532 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 533 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable, which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
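        # Sketch (illustrative, assumes sqlglot installed): the nullable marking
        # above is what lets casts coming from other dialects gain an explicit
        # wrapper, exactly as the comment describes.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT CAST(x AS TEXT)", read="postgres", write="clickhouse")
        #     ['SELECT CAST(x AS Nullable(String))']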
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)
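        # Sketch (illustrative, assumes sqlglot installed): `_parse_query_parameter`
        # above handles ClickHouse query parameters, which round-trip through
        # `placeholder_sql` in the generator below.
        #
        #     >>> from sqlglot import parse_one
        #     >>> parse_one("SELECT {abc: UInt32}", read="clickhouse").sql("clickhouse")
        #     'SELECT {abc: UInt32}'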
        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in the base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if it exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None
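        # Sketch (illustrative, assumes sqlglot installed): the fallback branch in
        # `_parse_cte` above accepts ClickHouse's scalar CTE form and marks it.
        #
        #     >>> from sqlglot import exp, parse_one
        #     >>> cte = parse_one("WITH 10 AS threshold SELECT 1", read="clickhouse").find(exp.CTE)
        #     >>> cte.args.get("scalar")
        #     True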
        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
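        # Sketch (illustrative, assumes sqlglot installed): `_parse_quantile` above
        # handles ClickHouse's parameterized call syntax, and `median(x)` defaults
        # the level to 0.5, so both forms regenerate identically.
        #
        #     >>> from sqlglot import parse_one
        #     >>> parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse").sql("clickhouse")
        #     'SELECT quantile(0.5)(x) FROM t'
        #     >>> parse_one("SELECT median(x) FROM t", read="clickhouse").sql("clickhouse")
        #     'SELECT quantile(0.5)(x) FROM t'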
However, "SELECT <expr> apply" is a valid alias 938 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 939 return this 940 941 return super()._parse_alias(this=this, explicit=explicit) 942 943 def _parse_expression(self) -> t.Optional[exp.Expression]: 944 this = super()._parse_expression() 945 946 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 947 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 948 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 949 self._match(TokenType.R_PAREN) 950 951 return this 952 953 def _parse_columns(self) -> exp.Expression: 954 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 955 956 while self._next and self._match_text_seq(")", "APPLY", "("): 957 self._match(TokenType.R_PAREN) 958 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 959 return this 960 961 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 962 value = super()._parse_value(values=values) 963 if not value: 964 return None 965 966 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 967 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 968 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 969 # but the final result is not altered by the extra parentheses. 970 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 971 expressions = value.expressions 972 if values and not isinstance(expressions[-1], exp.Tuple): 973 value.set( 974 "expressions", 975 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 976 ) 977 978 return value 979 980 class Generator(generator.Generator): 981 QUERY_HINTS = False 982 STRUCT_DELIMITER = ("(", ")") 983 NVL2_SUPPORTED = False 984 TABLESAMPLE_REQUIRES_PARENS = False 985 TABLESAMPLE_SIZE_IS_ROWS = False 986 TABLESAMPLE_KEYWORDS = "SAMPLE" 987 LAST_DAY_SUPPORTS_DATE_PART = False 988 CAN_IMPLEMENT_ARRAY_ANY = True 989 SUPPORTS_TO_NUMBER = False 990 JOIN_HINTS = False 991 TABLE_HINTS = False 992 GROUPINGS_SEP = "" 993 SET_OP_MODIFIERS = False 994 ARRAY_SIZE_NAME = "LENGTH" 995 WRAP_DERIVED_VALUES = False 996 997 STRING_TYPE_MAPPING = { 998 exp.DataType.Type.BLOB: "String", 999 exp.DataType.Type.CHAR: "String", 1000 exp.DataType.Type.LONGBLOB: "String", 1001 exp.DataType.Type.LONGTEXT: "String", 1002 exp.DataType.Type.MEDIUMBLOB: "String", 1003 exp.DataType.Type.MEDIUMTEXT: "String", 1004 exp.DataType.Type.TINYBLOB: "String", 1005 exp.DataType.Type.TINYTEXT: "String", 1006 exp.DataType.Type.TEXT: "String", 1007 exp.DataType.Type.VARBINARY: "String", 1008 exp.DataType.Type.VARCHAR: "String", 1009 } 1010 1011 SUPPORTED_JSON_PATH_PARTS = { 1012 exp.JSONPathKey, 1013 exp.JSONPathRoot, 1014 exp.JSONPathSubscript, 1015 } 1016 1017 TYPE_MAPPING = { 1018 **generator.Generator.TYPE_MAPPING, 1019 **STRING_TYPE_MAPPING, 1020 exp.DataType.Type.ARRAY: "Array", 1021 exp.DataType.Type.BOOLEAN: "Bool", 1022 exp.DataType.Type.BIGINT: "Int64", 1023 exp.DataType.Type.DATE32: "Date32", 1024 exp.DataType.Type.DATETIME: "DateTime", 1025 exp.DataType.Type.DATETIME2: "DateTime", 1026 exp.DataType.Type.SMALLDATETIME: "DateTime", 1027 exp.DataType.Type.DATETIME64: "DateTime64", 1028 exp.DataType.Type.DECIMAL: "Decimal", 1029 exp.DataType.Type.DECIMAL32: "Decimal32", 1030 exp.DataType.Type.DECIMAL64: "Decimal64", 1031 
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
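        # Sketch (illustrative): TYPE_MAPPING drives type rendering; the same
        # `nullable=False` marker used with `exp.DataType.build` elsewhere in this
        # module suppresses the Nullable(...) wrapper added by `datatype_sql` below.
        #
        #     >>> from sqlglot import exp
        #     >>> exp.DataType.build("UBIGINT", nullable=False).sql("clickhouse")
        #     'UInt64'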
rename_func("median"), 1107 exp.Nullif: rename_func("nullIf"), 1108 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1109 exp.Pivot: no_pivot_sql, 1110 exp.Quantile: _quantile_sql, 1111 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1112 exp.Rand: rename_func("randCanonical"), 1113 exp.StartsWith: rename_func("startsWith"), 1114 exp.EndsWith: rename_func("endsWith"), 1115 exp.StrPosition: lambda self, e: strposition_sql( 1116 self, 1117 e, 1118 func_name="POSITION", 1119 supports_position=True, 1120 use_ansi_position=False, 1121 ), 1122 exp.TimeToStr: lambda self, e: self.func( 1123 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1124 ), 1125 exp.TimeStrToTime: _timestrtotime_sql, 1126 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1127 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1128 exp.Typeof: rename_func("toTypeName"), 1129 exp.VarMap: _map_sql, 1130 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1131 exp.MD5Digest: rename_func("MD5"), 1132 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1133 exp.SHA: rename_func("SHA1"), 1134 exp.SHA2: sha256_sql, 1135 exp.UnixToTime: _unix_to_time_sql, 1136 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1137 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1138 exp.Variance: rename_func("varSamp"), 1139 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1140 exp.Stddev: rename_func("stddevSamp"), 1141 exp.Chr: rename_func("CHAR"), 1142 exp.Lag: lambda self, e: self.func( 1143 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1144 ), 1145 exp.Lead: lambda self, e: self.func( 1146 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1147 ), 1148 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1149 rename_func("editDistance") 1150 ), 1151 exp.ParseDatetime: rename_func("parseDateTime"), 1152 } 1153 1154 PROPERTIES_LOCATION = { 1155 **generator.Generator.PROPERTIES_LOCATION, 1156 exp.OnCluster: exp.Properties.Location.POST_NAME, 1157 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1158 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1159 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1160 } 1161 1162 # There's no list in docs, but it can be found in Clickhouse code 1163 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1164 ON_CLUSTER_TARGETS = { 1165 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1166 "DATABASE", 1167 "TABLE", 1168 "VIEW", 1169 "DICTIONARY", 1170 "INDEX", 1171 "FUNCTION", 1172 "NAMED COLLECTION", 1173 } 1174 1175 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1176 NON_NULLABLE_TYPES = { 1177 exp.DataType.Type.ARRAY, 1178 exp.DataType.Type.MAP, 1179 exp.DataType.Type.STRUCT, 1180 exp.DataType.Type.POINT, 1181 exp.DataType.Type.RING, 1182 exp.DataType.Type.LINESTRING, 1183 exp.DataType.Type.MULTILINESTRING, 1184 exp.DataType.Type.POLYGON, 1185 exp.DataType.Type.MULTIPOLYGON, 1186 } 1187 1188 def offset_sql(self, expression: exp.Offset) -> str: 1189 offset = super().offset_sql(expression) 1190 1191 # OFFSET ... 
        def offset_sql(self, expression: exp.Offset) -> str:
            offset = super().offset_sql(expression)

            # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
            # https://clickhouse.com/docs/sql-reference/statements/select/offset
            parent = expression.parent
            if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
                offset = f"{offset} ROWS"

            return offset

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
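        # Sketch (illustrative, assumes sqlglot installed): `_any_to_has` above
        # rewrites ANY comparisons as ClickHouse's has() membership check.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT x = ANY(arr)", read="postgres", write="clickhouse")
        #     ['SELECT has(arr, x)']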
        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
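        # Sketch (illustrative, assumes sqlglot installed): SETTINGS and FORMAT are
        # parsed as query modifiers and re-emitted by `after_limit_modifiers` above.
        #
        #     >>> from sqlglot import parse_one
        #     >>> sql = "SELECT * FROM t SETTINGS max_threads = 4 FORMAT JSONEachRow"
        #     >>> parse_one(sql, read="clickhouse").sql("clickhouse")
        #     'SELECT * FROM t SETTINGS max_threads = 4 FORMAT JSONEachRow'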
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let the `GLOBAL IN` child interpose the `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
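# Sketch (illustrative, assumes sqlglot installed): GLOBAL IN handling spans the
# parser (`_parse_global_in`) and the generator (`in_sql` / `not_sql` above), so
# the negated global form survives a round trip.
#
#     >>> from sqlglot import parse_one
#     >>> parse_one("SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)", read="clickhouse").sql("clickhouse")
#     'SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)'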
628 self._match(TokenType.COMMA) 629 return self.expression( 630 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 631 ) 632 633 def _parse_assignment(self) -> t.Optional[exp.Expression]: 634 this = super()._parse_assignment() 635 636 if self._match(TokenType.PLACEHOLDER): 637 return self.expression( 638 exp.If, 639 this=this, 640 true=self._parse_assignment(), 641 false=self._match(TokenType.COLON) and self._parse_assignment(), 642 ) 643 644 return this 645 646 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 647 """ 648 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 649 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 650 """ 651 index = self._index 652 653 this = self._parse_id_var() 654 self._match(TokenType.COLON) 655 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 656 self._match_text_seq("IDENTIFIER") and "Identifier" 657 ) 658 659 if not kind: 660 self._retreat(index) 661 return None 662 elif not self._match(TokenType.R_BRACE): 663 self.raise_error("Expecting }") 664 665 if isinstance(this, exp.Identifier) and not this.quoted: 666 this = exp.var(this.name) 667 668 return self.expression(exp.Placeholder, this=this, kind=kind) 669 670 def _parse_bracket( 671 self, this: t.Optional[exp.Expression] = None 672 ) -> t.Optional[exp.Expression]: 673 l_brace = self._match(TokenType.L_BRACE, advance=False) 674 bracket = super()._parse_bracket(this) 675 676 if l_brace and isinstance(bracket, exp.Struct): 677 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 678 for expression in bracket.expressions: 679 if not isinstance(expression, exp.PropertyEQ): 680 break 681 682 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 683 varmap.args["values"].append("expressions", expression.expression) 684 685 return varmap 686 687 return bracket 688 689 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 690 this = super()._parse_in(this) 691 this.set("is_global", is_global) 692 return this 693 694 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 695 is_negated = self._match(TokenType.NOT) 696 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 697 return self.expression(exp.Not, this=this) if is_negated else this 698 699 def _parse_table( 700 self, 701 schema: bool = False, 702 joins: bool = False, 703 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 704 parse_bracket: bool = False, 705 is_db_reference: bool = False, 706 parse_partition: bool = False, 707 consume_pipe: bool = False, 708 ) -> t.Optional[exp.Expression]: 709 this = super()._parse_table( 710 schema=schema, 711 joins=joins, 712 alias_tokens=alias_tokens, 713 parse_bracket=parse_bracket, 714 is_db_reference=is_db_reference, 715 ) 716 717 if isinstance(this, exp.Table): 718 inner = this.this 719 alias = this.args.get("alias") 720 721 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 722 alias.set("columns", [exp.to_identifier("generate_series")]) 723 724 if self._match(TokenType.FINAL): 725 this = self.expression(exp.Final, this=this) 726 727 return this 728 729 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 730 return super()._parse_position(haystack_first=True) 731 732 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 733 def _parse_cte(self) -> t.Optional[exp.CTE]: 734 # WITH <identifier> AS 
<subquery expression> 735 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 736 737 if not cte: 738 # WITH <expression> AS <identifier> 739 cte = self.expression( 740 exp.CTE, 741 this=self._parse_assignment(), 742 alias=self._parse_table_alias(), 743 scalar=True, 744 ) 745 746 return cte 747 748 def _parse_join_parts( 749 self, 750 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 751 is_global = self._match(TokenType.GLOBAL) and self._prev 752 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 753 754 if kind_pre: 755 kind = self._match_set(self.JOIN_KINDS) and self._prev 756 side = self._match_set(self.JOIN_SIDES) and self._prev 757 return is_global, side, kind 758 759 return ( 760 is_global, 761 self._match_set(self.JOIN_SIDES) and self._prev, 762 self._match_set(self.JOIN_KINDS) and self._prev, 763 ) 764 765 def _parse_join( 766 self, skip_join_token: bool = False, parse_bracket: bool = False 767 ) -> t.Optional[exp.Join]: 768 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 769 if join: 770 join.set("global", join.args.pop("method", None)) 771 772 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 773 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 774 if join.kind == "ARRAY": 775 for table in join.find_all(exp.Table): 776 table.replace(table.to_column()) 777 778 return join 779 780 def _parse_function( 781 self, 782 functions: t.Optional[t.Dict[str, t.Callable]] = None, 783 anonymous: bool = False, 784 optional_parens: bool = True, 785 any_token: bool = False, 786 ) -> t.Optional[exp.Expression]: 787 expr = super()._parse_function( 788 functions=functions, 789 anonymous=anonymous, 790 optional_parens=optional_parens, 791 any_token=any_token, 792 ) 793 794 func = expr.this if isinstance(expr, exp.Window) else expr 795 796 # Aggregate functions can be split in 2 parts: <func_name><suffix> 797 parts = ( 798 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 799 ) 800 801 if parts: 802 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 803 params = self._parse_func_params(anon_func) 804 805 kwargs = { 806 "this": anon_func.this, 807 "expressions": anon_func.expressions, 808 } 809 if parts[1]: 810 exp_class: t.Type[exp.Expression] = ( 811 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 812 ) 813 else: 814 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 815 816 kwargs["exp_class"] = exp_class 817 if params: 818 kwargs["params"] = params 819 820 func = self.expression(**kwargs) 821 822 if isinstance(expr, exp.Window): 823 # The window's func was parsed as Anonymous in base parser, fix its 824 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 825 expr.set("this", func) 826 elif params: 827 # Params have blocked super()._parse_function() from parsing the following window 828 # (if that exists) as they're standing between the function call and the window spec 829 expr = self._parse_window(func) 830 else: 831 expr = func 832 833 return expr 834 835 def _parse_func_params( 836 self, this: t.Optional[exp.Func] = None 837 ) -> t.Optional[t.List[exp.Expression]]: 838 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 839 return self._parse_csv(self._parse_lambda) 840 841 if self._match(TokenType.L_PAREN): 842 params = self._parse_csv(self._parse_lambda) 843 self._match_r_paren(this) 844 return params 845 846 return None 847 848 def _parse_quantile(self) -> 
exp.Quantile: 849 this = self._parse_lambda() 850 params = self._parse_func_params() 851 if params: 852 return self.expression(exp.Quantile, this=params[0], quantile=this) 853 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 854 855 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 856 return super()._parse_wrapped_id_vars(optional=True) 857 858 def _parse_primary_key( 859 self, wrapped_optional: bool = False, in_props: bool = False 860 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 861 return super()._parse_primary_key( 862 wrapped_optional=wrapped_optional or in_props, in_props=in_props 863 ) 864 865 def _parse_on_property(self) -> t.Optional[exp.Expression]: 866 index = self._index 867 if self._match_text_seq("CLUSTER"): 868 this = self._parse_string() or self._parse_id_var() 869 if this: 870 return self.expression(exp.OnCluster, this=this) 871 else: 872 self._retreat(index) 873 return None 874 875 def _parse_index_constraint( 876 self, kind: t.Optional[str] = None 877 ) -> exp.IndexColumnConstraint: 878 # INDEX name1 expr TYPE type1(args) GRANULARITY value 879 this = self._parse_id_var() 880 expression = self._parse_assignment() 881 882 index_type = self._match_text_seq("TYPE") and ( 883 self._parse_function() or self._parse_var() 884 ) 885 886 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 887 888 return self.expression( 889 exp.IndexColumnConstraint, 890 this=this, 891 expression=expression, 892 index_type=index_type, 893 granularity=granularity, 894 ) 895 896 def _parse_partition(self) -> t.Optional[exp.Partition]: 897 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 898 if not self._match(TokenType.PARTITION): 899 return None 900 901 if self._match_text_seq("ID"): 902 # Corresponds to the PARTITION ID <string_value> syntax 903 expressions: t.List[exp.Expression] = [ 904 self.expression(exp.PartitionId, this=self._parse_string()) 905 ] 906 else: 907 expressions = self._parse_expressions() 908 909 return self.expression(exp.Partition, expressions=expressions) 910 911 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 912 partition = self._parse_partition() 913 914 if not partition or not self._match(TokenType.FROM): 915 return None 916 917 return self.expression( 918 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 919 ) 920 921 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 922 if not self._match_text_seq("PROJECTION"): 923 return None 924 925 return self.expression( 926 exp.ProjectionDef, 927 this=self._parse_id_var(), 928 expression=self._parse_wrapped(self._parse_statement), 929 ) 930 931 def _parse_constraint(self) -> t.Optional[exp.Expression]: 932 return super()._parse_constraint() or self._parse_projection_def() 933 934 def _parse_alias( 935 self, this: t.Optional[exp.Expression], explicit: bool = False 936 ) -> t.Optional[exp.Expression]: 937 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 938 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 939 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 940 return this 941 942 return super()._parse_alias(this=this, explicit=explicit) 943 944 def _parse_expression(self) -> t.Optional[exp.Expression]: 945 this = super()._parse_expression() 946 947 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 948 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 949 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 950 self._match(TokenType.R_PAREN) 951 952 return this 953 954 def _parse_columns(self) -> exp.Expression: 955 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 956 957 while self._next and self._match_text_seq(")", "APPLY", "("): 958 self._match(TokenType.R_PAREN) 959 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 960 return this 961 962 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 963 value = super()._parse_value(values=values) 964 if not value: 965 return None 966 967 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 968 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 969 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 970 # but the final result is not altered by the extra parentheses. 971 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 972 expressions = value.expressions 973 if values and not isinstance(expressions[-1], exp.Tuple): 974 value.set( 975 "expressions", 976 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 977 ) 978 979 return value 980 981 class Generator(generator.Generator): 982 QUERY_HINTS = False 983 STRUCT_DELIMITER = ("(", ")") 984 NVL2_SUPPORTED = False 985 TABLESAMPLE_REQUIRES_PARENS = False 986 TABLESAMPLE_SIZE_IS_ROWS = False 987 TABLESAMPLE_KEYWORDS = "SAMPLE" 988 LAST_DAY_SUPPORTS_DATE_PART = False 989 CAN_IMPLEMENT_ARRAY_ANY = True 990 SUPPORTS_TO_NUMBER = False 991 JOIN_HINTS = False 992 TABLE_HINTS = False 993 GROUPINGS_SEP = "" 994 SET_OP_MODIFIERS = False 995 ARRAY_SIZE_NAME = "LENGTH" 996 WRAP_DERIVED_VALUES = False 997 998 STRING_TYPE_MAPPING = { 999 exp.DataType.Type.BLOB: "String", 1000 exp.DataType.Type.CHAR: "String", 1001 exp.DataType.Type.LONGBLOB: "String", 1002 exp.DataType.Type.LONGTEXT: "String", 1003 exp.DataType.Type.MEDIUMBLOB: "String", 1004 exp.DataType.Type.MEDIUMTEXT: "String", 1005 exp.DataType.Type.TINYBLOB: "String", 1006 exp.DataType.Type.TINYTEXT: "String", 1007 exp.DataType.Type.TEXT: "String", 1008 exp.DataType.Type.VARBINARY: "String", 1009 exp.DataType.Type.VARCHAR: "String", 1010 } 1011 1012 SUPPORTED_JSON_PATH_PARTS = { 1013 exp.JSONPathKey, 1014 exp.JSONPathRoot, 1015 exp.JSONPathSubscript, 1016 } 1017 1018 TYPE_MAPPING = { 1019 **generator.Generator.TYPE_MAPPING, 1020 **STRING_TYPE_MAPPING, 1021 exp.DataType.Type.ARRAY: "Array", 1022 exp.DataType.Type.BOOLEAN: "Bool", 1023 exp.DataType.Type.BIGINT: "Int64", 1024 exp.DataType.Type.DATE32: "Date32", 1025 exp.DataType.Type.DATETIME: "DateTime", 1026 exp.DataType.Type.DATETIME2: "DateTime", 1027 exp.DataType.Type.SMALLDATETIME: "DateTime", 1028 exp.DataType.Type.DATETIME64: "DateTime64", 1029 exp.DataType.Type.DECIMAL: "Decimal", 1030 exp.DataType.Type.DECIMAL32: "Decimal32", 1031 exp.DataType.Type.DECIMAL64: "Decimal64", 1032 
exp.DataType.Type.DECIMAL128: "Decimal128", 1033 exp.DataType.Type.DECIMAL256: "Decimal256", 1034 exp.DataType.Type.TIMESTAMP: "DateTime", 1035 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1036 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1037 exp.DataType.Type.DOUBLE: "Float64", 1038 exp.DataType.Type.ENUM: "Enum", 1039 exp.DataType.Type.ENUM8: "Enum8", 1040 exp.DataType.Type.ENUM16: "Enum16", 1041 exp.DataType.Type.FIXEDSTRING: "FixedString", 1042 exp.DataType.Type.FLOAT: "Float32", 1043 exp.DataType.Type.INT: "Int32", 1044 exp.DataType.Type.MEDIUMINT: "Int32", 1045 exp.DataType.Type.INT128: "Int128", 1046 exp.DataType.Type.INT256: "Int256", 1047 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1048 exp.DataType.Type.MAP: "Map", 1049 exp.DataType.Type.NESTED: "Nested", 1050 exp.DataType.Type.NOTHING: "Nothing", 1051 exp.DataType.Type.SMALLINT: "Int16", 1052 exp.DataType.Type.STRUCT: "Tuple", 1053 exp.DataType.Type.TINYINT: "Int8", 1054 exp.DataType.Type.UBIGINT: "UInt64", 1055 exp.DataType.Type.UINT: "UInt32", 1056 exp.DataType.Type.UINT128: "UInt128", 1057 exp.DataType.Type.UINT256: "UInt256", 1058 exp.DataType.Type.USMALLINT: "UInt16", 1059 exp.DataType.Type.UTINYINT: "UInt8", 1060 exp.DataType.Type.IPV4: "IPv4", 1061 exp.DataType.Type.IPV6: "IPv6", 1062 exp.DataType.Type.POINT: "Point", 1063 exp.DataType.Type.RING: "Ring", 1064 exp.DataType.Type.LINESTRING: "LineString", 1065 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1066 exp.DataType.Type.POLYGON: "Polygon", 1067 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1068 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1069 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1070 exp.DataType.Type.DYNAMIC: "Dynamic", 1071 } 1072 1073 TRANSFORMS = { 1074 **generator.Generator.TRANSFORMS, 1075 exp.AnyValue: rename_func("any"), 1076 exp.ApproxDistinct: rename_func("uniq"), 1077 exp.ArrayConcat: rename_func("arrayConcat"), 1078 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1079 exp.ArrayRemove: remove_from_array_using_filter, 1080 exp.ArrayReverse: rename_func("arrayReverse"), 1081 exp.ArraySlice: rename_func("arraySlice"), 1082 exp.ArraySum: rename_func("arraySum"), 1083 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1084 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1085 exp.Array: inline_array_sql, 1086 exp.CastToStrType: rename_func("CAST"), 1087 exp.CountIf: rename_func("countIf"), 1088 exp.CompressColumnConstraint: lambda self, 1089 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1090 exp.ComputedColumnConstraint: lambda self, 1091 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1092 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1093 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1094 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1095 exp.DateStrToDate: rename_func("toDate"), 1096 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1097 exp.Explode: rename_func("arrayJoin"), 1098 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1099 exp.IsNan: rename_func("isNaN"), 1100 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1101 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1102 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1103 exp.JSONPathKey: json_path_key_only_name, 1104 exp.JSONPathRoot: lambda *_: "", 1105 exp.Length: length_or_char_length_sql, 1106 exp.Map: _map_sql, 1107 exp.Median: 
rename_func("median"), 1108 exp.Nullif: rename_func("nullIf"), 1109 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1110 exp.Pivot: no_pivot_sql, 1111 exp.Quantile: _quantile_sql, 1112 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1113 exp.Rand: rename_func("randCanonical"), 1114 exp.StartsWith: rename_func("startsWith"), 1115 exp.EndsWith: rename_func("endsWith"), 1116 exp.StrPosition: lambda self, e: strposition_sql( 1117 self, 1118 e, 1119 func_name="POSITION", 1120 supports_position=True, 1121 use_ansi_position=False, 1122 ), 1123 exp.TimeToStr: lambda self, e: self.func( 1124 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1125 ), 1126 exp.TimeStrToTime: _timestrtotime_sql, 1127 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1128 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1129 exp.Typeof: rename_func("toTypeName"), 1130 exp.VarMap: _map_sql, 1131 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1132 exp.MD5Digest: rename_func("MD5"), 1133 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1134 exp.SHA: rename_func("SHA1"), 1135 exp.SHA2: sha256_sql, 1136 exp.UnixToTime: _unix_to_time_sql, 1137 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1138 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1139 exp.Variance: rename_func("varSamp"), 1140 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1141 exp.Stddev: rename_func("stddevSamp"), 1142 exp.Chr: rename_func("CHAR"), 1143 exp.Lag: lambda self, e: self.func( 1144 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1145 ), 1146 exp.Lead: lambda self, e: self.func( 1147 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1148 ), 1149 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1150 rename_func("editDistance") 1151 ), 1152 exp.ParseDatetime: rename_func("parseDateTime"), 1153 } 1154 1155 PROPERTIES_LOCATION = { 1156 **generator.Generator.PROPERTIES_LOCATION, 1157 exp.OnCluster: exp.Properties.Location.POST_NAME, 1158 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1159 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1160 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1161 } 1162 1163 # There's no list in docs, but it can be found in Clickhouse code 1164 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1165 ON_CLUSTER_TARGETS = { 1166 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1167 "DATABASE", 1168 "TABLE", 1169 "VIEW", 1170 "DICTIONARY", 1171 "INDEX", 1172 "FUNCTION", 1173 "NAMED COLLECTION", 1174 } 1175 1176 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1177 NON_NULLABLE_TYPES = { 1178 exp.DataType.Type.ARRAY, 1179 exp.DataType.Type.MAP, 1180 exp.DataType.Type.STRUCT, 1181 exp.DataType.Type.POINT, 1182 exp.DataType.Type.RING, 1183 exp.DataType.Type.LINESTRING, 1184 exp.DataType.Type.MULTILINESTRING, 1185 exp.DataType.Type.POLYGON, 1186 exp.DataType.Type.MULTIPOLYGON, 1187 } 1188 1189 def offset_sql(self, expression: exp.Offset) -> str: 1190 offset = super().offset_sql(expression) 1191 1192 # OFFSET ... 
FETCH syntax requires a "ROW" or "ROWS" keyword 1193 # https://clickhouse.com/docs/sql-reference/statements/select/offset 1194 parent = expression.parent 1195 if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch): 1196 offset = f"{offset} ROWS" 1197 1198 return offset 1199 1200 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1201 strtodate_sql = self.function_fallback_sql(expression) 1202 1203 if not isinstance(expression.parent, exp.Cast): 1204 # StrToDate returns DATEs in other dialects (eg. postgres), so 1205 # this branch aims to improve the transpilation to clickhouse 1206 return self.cast_sql(exp.cast(expression, "DATE")) 1207 1208 return strtodate_sql 1209 1210 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1211 this = expression.this 1212 1213 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1214 return self.sql(this) 1215 1216 return super().cast_sql(expression, safe_prefix=safe_prefix) 1217 1218 def trycast_sql(self, expression: exp.TryCast) -> str: 1219 dtype = expression.to 1220 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1221 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1222 dtype.set("nullable", True) 1223 1224 return super().cast_sql(expression) 1225 1226 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1227 this = self.json_path_part(expression.this) 1228 return str(int(this) + 1) if is_int(this) else this 1229 1230 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1231 return f"AS {self.sql(expression, 'this')}" 1232 1233 def _any_to_has( 1234 self, 1235 expression: exp.EQ | exp.NEQ, 1236 default: t.Callable[[t.Any], str], 1237 prefix: str = "", 1238 ) -> str: 1239 if isinstance(expression.left, exp.Any): 1240 arr = expression.left 1241 this = expression.right 1242 elif isinstance(expression.right, exp.Any): 1243 arr = expression.right 1244 this = expression.left 1245 else: 1246 return default(expression) 1247 1248 return prefix + self.func("has", arr.this.unnest(), this) 1249 1250 def eq_sql(self, expression: exp.EQ) -> str: 1251 return self._any_to_has(expression, super().eq_sql) 1252 1253 def neq_sql(self, expression: exp.NEQ) -> str: 1254 return self._any_to_has(expression, super().neq_sql, "NOT ") 1255 1256 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1257 # Manually add a flag to make the search case-insensitive 1258 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1259 return self.func("match", expression.this, regex) 1260 1261 def datatype_sql(self, expression: exp.DataType) -> str: 1262 # String is the standard ClickHouse type, every other variant is just an alias. 1263 # Additionally, any supplied length parameter will be ignored. 1264 # 1265 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1266 if expression.this in self.STRING_TYPE_MAPPING: 1267 dtype = "String" 1268 else: 1269 dtype = super().datatype_sql(expression) 1270 1271 # This section changes the type to `Nullable(...)` if the following conditions hold: 1272 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1273 # and change their semantics 1274 # - It's not the key type of a `Map`. 
This is because ClickHouse enforces the following 1275 # constraint: "Type of Map key must be a type, that can be represented by integer or 1276 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1277 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1278 parent = expression.parent 1279 nullable = expression.args.get("nullable") 1280 if nullable is True or ( 1281 nullable is None 1282 and not ( 1283 isinstance(parent, exp.DataType) 1284 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1285 and expression.index in (None, 0) 1286 ) 1287 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1288 ): 1289 dtype = f"Nullable({dtype})" 1290 1291 return dtype 1292 1293 def cte_sql(self, expression: exp.CTE) -> str: 1294 if expression.args.get("scalar"): 1295 this = self.sql(expression, "this") 1296 alias = self.sql(expression, "alias") 1297 return f"{this} AS {alias}" 1298 1299 return super().cte_sql(expression) 1300 1301 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1302 return super().after_limit_modifiers(expression) + [ 1303 ( 1304 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1305 if expression.args.get("settings") 1306 else "" 1307 ), 1308 ( 1309 self.seg("FORMAT ") + self.sql(expression, "format") 1310 if expression.args.get("format") 1311 else "" 1312 ), 1313 ] 1314 1315 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1316 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1317 1318 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1319 return f"ON CLUSTER {self.sql(expression, 'this')}" 1320 1321 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1322 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1323 exp.Properties.Location.POST_NAME 1324 ): 1325 this_name = self.sql( 1326 expression.this if isinstance(expression.this, exp.Schema) else expression, 1327 "this", 1328 ) 1329 this_properties = " ".join( 1330 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1331 ) 1332 this_schema = self.schema_columns_sql(expression.this) 1333 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1334 1335 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1336 1337 return super().createable_sql(expression, locations) 1338 1339 def create_sql(self, expression: exp.Create) -> str: 1340 # The comment property comes last in CTAS statements, i.e. 
after the query 1341 query = expression.expression 1342 if isinstance(query, exp.Query): 1343 comment_prop = expression.find(exp.SchemaCommentProperty) 1344 if comment_prop: 1345 comment_prop.pop() 1346 query.replace(exp.paren(query)) 1347 else: 1348 comment_prop = None 1349 1350 create_sql = super().create_sql(expression) 1351 1352 comment_sql = self.sql(comment_prop) 1353 comment_sql = f" {comment_sql}" if comment_sql else "" 1354 1355 return f"{create_sql}{comment_sql}" 1356 1357 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1358 this = self.indent(self.sql(expression, "this")) 1359 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1360 1361 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1362 this = self.sql(expression, "this") 1363 this = f" {this}" if this else "" 1364 expr = self.sql(expression, "expression") 1365 expr = f" {expr}" if expr else "" 1366 index_type = self.sql(expression, "index_type") 1367 index_type = f" TYPE {index_type}" if index_type else "" 1368 granularity = self.sql(expression, "granularity") 1369 granularity = f" GRANULARITY {granularity}" if granularity else "" 1370 1371 return f"INDEX{this}{expr}{index_type}{granularity}" 1372 1373 def partition_sql(self, expression: exp.Partition) -> str: 1374 return f"PARTITION {self.expressions(expression, flat=True)}" 1375 1376 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1377 return f"ID {self.sql(expression.this)}" 1378 1379 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1380 return ( 1381 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1382 ) 1383 1384 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1385 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1386 1387 def is_sql(self, expression: exp.Is) -> str: 1388 is_sql = super().is_sql(expression) 1389 1390 if isinstance(expression.parent, exp.Not): 1391 # value IS NOT NULL -> NOT (value IS NULL) 1392 is_sql = self.wrap(is_sql) 1393 1394 return is_sql 1395 1396 def in_sql(self, expression: exp.In) -> str: 1397 in_sql = super().in_sql(expression) 1398 1399 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1400 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1401 1402 return in_sql 1403 1404 def not_sql(self, expression: exp.Not) -> str: 1405 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1406 # let `GLOBAL IN` child interpose `NOT` 1407 return self.sql(expression, "this") 1408 1409 return super().not_sql(expression) 1410 1411 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1412 # If the VALUES clause contains tuples of expressions, we need to treat it 1413 # as a table since Clickhouse will automatically alias it as such. 1414 alias = expression.args.get("alias") 1415 1416 if alias and alias.args.get("columns") and expression.expressions: 1417 values = expression.expressions[0].expressions 1418 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1419 else: 1420 values_as_table = True 1421 1422 return super().values_sql(expression, values_as_table=values_as_table)
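The `_parse_types` and `datatype_sql` overrides above work together so that types coming from other dialects, which are nullable by default, get wrapped in ClickHouse's `Nullable(...)` constructor. A minimal round trip illustrating the example given in the `_parse_types` comment (the trailing comment is the expected output):

    import sqlglot

    # TEXT is nullable by default in most dialects, so the ClickHouse
    # generator wraps it in the Nullable() type constructor on the way out.
    print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
    # SELECT CAST(x AS Nullable(String))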
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
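Relatedly, the Parser above sets LOG_DEFAULTS_TO_LN = True, so a single-argument LOG is read as the natural logarithm. A sketch under that assumption:

    import sqlglot

    # With LOG_DEFAULTS_TO_LN, LOG(x) parses as natural log rather than log10
    print(sqlglot.transpile("SELECT LOG(x)", read="clickhouse", write="clickhouse")[0])
    # expected: SELECT LN(x)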
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
    WITH data AS (
        SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
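A hedged sketch of how this surfaces through sqlglot's qualification pass; the exact qualified SQL is not asserted here:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """

    # Under ClickHouse semantics the projection alias expands early, so my_id
    # in WHERE / GROUP BY / HAVING should resolve to id, not data.my_id.
    print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))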
Whether the name of the function should be preserved inside the node's metadata. This can be useful for round-tripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (e.g. a literal newline).
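A sketch of the shape of this mapping; the entries below are illustrative assumptions rather than a copy of the dialect's actual table:

    # escaped sequence as written in the SQL text -> the character it denotes
    ESCAPED_SEQUENCES = {
        "\\n": "\n",  # backslash-n -> literal newline
        "\\t": "\t",  # backslash-t -> literal tab
    }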
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
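Following that example, transpiling CREATE SCHEMA into ClickHouse should surface the mapped keyword; the commented output is the expectation implied by the docstring, not an asserted result:

    import sqlglot

    print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
    # expected, per the mapping above: CREATE DATABASE foo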
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
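For ClickHouse, UNION reportedly falls in the None bucket, so a bare UNION arriving from another dialect should be rendered with an explicit qualifier. A sketch that leaves the exact qualifier unasserted:

    import sqlglot

    # DISTINCT/ALL must be explicit, so the generator is expected to spell
    # out the set-op qualifier instead of emitting a bare UNION.
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])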
219 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 220 # Clickhouse allows VALUES to have an embedded structure e.g: 221 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 222 # In this case, we don't want to qualify the columns 223 values = expression.expressions[0].expressions 224 225 structure = ( 226 values[0] 227 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 228 else None 229 ) 230 if structure: 231 # Split each column definition into the column name e.g: 232 # 'person String, place String' -> ['person', 'place'] 233 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 234 column_aliases = [ 235 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 236 ] 237 else: 238 # Default column aliases in CH are "c1", "c2", etc. 239 column_aliases = [ 240 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 241 ] 242 243 return column_aliases
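A standalone sketch of the structure-splitting step in the helper above, using the embedded structure string from its comment:

    # 'person String, place String' -> ['person', 'place']
    structure = "person String, place String"
    coldefs = [coldef.strip() for coldef in structure.split(",")]
    print([coldef.split(" ")[0] for coldef in coldefs])
    # ['person', 'place']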
245 class Tokenizer(tokens.Tokenizer): 246 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 247 IDENTIFIERS = ['"', "`"] 248 IDENTIFIER_ESCAPES = ["\\"] 249 STRING_ESCAPES = ["'", "\\"] 250 BIT_STRINGS = [("0b", "")] 251 HEX_STRINGS = [("0x", ""), ("0X", "")] 252 HEREDOC_STRINGS = ["$"] 253 254 KEYWORDS = { 255 **tokens.Tokenizer.KEYWORDS, 256 ".:": TokenType.DOTCOLON, 257 "ATTACH": TokenType.COMMAND, 258 "DATE32": TokenType.DATE32, 259 "DATETIME64": TokenType.DATETIME64, 260 "DICTIONARY": TokenType.DICTIONARY, 261 "DYNAMIC": TokenType.DYNAMIC, 262 "ENUM8": TokenType.ENUM8, 263 "ENUM16": TokenType.ENUM16, 264 "EXCHANGE": TokenType.COMMAND, 265 "FINAL": TokenType.FINAL, 266 "FIXEDSTRING": TokenType.FIXEDSTRING, 267 "FLOAT32": TokenType.FLOAT, 268 "FLOAT64": TokenType.DOUBLE, 269 "GLOBAL": TokenType.GLOBAL, 270 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 271 "MAP": TokenType.MAP, 272 "NESTED": TokenType.NESTED, 273 "NOTHING": TokenType.NOTHING, 274 "SAMPLE": TokenType.TABLE_SAMPLE, 275 "TUPLE": TokenType.STRUCT, 276 "UINT16": TokenType.USMALLINT, 277 "UINT32": TokenType.UINT, 278 "UINT64": TokenType.UBIGINT, 279 "UINT8": TokenType.UTINYINT, 280 "IPV4": TokenType.IPV4, 281 "IPV6": TokenType.IPV6, 282 "POINT": TokenType.POINT, 283 "RING": TokenType.RING, 284 "LINESTRING": TokenType.LINESTRING, 285 "MULTILINESTRING": TokenType.MULTILINESTRING, 286 "POLYGON": TokenType.POLYGON, 287 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 288 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 289 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 290 "SYSTEM": TokenType.COMMAND, 291 "PREWHERE": TokenType.PREWHERE, 292 } 293 KEYWORDS.pop("/*+") 294 295 SINGLE_TOKENS = { 296 **tokens.Tokenizer.SINGLE_TOKENS, 297 "$": TokenType.HEREDOC_STRING, 298 }
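A quick way to see some of these tokenizer settings in action; `Dialect.tokenize` is sqlglot's tokenization entry point:

    from sqlglot.dialects.clickhouse import ClickHouse

    # '#' opens a comment and 0x-prefixed literals are hex strings, per the
    # COMMENTS and HEX_STRINGS settings above.
    for token in ClickHouse().tokenize("SELECT 0xFF # trailing comment"):
        print(token.token_type, repr(token.text))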
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
300 class Parser(parser.Parser): 301 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 302 # * select x from t1 union all select x from t2 limit 1; 303 # * select x from t1 union all (select x from t2 limit 1); 304 MODIFIERS_ATTACHED_TO_SET_OP = False 305 INTERVAL_SPANS = False 306 OPTIONAL_ALIAS_TOKEN_CTE = False 307 JOINS_HAVE_EQUAL_PRECEDENCE = True 308 309 FUNCTIONS = { 310 **parser.Parser.FUNCTIONS, 311 "ANY": exp.AnyValue.from_arg_list, 312 "ARRAYSUM": exp.ArraySum.from_arg_list, 313 "ARRAYREVERSE": exp.ArrayReverse.from_arg_list, 314 "ARRAYSLICE": exp.ArraySlice.from_arg_list, 315 "COUNTIF": _build_count_if, 316 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 317 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 318 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 319 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 320 "DATE_FORMAT": _build_datetime_format(exp.TimeToStr), 321 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 322 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 323 "FORMATDATETIME": _build_datetime_format(exp.TimeToStr), 324 "JSONEXTRACTSTRING": build_json_extract_path( 325 exp.JSONExtractScalar, zero_based_indexing=False 326 ), 327 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 328 "MAP": parser.build_var_map, 329 "MATCH": exp.RegexpLike.from_arg_list, 330 "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime), 331 "RANDCANONICAL": exp.Rand.from_arg_list, 332 "STR_TO_DATE": _build_str_to_date, 333 "TUPLE": exp.Struct.from_arg_list, 334 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 335 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 336 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 337 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 338 "UNIQ": exp.ApproxDistinct.from_arg_list, 339 "XOR": lambda args: exp.Xor(expressions=args), 340 "MD5": exp.MD5Digest.from_arg_list, 341 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 342 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 343 "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list, 344 "TOTYPENAME": exp.Typeof.from_arg_list, 345 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 346 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 347 } 348 FUNCTIONS.pop("TRANSFORM") 349 350 AGG_FUNCTIONS = { 351 "count", 352 "min", 353 "max", 354 "sum", 355 "avg", 356 "any", 357 "stddevPop", 358 "stddevSamp", 359 "varPop", 360 "varSamp", 361 "corr", 362 "covarPop", 363 "covarSamp", 364 "entropy", 365 "exponentialMovingAverage", 366 "intervalLengthSum", 367 "kolmogorovSmirnovTest", 368 "mannWhitneyUTest", 369 "median", 370 "rankCorr", 371 "sumKahan", 372 "studentTTest", 373 "welchTTest", 374 "anyHeavy", 375 "anyLast", 376 "boundingRatio", 377 "first_value", 378 "last_value", 379 "argMin", 380 "argMax", 381 "avgWeighted", 382 "topK", 383 "topKWeighted", 384 "deltaSum", 385 "deltaSumTimestamp", 386 "groupArray", 387 "groupArrayLast", 388 "groupUniqArray", 389 "groupArrayInsertAt", 390 "groupArrayMovingAvg", 391 "groupArrayMovingSum", 392 "groupArraySample", 393 "groupBitAnd", 394 "groupBitOr", 395 "groupBitXor", 396 "groupBitmap", 397 "groupBitmapAnd", 398 "groupBitmapOr", 399 "groupBitmapXor", 400 "sumWithOverflow", 401 "sumMap", 402 "minMap", 403 "maxMap", 404 
"skewSamp", 405 "skewPop", 406 "kurtSamp", 407 "kurtPop", 408 "uniq", 409 "uniqExact", 410 "uniqCombined", 411 "uniqCombined64", 412 "uniqHLL12", 413 "uniqTheta", 414 "quantile", 415 "quantiles", 416 "quantileExact", 417 "quantilesExact", 418 "quantileExactLow", 419 "quantilesExactLow", 420 "quantileExactHigh", 421 "quantilesExactHigh", 422 "quantileExactWeighted", 423 "quantilesExactWeighted", 424 "quantileTiming", 425 "quantilesTiming", 426 "quantileTimingWeighted", 427 "quantilesTimingWeighted", 428 "quantileDeterministic", 429 "quantilesDeterministic", 430 "quantileTDigest", 431 "quantilesTDigest", 432 "quantileTDigestWeighted", 433 "quantilesTDigestWeighted", 434 "quantileBFloat16", 435 "quantilesBFloat16", 436 "quantileBFloat16Weighted", 437 "quantilesBFloat16Weighted", 438 "simpleLinearRegression", 439 "stochasticLinearRegression", 440 "stochasticLogisticRegression", 441 "categoricalInformationValue", 442 "contingency", 443 "cramersV", 444 "cramersVBiasCorrected", 445 "theilsU", 446 "maxIntersections", 447 "maxIntersectionsPosition", 448 "meanZTest", 449 "quantileInterpolatedWeighted", 450 "quantilesInterpolatedWeighted", 451 "quantileGK", 452 "quantilesGK", 453 "sparkBar", 454 "sumCount", 455 "largestTriangleThreeBuckets", 456 "histogram", 457 "sequenceMatch", 458 "sequenceCount", 459 "windowFunnel", 460 "retention", 461 "uniqUpTo", 462 "sequenceNextNode", 463 "exponentialTimeDecayedAvg", 464 } 465 466 AGG_FUNCTIONS_SUFFIXES = [ 467 "If", 468 "Array", 469 "ArrayIf", 470 "Map", 471 "SimpleState", 472 "State", 473 "Merge", 474 "MergeState", 475 "ForEach", 476 "Distinct", 477 "OrDefault", 478 "OrNull", 479 "Resample", 480 "ArgMin", 481 "ArgMax", 482 ] 483 484 FUNC_TOKENS = { 485 *parser.Parser.FUNC_TOKENS, 486 TokenType.AND, 487 TokenType.OR, 488 TokenType.SET, 489 } 490 491 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 492 493 ID_VAR_TOKENS = { 494 *parser.Parser.ID_VAR_TOKENS, 495 TokenType.LIKE, 496 } 497 498 AGG_FUNC_MAPPING = ( 499 lambda functions, suffixes: { 500 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 501 } 502 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 503 504 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 505 506 FUNCTION_PARSERS = { 507 **parser.Parser.FUNCTION_PARSERS, 508 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 509 "QUANTILE": lambda self: self._parse_quantile(), 510 "MEDIAN": lambda self: self._parse_quantile(), 511 "COLUMNS": lambda self: self._parse_columns(), 512 } 513 514 FUNCTION_PARSERS.pop("MATCH") 515 516 PROPERTY_PARSERS = { 517 **parser.Parser.PROPERTY_PARSERS, 518 "ENGINE": lambda self: self._parse_engine_property(), 519 } 520 PROPERTY_PARSERS.pop("DYNAMIC") 521 522 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 523 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 524 525 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 526 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 527 528 RANGE_PARSERS = { 529 **parser.Parser.RANGE_PARSERS, 530 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 531 } 532 533 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 534 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
535 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 536 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 537 538 JOIN_KINDS = { 539 *parser.Parser.JOIN_KINDS, 540 TokenType.ANY, 541 TokenType.ASOF, 542 TokenType.ARRAY, 543 } 544 545 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 546 TokenType.ANY, 547 TokenType.ARRAY, 548 TokenType.FINAL, 549 TokenType.FORMAT, 550 TokenType.SETTINGS, 551 } 552 553 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 554 TokenType.FORMAT, 555 } 556 557 LOG_DEFAULTS_TO_LN = True 558 559 QUERY_MODIFIER_PARSERS = { 560 **parser.Parser.QUERY_MODIFIER_PARSERS, 561 TokenType.SETTINGS: lambda self: ( 562 "settings", 563 self._advance() or self._parse_csv(self._parse_assignment), 564 ), 565 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 566 } 567 568 CONSTRAINT_PARSERS = { 569 **parser.Parser.CONSTRAINT_PARSERS, 570 "INDEX": lambda self: self._parse_index_constraint(), 571 "CODEC": lambda self: self._parse_compress(), 572 } 573 574 ALTER_PARSERS = { 575 **parser.Parser.ALTER_PARSERS, 576 "REPLACE": lambda self: self._parse_alter_table_replace(), 577 } 578 579 SCHEMA_UNNAMED_CONSTRAINTS = { 580 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 581 "INDEX", 582 } 583 584 PLACEHOLDER_PARSERS = { 585 **parser.Parser.PLACEHOLDER_PARSERS, 586 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 587 } 588 589 def _parse_engine_property(self) -> exp.EngineProperty: 590 self._match(TokenType.EQ) 591 return self.expression( 592 exp.EngineProperty, 593 this=self._parse_field(any_token=True, anonymous_func=True), 594 ) 595 596 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 597 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 598 return self._parse_lambda() 599 600 def _parse_types( 601 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 602 ) -> t.Optional[exp.Expression]: 603 dtype = super()._parse_types( 604 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 605 ) 606 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 607 # Mark every type as non-nullable which is ClickHouse's default, unless it's 608 # already marked as nullable. This marker helps us transpile types from other 609 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 610 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 611 # fail in ClickHouse without the `Nullable` type constructor. 612 dtype.set("nullable", False) 613 614 return dtype 615 616 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 617 index = self._index 618 this = self._parse_bitwise() 619 if self._match(TokenType.FROM): 620 self._retreat(index) 621 return super()._parse_extract() 622 623 # We return Anonymous here because extract and regexpExtract have different semantics, 624 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 625 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 626 # 627 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
628 self._match(TokenType.COMMA) 629 return self.expression( 630 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 631 ) 632 633 def _parse_assignment(self) -> t.Optional[exp.Expression]: 634 this = super()._parse_assignment() 635 636 if self._match(TokenType.PLACEHOLDER): 637 return self.expression( 638 exp.If, 639 this=this, 640 true=self._parse_assignment(), 641 false=self._match(TokenType.COLON) and self._parse_assignment(), 642 ) 643 644 return this 645 646 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 647 """ 648 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 649 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 650 """ 651 index = self._index 652 653 this = self._parse_id_var() 654 self._match(TokenType.COLON) 655 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 656 self._match_text_seq("IDENTIFIER") and "Identifier" 657 ) 658 659 if not kind: 660 self._retreat(index) 661 return None 662 elif not self._match(TokenType.R_BRACE): 663 self.raise_error("Expecting }") 664 665 if isinstance(this, exp.Identifier) and not this.quoted: 666 this = exp.var(this.name) 667 668 return self.expression(exp.Placeholder, this=this, kind=kind) 669 670 def _parse_bracket( 671 self, this: t.Optional[exp.Expression] = None 672 ) -> t.Optional[exp.Expression]: 673 l_brace = self._match(TokenType.L_BRACE, advance=False) 674 bracket = super()._parse_bracket(this) 675 676 if l_brace and isinstance(bracket, exp.Struct): 677 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 678 for expression in bracket.expressions: 679 if not isinstance(expression, exp.PropertyEQ): 680 break 681 682 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 683 varmap.args["values"].append("expressions", expression.expression) 684 685 return varmap 686 687 return bracket 688 689 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 690 this = super()._parse_in(this) 691 this.set("is_global", is_global) 692 return this 693 694 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 695 is_negated = self._match(TokenType.NOT) 696 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 697 return self.expression(exp.Not, this=this) if is_negated else this 698 699 def _parse_table( 700 self, 701 schema: bool = False, 702 joins: bool = False, 703 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 704 parse_bracket: bool = False, 705 is_db_reference: bool = False, 706 parse_partition: bool = False, 707 consume_pipe: bool = False, 708 ) -> t.Optional[exp.Expression]: 709 this = super()._parse_table( 710 schema=schema, 711 joins=joins, 712 alias_tokens=alias_tokens, 713 parse_bracket=parse_bracket, 714 is_db_reference=is_db_reference, 715 ) 716 717 if isinstance(this, exp.Table): 718 inner = this.this 719 alias = this.args.get("alias") 720 721 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 722 alias.set("columns", [exp.to_identifier("generate_series")]) 723 724 if self._match(TokenType.FINAL): 725 this = self.expression(exp.Final, this=this) 726 727 return this 728 729 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 730 return super()._parse_position(haystack_first=True) 731 732 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 733 def _parse_cte(self) -> t.Optional[exp.CTE]: 734 # WITH <identifier> AS 
<subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

            # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
            # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
            if join.kind == "ARRAY":
                for table in join.find_all(exp.Table):
                    table.replace(table.to_column())

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
            params = self._parse_func_params(anon_func)

            kwargs = {
                "this": anon_func.this,
                "expressions": anon_func.expressions,
            }
            if parts[1]:
                exp_class: t.Type[exp.Expression] = (
                    exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                )
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_string() or self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, so "APPLY"
        # shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
        if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
            return this

        return super()._parse_alias(this=this, explicit=explicit)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        this = super()._parse_expression()

        # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]" modifier
        while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            self._match(TokenType.R_PAREN)

        return this

    def _parse_columns(self) -> exp.Expression:
        this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

        while self._next and self._match_text_seq(")", "APPLY", "("):
            self._match(TokenType.R_PAREN)
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        return this

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        value = super()._parse_value(values=values)
        if not value:
            return None

        # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
        # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
        # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
        # but the final result is not altered by the extra parentheses.
        # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
        expressions = value.expressions
        if values and not isinstance(expressions[-1], exp.Tuple):
            value.set(
                "expressions",
                [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
            )

        return value
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
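These arguments are usually forwarded through the top-level API rather than by instantiating the Parser directly; a hedged sketch, assuming a standard sqlglot installation:

import sqlglot
from sqlglot.errors import ErrorLevel

# Keyword options such as error_level flow down to the dialect's Parser
ast = sqlglot.parse_one(
    "SELECT quantile(0.5)(x) FROM t",  # parameterized agg handled by _parse_func_params
    read="clickhouse",
    error_level=ErrorLevel.RAISE,
)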
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArrayReverse: rename_func("arrayReverse"),
        exp.ArraySlice: rename_func("arraySlice"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.EndsWith: rename_func("endsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.Typeof: rename_func("toTypeName"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
        exp.ParseDatetime: rename_func("parseDateTime"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in the docs, but it can be found in the ClickHouse code,
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def offset_sql(self, expression: exp.Offset) -> str:
        offset = super().offset_sql(expression)

        # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
        # https://clickhouse.com/docs/sql-reference/statements/select/offset
        parent = expression.parent
        if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
            offset = f"{offset} ROWS"

        return offset

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since Clickhouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
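A minimal sketch (assuming a standard sqlglot installation; exact output strings can vary by version) of how a few of these overrides surface in transpiled SQL:

import sqlglot

# STRING_TYPE_MAPPING collapses VARCHAR and friends to String; types coming
# from other dialects are additionally wrapped in Nullable(...) by datatype_sql
print(sqlglot.transpile("CAST(x AS VARCHAR(255))", write="clickhouse")[0])
# e.g. CAST(x AS Nullable(String))

# trycast_sql renders TRY_CAST as a plain CAST to a Nullable type
print(sqlglot.transpile("TRY_CAST(x AS INT)", write="clickhouse")[0])
# e.g. CAST(x AS Nullable(Int32))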
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
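A hedged example of driving these options through the top-level API, assuming a standard sqlglot installation:

import sqlglot

sql = sqlglot.transpile(
    "SELECT a, b FROM t LIMIT 10",
    write="clickhouse",
    pretty=True,    # format and indent the output
    identify=True,  # quote all identifiers
)[0]
print(sql)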