sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    remove_from_array_using_filter,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd
]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure e.g:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name e.g:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
            "ARRAYSLICE": exp.ArraySlice.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
            "TOTYPENAME": exp.Typeof.from_arg_list,
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))

            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArrayReverse: rename_func("arrayReverse"),
            exp.ArraySlice: rename_func("arraySlice"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.EndsWith: rename_func("endsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.Typeof: rename_func("toTypeName"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
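
For orientation, here is a minimal usage sketch (not part of the module source) showing how the dialect above is exercised through sqlglot's public API. It assumes a recent sqlglot release where this dialect is registered under the name "clickhouse"; the sample queries and the printed outputs are illustrative of the behavior documented in the comments of Parser._parse_types, Generator.datatype_sql, and Parser._parse_quantile.

    import sqlglot

    # Types are non-nullable by default in ClickHouse, so a plain TEXT coming
    # from another dialect is expected to be wrapped in Nullable on the way out
    # (see the nullable handling in _parse_types/datatype_sql above).
    print(sqlglot.transpile("CAST(x AS TEXT)", write="clickhouse")[0])
    # expected: CAST(x AS Nullable(String))

    # Parameterized aggregates such as quantile(0.5)(x) are parsed via
    # _parse_quantile into exp.Quantile and rendered back by _quantile_sql,
    # so a ClickHouse-to-ClickHouse round trip should preserve the call shape.
    print(sqlglot.transpile("SELECT quantile(0.5)(x) FROM t", read="clickhouse", write="clickhouse")[0])
    # expected: SELECT quantile(0.5)(x) FROM t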
<subquery expression> 728 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 729 730 if not cte: 731 # WITH <expression> AS <identifier> 732 cte = self.expression( 733 exp.CTE, 734 this=self._parse_assignment(), 735 alias=self._parse_table_alias(), 736 scalar=True, 737 ) 738 739 return cte 740 741 def _parse_join_parts( 742 self, 743 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 744 is_global = self._match(TokenType.GLOBAL) and self._prev 745 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 746 747 if kind_pre: 748 kind = self._match_set(self.JOIN_KINDS) and self._prev 749 side = self._match_set(self.JOIN_SIDES) and self._prev 750 return is_global, side, kind 751 752 return ( 753 is_global, 754 self._match_set(self.JOIN_SIDES) and self._prev, 755 self._match_set(self.JOIN_KINDS) and self._prev, 756 ) 757 758 def _parse_join( 759 self, skip_join_token: bool = False, parse_bracket: bool = False 760 ) -> t.Optional[exp.Join]: 761 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 762 if join: 763 join.set("global", join.args.pop("method", None)) 764 765 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 766 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 767 if join.kind == "ARRAY": 768 for table in join.find_all(exp.Table): 769 table.replace(table.to_column()) 770 771 return join 772 773 def _parse_function( 774 self, 775 functions: t.Optional[t.Dict[str, t.Callable]] = None, 776 anonymous: bool = False, 777 optional_parens: bool = True, 778 any_token: bool = False, 779 ) -> t.Optional[exp.Expression]: 780 expr = super()._parse_function( 781 functions=functions, 782 anonymous=anonymous, 783 optional_parens=optional_parens, 784 any_token=any_token, 785 ) 786 787 func = expr.this if isinstance(expr, exp.Window) else expr 788 789 # Aggregate functions can be split in 2 parts: <func_name><suffix> 790 parts = ( 791 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 792 ) 793 794 if parts: 795 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 796 params = self._parse_func_params(anon_func) 797 798 kwargs = { 799 "this": anon_func.this, 800 "expressions": anon_func.expressions, 801 } 802 if parts[1]: 803 exp_class: t.Type[exp.Expression] = ( 804 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 805 ) 806 else: 807 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 808 809 kwargs["exp_class"] = exp_class 810 if params: 811 kwargs["params"] = params 812 813 func = self.expression(**kwargs) 814 815 if isinstance(expr, exp.Window): 816 # The window's func was parsed as Anonymous in base parser, fix its 817 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 818 expr.set("this", func) 819 elif params: 820 # Params have blocked super()._parse_function() from parsing the following window 821 # (if that exists) as they're standing between the function call and the window spec 822 expr = self._parse_window(func) 823 else: 824 expr = func 825 826 return expr 827 828 def _parse_func_params( 829 self, this: t.Optional[exp.Func] = None 830 ) -> t.Optional[t.List[exp.Expression]]: 831 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 832 return self._parse_csv(self._parse_lambda) 833 834 if self._match(TokenType.L_PAREN): 835 params = self._parse_csv(self._parse_lambda) 836 self._match_r_paren(this) 837 return params 838 839 return None 840 841 def _parse_quantile(self) -> 
exp.Quantile: 842 this = self._parse_lambda() 843 params = self._parse_func_params() 844 if params: 845 return self.expression(exp.Quantile, this=params[0], quantile=this) 846 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 847 848 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 849 return super()._parse_wrapped_id_vars(optional=True) 850 851 def _parse_primary_key( 852 self, wrapped_optional: bool = False, in_props: bool = False 853 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 854 return super()._parse_primary_key( 855 wrapped_optional=wrapped_optional or in_props, in_props=in_props 856 ) 857 858 def _parse_on_property(self) -> t.Optional[exp.Expression]: 859 index = self._index 860 if self._match_text_seq("CLUSTER"): 861 this = self._parse_string() or self._parse_id_var() 862 if this: 863 return self.expression(exp.OnCluster, this=this) 864 else: 865 self._retreat(index) 866 return None 867 868 def _parse_index_constraint( 869 self, kind: t.Optional[str] = None 870 ) -> exp.IndexColumnConstraint: 871 # INDEX name1 expr TYPE type1(args) GRANULARITY value 872 this = self._parse_id_var() 873 expression = self._parse_assignment() 874 875 index_type = self._match_text_seq("TYPE") and ( 876 self._parse_function() or self._parse_var() 877 ) 878 879 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 880 881 return self.expression( 882 exp.IndexColumnConstraint, 883 this=this, 884 expression=expression, 885 index_type=index_type, 886 granularity=granularity, 887 ) 888 889 def _parse_partition(self) -> t.Optional[exp.Partition]: 890 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 891 if not self._match(TokenType.PARTITION): 892 return None 893 894 if self._match_text_seq("ID"): 895 # Corresponds to the PARTITION ID <string_value> syntax 896 expressions: t.List[exp.Expression] = [ 897 self.expression(exp.PartitionId, this=self._parse_string()) 898 ] 899 else: 900 expressions = self._parse_expressions() 901 902 return self.expression(exp.Partition, expressions=expressions) 903 904 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 905 partition = self._parse_partition() 906 907 if not partition or not self._match(TokenType.FROM): 908 return None 909 910 return self.expression( 911 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 912 ) 913 914 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 915 if not self._match_text_seq("PROJECTION"): 916 return None 917 918 return self.expression( 919 exp.ProjectionDef, 920 this=self._parse_id_var(), 921 expression=self._parse_wrapped(self._parse_statement), 922 ) 923 924 def _parse_constraint(self) -> t.Optional[exp.Expression]: 925 return super()._parse_constraint() or self._parse_projection_def() 926 927 def _parse_alias( 928 self, this: t.Optional[exp.Expression], explicit: bool = False 929 ) -> t.Optional[exp.Expression]: 930 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 931 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 932 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 933 return this 934 935 return super()._parse_alias(this=this, explicit=explicit) 936 937 def _parse_expression(self) -> t.Optional[exp.Expression]: 938 this = super()._parse_expression() 939 940 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 941 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 942 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 943 self._match(TokenType.R_PAREN) 944 945 return this 946 947 def _parse_columns(self) -> exp.Expression: 948 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 949 950 while self._next and self._match_text_seq(")", "APPLY", "("): 951 self._match(TokenType.R_PAREN) 952 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 953 return this 954 955 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 956 value = super()._parse_value(values=values) 957 if not value: 958 return None 959 960 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 961 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 962 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 963 # but the final result is not altered by the extra parentheses. 964 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 965 expressions = value.expressions 966 if values and not isinstance(expressions[-1], exp.Tuple): 967 value.set( 968 "expressions", 969 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 970 ) 971 972 return value 973 974 class Generator(generator.Generator): 975 QUERY_HINTS = False 976 STRUCT_DELIMITER = ("(", ")") 977 NVL2_SUPPORTED = False 978 TABLESAMPLE_REQUIRES_PARENS = False 979 TABLESAMPLE_SIZE_IS_ROWS = False 980 TABLESAMPLE_KEYWORDS = "SAMPLE" 981 LAST_DAY_SUPPORTS_DATE_PART = False 982 CAN_IMPLEMENT_ARRAY_ANY = True 983 SUPPORTS_TO_NUMBER = False 984 JOIN_HINTS = False 985 TABLE_HINTS = False 986 GROUPINGS_SEP = "" 987 SET_OP_MODIFIERS = False 988 ARRAY_SIZE_NAME = "LENGTH" 989 WRAP_DERIVED_VALUES = False 990 991 STRING_TYPE_MAPPING = { 992 exp.DataType.Type.BLOB: "String", 993 exp.DataType.Type.CHAR: "String", 994 exp.DataType.Type.LONGBLOB: "String", 995 exp.DataType.Type.LONGTEXT: "String", 996 exp.DataType.Type.MEDIUMBLOB: "String", 997 exp.DataType.Type.MEDIUMTEXT: "String", 998 exp.DataType.Type.TINYBLOB: "String", 999 exp.DataType.Type.TINYTEXT: "String", 1000 exp.DataType.Type.TEXT: "String", 1001 exp.DataType.Type.VARBINARY: "String", 1002 exp.DataType.Type.VARCHAR: "String", 1003 } 1004 1005 SUPPORTED_JSON_PATH_PARTS = { 1006 exp.JSONPathKey, 1007 exp.JSONPathRoot, 1008 exp.JSONPathSubscript, 1009 } 1010 1011 TYPE_MAPPING = { 1012 **generator.Generator.TYPE_MAPPING, 1013 **STRING_TYPE_MAPPING, 1014 exp.DataType.Type.ARRAY: "Array", 1015 exp.DataType.Type.BOOLEAN: "Bool", 1016 exp.DataType.Type.BIGINT: "Int64", 1017 exp.DataType.Type.DATE32: "Date32", 1018 exp.DataType.Type.DATETIME: "DateTime", 1019 exp.DataType.Type.DATETIME2: "DateTime", 1020 exp.DataType.Type.SMALLDATETIME: "DateTime", 1021 exp.DataType.Type.DATETIME64: "DateTime64", 1022 exp.DataType.Type.DECIMAL: "Decimal", 1023 exp.DataType.Type.DECIMAL32: "Decimal32", 1024 exp.DataType.Type.DECIMAL64: "Decimal64", 1025 
exp.DataType.Type.DECIMAL128: "Decimal128", 1026 exp.DataType.Type.DECIMAL256: "Decimal256", 1027 exp.DataType.Type.TIMESTAMP: "DateTime", 1028 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1029 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1030 exp.DataType.Type.DOUBLE: "Float64", 1031 exp.DataType.Type.ENUM: "Enum", 1032 exp.DataType.Type.ENUM8: "Enum8", 1033 exp.DataType.Type.ENUM16: "Enum16", 1034 exp.DataType.Type.FIXEDSTRING: "FixedString", 1035 exp.DataType.Type.FLOAT: "Float32", 1036 exp.DataType.Type.INT: "Int32", 1037 exp.DataType.Type.MEDIUMINT: "Int32", 1038 exp.DataType.Type.INT128: "Int128", 1039 exp.DataType.Type.INT256: "Int256", 1040 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1041 exp.DataType.Type.MAP: "Map", 1042 exp.DataType.Type.NESTED: "Nested", 1043 exp.DataType.Type.NOTHING: "Nothing", 1044 exp.DataType.Type.SMALLINT: "Int16", 1045 exp.DataType.Type.STRUCT: "Tuple", 1046 exp.DataType.Type.TINYINT: "Int8", 1047 exp.DataType.Type.UBIGINT: "UInt64", 1048 exp.DataType.Type.UINT: "UInt32", 1049 exp.DataType.Type.UINT128: "UInt128", 1050 exp.DataType.Type.UINT256: "UInt256", 1051 exp.DataType.Type.USMALLINT: "UInt16", 1052 exp.DataType.Type.UTINYINT: "UInt8", 1053 exp.DataType.Type.IPV4: "IPv4", 1054 exp.DataType.Type.IPV6: "IPv6", 1055 exp.DataType.Type.POINT: "Point", 1056 exp.DataType.Type.RING: "Ring", 1057 exp.DataType.Type.LINESTRING: "LineString", 1058 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1059 exp.DataType.Type.POLYGON: "Polygon", 1060 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1061 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1062 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1063 exp.DataType.Type.DYNAMIC: "Dynamic", 1064 } 1065 1066 TRANSFORMS = { 1067 **generator.Generator.TRANSFORMS, 1068 exp.AnyValue: rename_func("any"), 1069 exp.ApproxDistinct: rename_func("uniq"), 1070 exp.ArrayConcat: rename_func("arrayConcat"), 1071 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1072 exp.ArrayRemove: remove_from_array_using_filter, 1073 exp.ArrayReverse: rename_func("arrayReverse"), 1074 exp.ArraySlice: rename_func("arraySlice"), 1075 exp.ArraySum: rename_func("arraySum"), 1076 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1077 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1078 exp.Array: inline_array_sql, 1079 exp.CastToStrType: rename_func("CAST"), 1080 exp.CountIf: rename_func("countIf"), 1081 exp.CompressColumnConstraint: lambda self, 1082 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1083 exp.ComputedColumnConstraint: lambda self, 1084 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1085 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1086 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1087 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1088 exp.DateStrToDate: rename_func("toDate"), 1089 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1090 exp.Explode: rename_func("arrayJoin"), 1091 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1092 exp.IsNan: rename_func("isNaN"), 1093 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1094 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1095 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1096 exp.JSONPathKey: json_path_key_only_name, 1097 exp.JSONPathRoot: lambda *_: "", 1098 exp.Length: length_or_char_length_sql, 1099 exp.Map: _map_sql, 1100 exp.Median: 
rename_func("median"), 1101 exp.Nullif: rename_func("nullIf"), 1102 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1103 exp.Pivot: no_pivot_sql, 1104 exp.Quantile: _quantile_sql, 1105 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1106 exp.Rand: rename_func("randCanonical"), 1107 exp.StartsWith: rename_func("startsWith"), 1108 exp.EndsWith: rename_func("endsWith"), 1109 exp.StrPosition: lambda self, e: strposition_sql( 1110 self, 1111 e, 1112 func_name="POSITION", 1113 supports_position=True, 1114 use_ansi_position=False, 1115 ), 1116 exp.TimeToStr: lambda self, e: self.func( 1117 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1118 ), 1119 exp.TimeStrToTime: _timestrtotime_sql, 1120 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1121 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1122 exp.Typeof: rename_func("toTypeName"), 1123 exp.VarMap: _map_sql, 1124 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1125 exp.MD5Digest: rename_func("MD5"), 1126 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1127 exp.SHA: rename_func("SHA1"), 1128 exp.SHA2: sha256_sql, 1129 exp.UnixToTime: _unix_to_time_sql, 1130 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1131 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1132 exp.Variance: rename_func("varSamp"), 1133 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1134 exp.Stddev: rename_func("stddevSamp"), 1135 exp.Chr: rename_func("CHAR"), 1136 exp.Lag: lambda self, e: self.func( 1137 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1138 ), 1139 exp.Lead: lambda self, e: self.func( 1140 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1141 ), 1142 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1143 rename_func("editDistance") 1144 ), 1145 } 1146 1147 PROPERTIES_LOCATION = { 1148 **generator.Generator.PROPERTIES_LOCATION, 1149 exp.OnCluster: exp.Properties.Location.POST_NAME, 1150 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1151 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1152 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1153 } 1154 1155 # There's no list in docs, but it can be found in Clickhouse code 1156 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1157 ON_CLUSTER_TARGETS = { 1158 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1159 "DATABASE", 1160 "TABLE", 1161 "VIEW", 1162 "DICTIONARY", 1163 "INDEX", 1164 "FUNCTION", 1165 "NAMED COLLECTION", 1166 } 1167 1168 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1169 NON_NULLABLE_TYPES = { 1170 exp.DataType.Type.ARRAY, 1171 exp.DataType.Type.MAP, 1172 exp.DataType.Type.STRUCT, 1173 exp.DataType.Type.POINT, 1174 exp.DataType.Type.RING, 1175 exp.DataType.Type.LINESTRING, 1176 exp.DataType.Type.MULTILINESTRING, 1177 exp.DataType.Type.POLYGON, 1178 exp.DataType.Type.MULTIPOLYGON, 1179 } 1180 1181 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1182 strtodate_sql = self.function_fallback_sql(expression) 1183 1184 if not isinstance(expression.parent, exp.Cast): 1185 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1186 # this branch aims to improve the transpilation to clickhouse 1187 return self.cast_sql(exp.cast(expression, "DATE")) 1188 1189 return strtodate_sql 1190 1191 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1192 this = expression.this 1193 1194 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1195 return self.sql(this) 1196 1197 return super().cast_sql(expression, safe_prefix=safe_prefix) 1198 1199 def trycast_sql(self, expression: exp.TryCast) -> str: 1200 dtype = expression.to 1201 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1202 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1203 dtype.set("nullable", True) 1204 1205 return super().cast_sql(expression) 1206 1207 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1208 this = self.json_path_part(expression.this) 1209 return str(int(this) + 1) if is_int(this) else this 1210 1211 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1212 return f"AS {self.sql(expression, 'this')}" 1213 1214 def _any_to_has( 1215 self, 1216 expression: exp.EQ | exp.NEQ, 1217 default: t.Callable[[t.Any], str], 1218 prefix: str = "", 1219 ) -> str: 1220 if isinstance(expression.left, exp.Any): 1221 arr = expression.left 1222 this = expression.right 1223 elif isinstance(expression.right, exp.Any): 1224 arr = expression.right 1225 this = expression.left 1226 else: 1227 return default(expression) 1228 1229 return prefix + self.func("has", arr.this.unnest(), this) 1230 1231 def eq_sql(self, expression: exp.EQ) -> str: 1232 return self._any_to_has(expression, super().eq_sql) 1233 1234 def neq_sql(self, expression: exp.NEQ) -> str: 1235 return self._any_to_has(expression, super().neq_sql, "NOT ") 1236 1237 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1238 # Manually add a flag to make the search case-insensitive 1239 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1240 return self.func("match", expression.this, regex) 1241 1242 def datatype_sql(self, expression: exp.DataType) -> str: 1243 # String is the standard ClickHouse type, every other variant is just an alias. 1244 # Additionally, any supplied length parameter will be ignored. 1245 # 1246 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1247 if expression.this in self.STRING_TYPE_MAPPING: 1248 dtype = "String" 1249 else: 1250 dtype = super().datatype_sql(expression) 1251 1252 # This section changes the type to `Nullable(...)` if the following conditions hold: 1253 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1254 # and change their semantics 1255 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1256 # constraint: "Type of Map key must be a type, that can be represented by integer or 1257 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1258 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1259 parent = expression.parent 1260 nullable = expression.args.get("nullable") 1261 if nullable is True or ( 1262 nullable is None 1263 and not ( 1264 isinstance(parent, exp.DataType) 1265 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1266 and expression.index in (None, 0) 1267 ) 1268 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1269 ): 1270 dtype = f"Nullable({dtype})" 1271 1272 return dtype 1273 1274 def cte_sql(self, expression: exp.CTE) -> str: 1275 if expression.args.get("scalar"): 1276 this = self.sql(expression, "this") 1277 alias = self.sql(expression, "alias") 1278 return f"{this} AS {alias}" 1279 1280 return super().cte_sql(expression) 1281 1282 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1283 return super().after_limit_modifiers(expression) + [ 1284 ( 1285 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1286 if expression.args.get("settings") 1287 else "" 1288 ), 1289 ( 1290 self.seg("FORMAT ") + self.sql(expression, "format") 1291 if expression.args.get("format") 1292 else "" 1293 ), 1294 ] 1295 1296 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1297 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1298 1299 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1300 return f"ON CLUSTER {self.sql(expression, 'this')}" 1301 1302 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1303 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1304 exp.Properties.Location.POST_NAME 1305 ): 1306 this_name = self.sql( 1307 expression.this if isinstance(expression.this, exp.Schema) else expression, 1308 "this", 1309 ) 1310 this_properties = " ".join( 1311 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1312 ) 1313 this_schema = self.schema_columns_sql(expression.this) 1314 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1315 1316 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1317 1318 return super().createable_sql(expression, locations) 1319 1320 def create_sql(self, expression: exp.Create) -> str: 1321 # The comment property comes last in CTAS statements, i.e. 
after the query 1322 query = expression.expression 1323 if isinstance(query, exp.Query): 1324 comment_prop = expression.find(exp.SchemaCommentProperty) 1325 if comment_prop: 1326 comment_prop.pop() 1327 query.replace(exp.paren(query)) 1328 else: 1329 comment_prop = None 1330 1331 create_sql = super().create_sql(expression) 1332 1333 comment_sql = self.sql(comment_prop) 1334 comment_sql = f" {comment_sql}" if comment_sql else "" 1335 1336 return f"{create_sql}{comment_sql}" 1337 1338 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1339 this = self.indent(self.sql(expression, "this")) 1340 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1341 1342 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1343 this = self.sql(expression, "this") 1344 this = f" {this}" if this else "" 1345 expr = self.sql(expression, "expression") 1346 expr = f" {expr}" if expr else "" 1347 index_type = self.sql(expression, "index_type") 1348 index_type = f" TYPE {index_type}" if index_type else "" 1349 granularity = self.sql(expression, "granularity") 1350 granularity = f" GRANULARITY {granularity}" if granularity else "" 1351 1352 return f"INDEX{this}{expr}{index_type}{granularity}" 1353 1354 def partition_sql(self, expression: exp.Partition) -> str: 1355 return f"PARTITION {self.expressions(expression, flat=True)}" 1356 1357 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1358 return f"ID {self.sql(expression.this)}" 1359 1360 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1361 return ( 1362 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1363 ) 1364 1365 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1366 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1367 1368 def is_sql(self, expression: exp.Is) -> str: 1369 is_sql = super().is_sql(expression) 1370 1371 if isinstance(expression.parent, exp.Not): 1372 # value IS NOT NULL -> NOT (value IS NULL) 1373 is_sql = self.wrap(is_sql) 1374 1375 return is_sql 1376 1377 def in_sql(self, expression: exp.In) -> str: 1378 in_sql = super().in_sql(expression) 1379 1380 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1381 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1382 1383 return in_sql 1384 1385 def not_sql(self, expression: exp.Not) -> str: 1386 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1387 # let `GLOBAL IN` child interpose `NOT` 1388 return self.sql(expression, "this") 1389 1390 return super().not_sql(expression) 1391 1392 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1393 # If the VALUES clause contains tuples of expressions, we need to treat it 1394 # as a table since Clickhouse will automatically alias it as such. 1395 alias = expression.args.get("alias") 1396 1397 if alias and alias.args.get("columns") and expression.expressions: 1398 values = expression.expressions[0].expressions 1399 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1400 else: 1401 values_as_table = True 1402 1403 return super().values_sql(expression, values_as_table=values_as_table)
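Taken together, the parser and generator above give ClickHouse its distinctive round-tripping behavior. The following is an illustrative sketch, not part of the module, assuming a standard sqlglot installation; the expected outputs are inferred from the comments in _parse_types, _parse_table and placeholder_sql above.

import sqlglot

# Types coming from other dialects are nullable by default, so they are
# wrapped in Nullable(...) when generating ClickHouse SQL (see _parse_types
# and datatype_sql above).
print(sqlglot.transpile("CAST(x AS TEXT)", write="clickhouse")[0])
# Expected: CAST(x AS Nullable(String))

# The FINAL modifier is parsed into an exp.Final node and generated verbatim.
print(sqlglot.transpile("SELECT * FROM t FINAL", read="clickhouse", write="clickhouse")[0])
# Expected: SELECT * FROM t FINAL

# Query parameters round-trip through exp.Placeholder / placeholder_sql.
print(sqlglot.transpile("SELECT {abc: UInt32}", read="clickhouse", write="clickhouse")[0])
# Expected: SELECT {abc: UInt32}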
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
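Relatedly, the ClickHouse parser above sets LOG_DEFAULTS_TO_LN = True, so a single-argument LOG is read as a natural logarithm. A minimal sketch, assuming a standard sqlglot installation:

import sqlglot

print(sqlglot.transpile("SELECT LOG(x)", read="clickhouse")[0])
# Expected: SELECT LN(x)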
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
    SELECT 1 AS id, 2 AS my_id
)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
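For ClickHouse, 0x-prefixed literals are tokenized as hex strings via HEX_STRINGS in the Tokenizer below; a hedged sketch inspecting the parsed node:

import sqlglot

node = sqlglot.parse_one("SELECT 0xCC", read="clickhouse").selects[0]
print(type(node).__name__)  # e.g. HexString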
Specifies the strategy according to which identifiers should be normalized.
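ClickHouse resolves identifiers case-sensitively, so identifier normalization leaves casing intact; a minimal sketch assuming sqlglot's normalize_identifiers helper:

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = sqlglot.parse_one("SELECT MyCol FROM MyTable", read="clickhouse")
print(normalize_identifiers(expr, dialect="clickhouse").sql(dialect="clickhouse"))
# Expected: SELECT MyCol FROM MyTable (casing preserved)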
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (a literal newline character).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
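A minimal sketch of that mapping, assuming a standard sqlglot installation:

import sqlglot

print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
# Expected, per the docstring above: CREATE DATABASE foo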
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
213 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 214 # Clickhouse allows VALUES to have an embedded structure e.g: 215 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 216 # In this case, we don't want to qualify the columns 217 values = expression.expressions[0].expressions 218 219 structure = ( 220 values[0] 221 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 222 else None 223 ) 224 if structure: 225 # Split each column definition into the column name e.g: 226 # 'person String, place String' -> ['person', 'place'] 227 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 228 column_aliases = [ 229 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 230 ] 231 else: 232 # Default column aliases in CH are "c1", "c2", etc. 233 column_aliases = [ 234 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 235 ] 236 237 return column_aliases
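A hedged usage sketch for the method above: pull the exp.Values node out of a parsed query and ask the dialect instance for the aliases it would generate. The expected results follow the comments in the code (declared names when a structure string is embedded, c1, c2, ... otherwise).

import sqlglot
from sqlglot import exp
from sqlglot.dialects.clickhouse import ClickHouse

dialect = ClickHouse()

# Embedded structure: aliases come from the structure string.
q = "SELECT * FROM VALUES('person String, place String', ('Noah', 'Paris'))"
values = sqlglot.parse_one(q, read="clickhouse").find(exp.Values)
print(dialect.generate_values_aliases(values))  # expected: [person, place]

# Plain values: default aliases are c1, c2, ...
q2 = "SELECT * FROM VALUES ((1, 2), (3, 4))"
values2 = sqlglot.parse_one(q2, read="clickhouse").find(exp.Values)
print(dialect.generate_values_aliases(values2))  # expected: [c1, c2]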
239 class Tokenizer(tokens.Tokenizer): 240 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 241 IDENTIFIERS = ['"', "`"] 242 IDENTIFIER_ESCAPES = ["\\"] 243 STRING_ESCAPES = ["'", "\\"] 244 BIT_STRINGS = [("0b", "")] 245 HEX_STRINGS = [("0x", ""), ("0X", "")] 246 HEREDOC_STRINGS = ["$"] 247 248 KEYWORDS = { 249 **tokens.Tokenizer.KEYWORDS, 250 ".:": TokenType.DOTCOLON, 251 "ATTACH": TokenType.COMMAND, 252 "DATE32": TokenType.DATE32, 253 "DATETIME64": TokenType.DATETIME64, 254 "DICTIONARY": TokenType.DICTIONARY, 255 "DYNAMIC": TokenType.DYNAMIC, 256 "ENUM8": TokenType.ENUM8, 257 "ENUM16": TokenType.ENUM16, 258 "EXCHANGE": TokenType.COMMAND, 259 "FINAL": TokenType.FINAL, 260 "FIXEDSTRING": TokenType.FIXEDSTRING, 261 "FLOAT32": TokenType.FLOAT, 262 "FLOAT64": TokenType.DOUBLE, 263 "GLOBAL": TokenType.GLOBAL, 264 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 265 "MAP": TokenType.MAP, 266 "NESTED": TokenType.NESTED, 267 "NOTHING": TokenType.NOTHING, 268 "SAMPLE": TokenType.TABLE_SAMPLE, 269 "TUPLE": TokenType.STRUCT, 270 "UINT16": TokenType.USMALLINT, 271 "UINT32": TokenType.UINT, 272 "UINT64": TokenType.UBIGINT, 273 "UINT8": TokenType.UTINYINT, 274 "IPV4": TokenType.IPV4, 275 "IPV6": TokenType.IPV6, 276 "POINT": TokenType.POINT, 277 "RING": TokenType.RING, 278 "LINESTRING": TokenType.LINESTRING, 279 "MULTILINESTRING": TokenType.MULTILINESTRING, 280 "POLYGON": TokenType.POLYGON, 281 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 282 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 283 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 284 "SYSTEM": TokenType.COMMAND, 285 "PREWHERE": TokenType.PREWHERE, 286 } 287 KEYWORDS.pop("/*+") 288 289 SINGLE_TOKENS = { 290 **tokens.Tokenizer.SINGLE_TOKENS, 291 "$": TokenType.HEREDOC_STRING, 292 }
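An illustrative sketch (not part of the module): because the ClickHouse type keywords above map onto generic token types, type names translate cleanly to other dialects.

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS Float64)", read="clickhouse", write="duckdb")[0])
# Expected: SELECT CAST(x AS DOUBLE)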
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
294 class Parser(parser.Parser): 295 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 296 # * select x from t1 union all select x from t2 limit 1; 297 # * select x from t1 union all (select x from t2 limit 1); 298 MODIFIERS_ATTACHED_TO_SET_OP = False 299 INTERVAL_SPANS = False 300 OPTIONAL_ALIAS_TOKEN_CTE = False 301 JOINS_HAVE_EQUAL_PRECEDENCE = True 302 303 FUNCTIONS = { 304 **parser.Parser.FUNCTIONS, 305 "ANY": exp.AnyValue.from_arg_list, 306 "ARRAYSUM": exp.ArraySum.from_arg_list, 307 "ARRAYREVERSE": exp.ArrayReverse.from_arg_list, 308 "ARRAYSLICE": exp.ArraySlice.from_arg_list, 309 "COUNTIF": _build_count_if, 310 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 311 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 312 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 313 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 314 "DATE_FORMAT": _build_date_format, 315 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 316 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 317 "FORMATDATETIME": _build_date_format, 318 "JSONEXTRACTSTRING": build_json_extract_path( 319 exp.JSONExtractScalar, zero_based_indexing=False 320 ), 321 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 322 "MAP": parser.build_var_map, 323 "MATCH": exp.RegexpLike.from_arg_list, 324 "RANDCANONICAL": exp.Rand.from_arg_list, 325 "STR_TO_DATE": _build_str_to_date, 326 "TUPLE": exp.Struct.from_arg_list, 327 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 328 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 329 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 330 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 331 "UNIQ": exp.ApproxDistinct.from_arg_list, 332 "XOR": lambda args: exp.Xor(expressions=args), 333 "MD5": exp.MD5Digest.from_arg_list, 334 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 335 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 336 "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list, 337 "TOTYPENAME": exp.Typeof.from_arg_list, 338 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 339 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 340 } 341 FUNCTIONS.pop("TRANSFORM") 342 343 AGG_FUNCTIONS = { 344 "count", 345 "min", 346 "max", 347 "sum", 348 "avg", 349 "any", 350 "stddevPop", 351 "stddevSamp", 352 "varPop", 353 "varSamp", 354 "corr", 355 "covarPop", 356 "covarSamp", 357 "entropy", 358 "exponentialMovingAverage", 359 "intervalLengthSum", 360 "kolmogorovSmirnovTest", 361 "mannWhitneyUTest", 362 "median", 363 "rankCorr", 364 "sumKahan", 365 "studentTTest", 366 "welchTTest", 367 "anyHeavy", 368 "anyLast", 369 "boundingRatio", 370 "first_value", 371 "last_value", 372 "argMin", 373 "argMax", 374 "avgWeighted", 375 "topK", 376 "topKWeighted", 377 "deltaSum", 378 "deltaSumTimestamp", 379 "groupArray", 380 "groupArrayLast", 381 "groupUniqArray", 382 "groupArrayInsertAt", 383 "groupArrayMovingAvg", 384 "groupArrayMovingSum", 385 "groupArraySample", 386 "groupBitAnd", 387 "groupBitOr", 388 "groupBitXor", 389 "groupBitmap", 390 "groupBitmapAnd", 391 "groupBitmapOr", 392 "groupBitmapXor", 393 "sumWithOverflow", 394 "sumMap", 395 "minMap", 396 "maxMap", 397 "skewSamp", 398 "skewPop", 399 "kurtSamp", 400 "kurtPop", 401 "uniq", 402 "uniqExact", 403 "uniqCombined", 404 
"uniqCombined64", 405 "uniqHLL12", 406 "uniqTheta", 407 "quantile", 408 "quantiles", 409 "quantileExact", 410 "quantilesExact", 411 "quantileExactLow", 412 "quantilesExactLow", 413 "quantileExactHigh", 414 "quantilesExactHigh", 415 "quantileExactWeighted", 416 "quantilesExactWeighted", 417 "quantileTiming", 418 "quantilesTiming", 419 "quantileTimingWeighted", 420 "quantilesTimingWeighted", 421 "quantileDeterministic", 422 "quantilesDeterministic", 423 "quantileTDigest", 424 "quantilesTDigest", 425 "quantileTDigestWeighted", 426 "quantilesTDigestWeighted", 427 "quantileBFloat16", 428 "quantilesBFloat16", 429 "quantileBFloat16Weighted", 430 "quantilesBFloat16Weighted", 431 "simpleLinearRegression", 432 "stochasticLinearRegression", 433 "stochasticLogisticRegression", 434 "categoricalInformationValue", 435 "contingency", 436 "cramersV", 437 "cramersVBiasCorrected", 438 "theilsU", 439 "maxIntersections", 440 "maxIntersectionsPosition", 441 "meanZTest", 442 "quantileInterpolatedWeighted", 443 "quantilesInterpolatedWeighted", 444 "quantileGK", 445 "quantilesGK", 446 "sparkBar", 447 "sumCount", 448 "largestTriangleThreeBuckets", 449 "histogram", 450 "sequenceMatch", 451 "sequenceCount", 452 "windowFunnel", 453 "retention", 454 "uniqUpTo", 455 "sequenceNextNode", 456 "exponentialTimeDecayedAvg", 457 } 458 459 AGG_FUNCTIONS_SUFFIXES = [ 460 "If", 461 "Array", 462 "ArrayIf", 463 "Map", 464 "SimpleState", 465 "State", 466 "Merge", 467 "MergeState", 468 "ForEach", 469 "Distinct", 470 "OrDefault", 471 "OrNull", 472 "Resample", 473 "ArgMin", 474 "ArgMax", 475 ] 476 477 FUNC_TOKENS = { 478 *parser.Parser.FUNC_TOKENS, 479 TokenType.AND, 480 TokenType.OR, 481 TokenType.SET, 482 } 483 484 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 485 486 ID_VAR_TOKENS = { 487 *parser.Parser.ID_VAR_TOKENS, 488 TokenType.LIKE, 489 } 490 491 AGG_FUNC_MAPPING = ( 492 lambda functions, suffixes: { 493 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 494 } 495 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 496 497 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 498 499 FUNCTION_PARSERS = { 500 **parser.Parser.FUNCTION_PARSERS, 501 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 502 "QUANTILE": lambda self: self._parse_quantile(), 503 "MEDIAN": lambda self: self._parse_quantile(), 504 "COLUMNS": lambda self: self._parse_columns(), 505 } 506 507 FUNCTION_PARSERS.pop("MATCH") 508 509 PROPERTY_PARSERS = { 510 **parser.Parser.PROPERTY_PARSERS, 511 "ENGINE": lambda self: self._parse_engine_property(), 512 } 513 PROPERTY_PARSERS.pop("DYNAMIC") 514 515 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 516 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 517 518 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 519 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 520 521 RANGE_PARSERS = { 522 **parser.Parser.RANGE_PARSERS, 523 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 524 } 525 526 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 527 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
528 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 529 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 530 531 JOIN_KINDS = { 532 *parser.Parser.JOIN_KINDS, 533 TokenType.ANY, 534 TokenType.ASOF, 535 TokenType.ARRAY, 536 } 537 538 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 539 TokenType.ANY, 540 TokenType.ARRAY, 541 TokenType.FINAL, 542 TokenType.FORMAT, 543 TokenType.SETTINGS, 544 } 545 546 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 547 TokenType.FORMAT, 548 } 549 550 LOG_DEFAULTS_TO_LN = True 551 552 QUERY_MODIFIER_PARSERS = { 553 **parser.Parser.QUERY_MODIFIER_PARSERS, 554 TokenType.SETTINGS: lambda self: ( 555 "settings", 556 self._advance() or self._parse_csv(self._parse_assignment), 557 ), 558 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 559 } 560 561 CONSTRAINT_PARSERS = { 562 **parser.Parser.CONSTRAINT_PARSERS, 563 "INDEX": lambda self: self._parse_index_constraint(), 564 "CODEC": lambda self: self._parse_compress(), 565 } 566 567 ALTER_PARSERS = { 568 **parser.Parser.ALTER_PARSERS, 569 "REPLACE": lambda self: self._parse_alter_table_replace(), 570 } 571 572 SCHEMA_UNNAMED_CONSTRAINTS = { 573 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 574 "INDEX", 575 } 576 577 PLACEHOLDER_PARSERS = { 578 **parser.Parser.PLACEHOLDER_PARSERS, 579 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 580 } 581 582 def _parse_engine_property(self) -> exp.EngineProperty: 583 self._match(TokenType.EQ) 584 return self.expression( 585 exp.EngineProperty, 586 this=self._parse_field(any_token=True, anonymous_func=True), 587 ) 588 589 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 590 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 591 return self._parse_lambda() 592 593 def _parse_types( 594 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 595 ) -> t.Optional[exp.Expression]: 596 dtype = super()._parse_types( 597 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 598 ) 599 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 600 # Mark every type as non-nullable which is ClickHouse's default, unless it's 601 # already marked as nullable. This marker helps us transpile types from other 602 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 603 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 604 # fail in ClickHouse without the `Nullable` type constructor. 605 dtype.set("nullable", False) 606 607 return dtype 608 609 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 610 index = self._index 611 this = self._parse_bitwise() 612 if self._match(TokenType.FROM): 613 self._retreat(index) 614 return super()._parse_extract() 615 616 # We return Anonymous here because extract and regexpExtract have different semantics, 617 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 618 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 619 # 620 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
621 self._match(TokenType.COMMA) 622 return self.expression( 623 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 624 ) 625 626 def _parse_assignment(self) -> t.Optional[exp.Expression]: 627 this = super()._parse_assignment() 628 629 if self._match(TokenType.PLACEHOLDER): 630 return self.expression( 631 exp.If, 632 this=this, 633 true=self._parse_assignment(), 634 false=self._match(TokenType.COLON) and self._parse_assignment(), 635 ) 636 637 return this 638 639 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 640 """ 641 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 642 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 643 """ 644 index = self._index 645 646 this = self._parse_id_var() 647 self._match(TokenType.COLON) 648 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 649 self._match_text_seq("IDENTIFIER") and "Identifier" 650 ) 651 652 if not kind: 653 self._retreat(index) 654 return None 655 elif not self._match(TokenType.R_BRACE): 656 self.raise_error("Expecting }") 657 658 if isinstance(this, exp.Identifier) and not this.quoted: 659 this = exp.var(this.name) 660 661 return self.expression(exp.Placeholder, this=this, kind=kind) 662 663 def _parse_bracket( 664 self, this: t.Optional[exp.Expression] = None 665 ) -> t.Optional[exp.Expression]: 666 l_brace = self._match(TokenType.L_BRACE, advance=False) 667 bracket = super()._parse_bracket(this) 668 669 if l_brace and isinstance(bracket, exp.Struct): 670 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 671 for expression in bracket.expressions: 672 if not isinstance(expression, exp.PropertyEQ): 673 break 674 675 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 676 varmap.args["values"].append("expressions", expression.expression) 677 678 return varmap 679 680 return bracket 681 682 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 683 this = super()._parse_in(this) 684 this.set("is_global", is_global) 685 return this 686 687 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 688 is_negated = self._match(TokenType.NOT) 689 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 690 return self.expression(exp.Not, this=this) if is_negated else this 691 692 def _parse_table( 693 self, 694 schema: bool = False, 695 joins: bool = False, 696 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 697 parse_bracket: bool = False, 698 is_db_reference: bool = False, 699 parse_partition: bool = False, 700 consume_pipe: bool = False, 701 ) -> t.Optional[exp.Expression]: 702 this = super()._parse_table( 703 schema=schema, 704 joins=joins, 705 alias_tokens=alias_tokens, 706 parse_bracket=parse_bracket, 707 is_db_reference=is_db_reference, 708 ) 709 710 if isinstance(this, exp.Table): 711 inner = this.this 712 alias = this.args.get("alias") 713 714 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 715 alias.set("columns", [exp.to_identifier("generate_series")]) 716 717 if self._match(TokenType.FINAL): 718 this = self.expression(exp.Final, this=this) 719 720 return this 721 722 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 723 return super()._parse_position(haystack_first=True) 724 725 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 726 def _parse_cte(self) -> t.Optional[exp.CTE]: 727 # WITH <identifier> AS 
<subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

            # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
            # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
            if join.kind == "ARRAY":
                for table in join.find_all(exp.Table):
                    table.replace(table.to_column())

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
            params = self._parse_func_params(anon_func)

            kwargs = {
                "this": anon_func.this,
                "expressions": anon_func.expressions,
            }
            if parts[1]:
                exp_class: t.Type[exp.Expression] = (
                    exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                )
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_string() or self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
        # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
        if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
            return this

        return super()._parse_alias(this=this, explicit=explicit)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        this = super()._parse_expression()

        # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]" modifier
        while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            self._match(TokenType.R_PAREN)

        return this

    def _parse_columns(self) -> exp.Expression:
        this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

        while self._next and self._match_text_seq(")", "APPLY", "("):
            self._match(TokenType.R_PAREN)
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        return this

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        value = super()._parse_value(values=values)
        if not value:
            return None

        # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
        # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
        # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
        # but the final result is not altered by the extra parentheses.
        # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
        expressions = value.expressions
        if values and not isinstance(expressions[-1], exp.Tuple):
            value.set(
                "expressions",
                [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
            )

        return value
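A minimal usage sketch (not part of the module source) of how some of these parser overrides surface through sqlglot's public API; the statements are illustrative and the resulting ASTs are easiest to inspect via repr():

import sqlglot

# Scalar CTE: ClickHouse also accepts WITH <expression> AS <identifier>
sqlglot.parse_one("WITH 10 AS threshold SELECT * FROM t WHERE x > threshold", read="clickhouse")

# Parameterized aggregate: parameters in the first parens, arguments in the second
sqlglot.parse_one("SELECT quantile(0.9)(latency) FROM requests", read="clickhouse")

# Single-column VALUES is canonicalized into a tuple-of-tuples AST on parse
sqlglot.parse_one("SELECT * FROM VALUES (1, 2, 3)", read="clickhouse")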
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
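For example (a sketch, not from the source), these error-handling knobs can be passed through sqlglot's top-level helpers, which forward extra options to the parser:

import sqlglot
from sqlglot.errors import ErrorLevel

try:
    # RAISE collects up to max_errors messages before raising, instead of
    # failing on the first problem like the default IMMEDIATE level
    sqlglot.parse_one("SELECT FROM WHERE", read="clickhouse", error_level=ErrorLevel.RAISE)
except sqlglot.ParseError as e:
    print(e.errors)  # structured details for each collected error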
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArrayReverse: rename_func("arrayReverse"),
        exp.ArraySlice: rename_func("arraySlice"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.EndsWith: rename_func("endsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.Typeof: rename_func("toTypeName"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since Clickhouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
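A short sketch (not from the source) of the TRANSFORMS table in action when transpiling into ClickHouse; the expected output shape is shown in a comment and may vary slightly across sqlglot versions:

import sqlglot

# ApproxDistinct -> uniq, Variance -> varSamp per the TRANSFORMS mapping above
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(user_id), VARIANCE(x) FROM events", write="clickhouse")[0])
# expected shape: SELECT uniq(user_id), varSamp(x) FROM events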
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
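These options are forwarded by sqlglot's top-level helpers; a small sketch (not from the source):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1",
        write="clickhouse",
        pretty=True,    # format with indentation and line breaks
        identify=True,  # quote every identifier
    )[0]
)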
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return self.cast_sql(exp.cast(expression, "DATE"))

    return strtodate_sql
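A hedged illustration (not from the source) of this override: when the StrToDate isn't already under a cast, the ClickHouse output is wrapped in CAST(... AS DATE) so the result stays a DATE, as it would be in the source dialect:

import sqlglot

print(sqlglot.transpile("SELECT STR_TO_DATE(s, '%Y-%m-%d')", read="mysql", write="clickhouse")[0])
# expected to come back wrapped in a DATE cast; the exact inner function
# call depends on the sqlglot version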
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
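A sketch of the effect (not from the source): since ClickHouse has no TRY_CAST, casting to a Nullable target yields NULL on conversion failure instead of an error:

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# expected shape: SELECT CAST(x AS Nullable(Int32))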
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
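Two illustrative transpilations of these rules (not from the source); the comments show expected shapes, not guaranteed verbatim output:

import sqlglot

# String aliases collapse to String, the length parameter is dropped, and
# plain types are wrapped in Nullable(...) to preserve source semantics
print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="clickhouse")[0])
# expected shape: SELECT CAST(x AS Nullable(String))

# Composite types stay bare, but their element types may still be wrapped
print(sqlglot.transpile("SELECT CAST(x AS ARRAY<INT>)", read="spark", write="clickhouse")[0])
# expected shape: SELECT CAST(x AS Array(Nullable(Int32)))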
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
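These trailing modifiers round-trip through the dialect (their parsing lives elsewhere in this module); a sketch, not from the source:

import sqlglot

ast = sqlglot.parse_one(
    "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow",
    read="clickhouse",
)
print(ast.sql(dialect="clickhouse"))  # SETTINGS and FORMAT are re-emitted after LIMIT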
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
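A sketch of the ON CLUSTER placement this override preserves (not from the source; the DDL and cluster name are illustrative):

import sqlglot

ddl = "CREATE TABLE db.t ON CLUSTER my_cluster (x Int32) ENGINE=MergeTree ORDER BY x"
print(sqlglot.parse_one(ddl, read="clickhouse").sql(dialect="clickhouse"))
# ON CLUSTER my_cluster stays right after the table name, before the schema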
def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
            query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
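An illustrative CTAS transpilation (not from the source) showing the comment being moved after the query; the exact formatting may differ by sqlglot version:

import sqlglot

print(
    sqlglot.transpile(
        "CREATE TABLE t COMMENT='demo' AS SELECT 1 AS x",
        read="mysql",
        write="clickhouse",
    )[0]
)
# expected shape: CREATE TABLE t AS (SELECT 1 AS x) COMMENT 'demo'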
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
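This pairs with the parser's _parse_index_constraint; a round-trip sketch assuming standard ClickHouse skip-index syntax (not from the source, index parameters are illustrative):

import sqlglot

ddl = """
CREATE TABLE t (
    s String,
    INDEX idx s TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 1
) ENGINE=MergeTree ORDER BY s
"""
print(sqlglot.parse_one(ddl, read="clickhouse").sql(dialect="clickhouse"))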
def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    # If the VALUES clause contains tuples of expressions, we need to treat it
    # as a table since Clickhouse will automatically alias it as such.
    alias = expression.args.get("alias")

    if alias and alias.args.get("columns") and expression.expressions:
        values = expression.expressions[0].expressions
        values_as_table = any(isinstance(value, exp.Tuple) for value in values)
    else:
        values_as_table = True

    return super().values_sql(expression, values_as_table=values_as_table)
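A sketch of the two branches (not from the source); exact output shapes may vary by sqlglot version:

import sqlglot

# Rows are real tuples, so VALUES keeps its table alias and column names
print(sqlglot.transpile("SELECT * FROM (VALUES (1), (2)) AS v(x)", write="clickhouse")[0])

# A bare list of scalars is ClickHouse's single-column form
print(sqlglot.parse_one("SELECT * FROM VALUES (1, 2, 3)", read="clickhouse").sql(dialect="clickhouse"))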
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_WINDOW_EXCLUDE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql