sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    remove_from_array_using_filter,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql
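
# Illustrative sketch (not part of the source): with a unit present, the delta
# renderer above emits the unit first, e.g. something like
#
#   import sqlglot
#   sqlglot.transpile("DATE_ADD(x, INTERVAL 1 DAY)", read="mysql", write="clickhouse")
#   # expected shape: ["DATE_ADD(DAY, 1, x)"]
#
# The exact output is an assumption based on the argument order in _delta_sql.
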
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
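
# Worked illustration of the padding above (derived from the logic, not from tests):
#
#   '2020-01-01 12:13:14.25-08:00'
#   -> date/time '2020-01-01 12:13:14', fractional '25', offset '08:00'
#   -> fractional padded to '250000'
#   -> fromisoformat('2020-01-01 12:13:14.250000-08:00'), tzinfo dropped
#   -> '2020-01-01 12:13:14.250000'
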
def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure e.g:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name e.g:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases
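
    # Illustration of the aliasing above: for
    # VALUES('person String, place String', ('Noah', 'Paris')) the aliases become
    # [person, place]; for a plain VALUES((1, 2)) they default to [c1, c2].
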
"DATE_FORMAT": _build_date_format, 311 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 312 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 313 "FORMATDATETIME": _build_date_format, 314 "JSONEXTRACTSTRING": build_json_extract_path( 315 exp.JSONExtractScalar, zero_based_indexing=False 316 ), 317 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 318 "MAP": parser.build_var_map, 319 "MATCH": exp.RegexpLike.from_arg_list, 320 "RANDCANONICAL": exp.Rand.from_arg_list, 321 "STR_TO_DATE": _build_str_to_date, 322 "TUPLE": exp.Struct.from_arg_list, 323 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 324 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 325 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 326 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 327 "UNIQ": exp.ApproxDistinct.from_arg_list, 328 "XOR": lambda args: exp.Xor(expressions=args), 329 "MD5": exp.MD5Digest.from_arg_list, 330 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 331 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 332 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 333 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 334 } 335 FUNCTIONS.pop("TRANSFORM") 336 337 AGG_FUNCTIONS = { 338 "count", 339 "min", 340 "max", 341 "sum", 342 "avg", 343 "any", 344 "stddevPop", 345 "stddevSamp", 346 "varPop", 347 "varSamp", 348 "corr", 349 "covarPop", 350 "covarSamp", 351 "entropy", 352 "exponentialMovingAverage", 353 "intervalLengthSum", 354 "kolmogorovSmirnovTest", 355 "mannWhitneyUTest", 356 "median", 357 "rankCorr", 358 "sumKahan", 359 "studentTTest", 360 "welchTTest", 361 "anyHeavy", 362 "anyLast", 363 "boundingRatio", 364 "first_value", 365 "last_value", 366 "argMin", 367 "argMax", 368 "avgWeighted", 369 "topK", 370 "topKWeighted", 371 "deltaSum", 372 "deltaSumTimestamp", 373 "groupArray", 374 "groupArrayLast", 375 "groupUniqArray", 376 "groupArrayInsertAt", 377 "groupArrayMovingAvg", 378 "groupArrayMovingSum", 379 "groupArraySample", 380 "groupBitAnd", 381 "groupBitOr", 382 "groupBitXor", 383 "groupBitmap", 384 "groupBitmapAnd", 385 "groupBitmapOr", 386 "groupBitmapXor", 387 "sumWithOverflow", 388 "sumMap", 389 "minMap", 390 "maxMap", 391 "skewSamp", 392 "skewPop", 393 "kurtSamp", 394 "kurtPop", 395 "uniq", 396 "uniqExact", 397 "uniqCombined", 398 "uniqCombined64", 399 "uniqHLL12", 400 "uniqTheta", 401 "quantile", 402 "quantiles", 403 "quantileExact", 404 "quantilesExact", 405 "quantileExactLow", 406 "quantilesExactLow", 407 "quantileExactHigh", 408 "quantilesExactHigh", 409 "quantileExactWeighted", 410 "quantilesExactWeighted", 411 "quantileTiming", 412 "quantilesTiming", 413 "quantileTimingWeighted", 414 "quantilesTimingWeighted", 415 "quantileDeterministic", 416 "quantilesDeterministic", 417 "quantileTDigest", 418 "quantilesTDigest", 419 "quantileTDigestWeighted", 420 "quantilesTDigestWeighted", 421 "quantileBFloat16", 422 "quantilesBFloat16", 423 "quantileBFloat16Weighted", 424 "quantilesBFloat16Weighted", 425 "simpleLinearRegression", 426 "stochasticLinearRegression", 427 "stochasticLogisticRegression", 428 "categoricalInformationValue", 429 "contingency", 430 "cramersV", 431 "cramersVBiasCorrected", 432 "theilsU", 433 "maxIntersections", 434 "maxIntersectionsPosition", 435 "meanZTest", 436 "quantileInterpolatedWeighted", 437 "quantilesInterpolatedWeighted", 438 "quantileGK", 439 "quantilesGK", 440 
"sparkBar", 441 "sumCount", 442 "largestTriangleThreeBuckets", 443 "histogram", 444 "sequenceMatch", 445 "sequenceCount", 446 "windowFunnel", 447 "retention", 448 "uniqUpTo", 449 "sequenceNextNode", 450 "exponentialTimeDecayedAvg", 451 } 452 453 AGG_FUNCTIONS_SUFFIXES = [ 454 "If", 455 "Array", 456 "ArrayIf", 457 "Map", 458 "SimpleState", 459 "State", 460 "Merge", 461 "MergeState", 462 "ForEach", 463 "Distinct", 464 "OrDefault", 465 "OrNull", 466 "Resample", 467 "ArgMin", 468 "ArgMax", 469 ] 470 471 FUNC_TOKENS = { 472 *parser.Parser.FUNC_TOKENS, 473 TokenType.AND, 474 TokenType.OR, 475 TokenType.SET, 476 } 477 478 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 479 480 ID_VAR_TOKENS = { 481 *parser.Parser.ID_VAR_TOKENS, 482 TokenType.LIKE, 483 } 484 485 AGG_FUNC_MAPPING = ( 486 lambda functions, suffixes: { 487 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 488 } 489 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 490 491 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 492 493 FUNCTION_PARSERS = { 494 **parser.Parser.FUNCTION_PARSERS, 495 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 496 "QUANTILE": lambda self: self._parse_quantile(), 497 "MEDIAN": lambda self: self._parse_quantile(), 498 "COLUMNS": lambda self: self._parse_columns(), 499 } 500 501 FUNCTION_PARSERS.pop("MATCH") 502 503 PROPERTY_PARSERS = { 504 **parser.Parser.PROPERTY_PARSERS, 505 "ENGINE": lambda self: self._parse_engine_property(), 506 } 507 PROPERTY_PARSERS.pop("DYNAMIC") 508 509 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 510 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 511 512 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 513 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 514 515 RANGE_PARSERS = { 516 **parser.Parser.RANGE_PARSERS, 517 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 518 } 519 520 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 521 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
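
        # Illustration of the comment above (output shape assumed):
        #
        #   import sqlglot
        #   sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")
        #   # -> ['CAST(x AS Nullable(String))']
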
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)
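
        # Round-trip sketch for query parameters (assumed behavior):
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT {abc: UInt32}", read="clickhouse", write="clickhouse")
        #   # -> ['SELECT {abc: UInt32}'] via Generator.placeholder_sql below
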
        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
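
        # ClickHouse also allows scalar CTEs, e.g. (assumed round trip):
        #
        #   WITH 3600 AS seconds_per_hour SELECT seconds_per_hour
        #
        # which parses into exp.CTE(..., scalar=True) and is re-emitted as
        # "<expr> AS <alias>" by Generator.cte_sql below.
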
        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
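
        # Parameterized aggregates take params in a second parens group (assumed parse):
        #
        #   SELECT quantile(0.5)(latency) FROM t   -- exp.Quantile(this=latency, quantile=0.5)
        #   SELECT median(latency) FROM t          -- same node with the default 0.5 quantile
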
        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias.
            # However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this
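
        # The APPLY modifier from the comment above (assumed round trip):
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT * APPLY(sum) FROM t", read="clickhouse")
        #   # -> ['SELECT * APPLY(sum) FROM t']
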
        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
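
        # Type mapping sketch (assumed output; length params on string types are dropped):
        #
        #   import sqlglot
        #   sqlglot.transpile("CAST(x AS VARCHAR(10))", read="duckdb", write="clickhouse")
        #   # -> ['CAST(x AS Nullable(String))']
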
f"PARTITION BY {self.sql(e, 'this')}", 1094 exp.Pivot: no_pivot_sql, 1095 exp.Quantile: _quantile_sql, 1096 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1097 exp.Rand: rename_func("randCanonical"), 1098 exp.StartsWith: rename_func("startsWith"), 1099 exp.StrPosition: lambda self, e: strposition_sql( 1100 self, 1101 e, 1102 func_name="POSITION", 1103 supports_position=True, 1104 use_ansi_position=False, 1105 ), 1106 exp.TimeToStr: lambda self, e: self.func( 1107 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1108 ), 1109 exp.TimeStrToTime: _timestrtotime_sql, 1110 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1111 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1112 exp.VarMap: _map_sql, 1113 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1114 exp.MD5Digest: rename_func("MD5"), 1115 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1116 exp.SHA: rename_func("SHA1"), 1117 exp.SHA2: sha256_sql, 1118 exp.UnixToTime: _unix_to_time_sql, 1119 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1120 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1121 exp.Variance: rename_func("varSamp"), 1122 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1123 exp.Stddev: rename_func("stddevSamp"), 1124 exp.Chr: rename_func("CHAR"), 1125 exp.Lag: lambda self, e: self.func( 1126 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1127 ), 1128 exp.Lead: lambda self, e: self.func( 1129 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1130 ), 1131 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1132 rename_func("editDistance") 1133 ), 1134 } 1135 1136 PROPERTIES_LOCATION = { 1137 **generator.Generator.PROPERTIES_LOCATION, 1138 exp.OnCluster: exp.Properties.Location.POST_NAME, 1139 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1140 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1141 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1142 } 1143 1144 # There's no list in docs, but it can be found in Clickhouse code 1145 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1146 ON_CLUSTER_TARGETS = { 1147 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1148 "DATABASE", 1149 "TABLE", 1150 "VIEW", 1151 "DICTIONARY", 1152 "INDEX", 1153 "FUNCTION", 1154 "NAMED COLLECTION", 1155 } 1156 1157 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1158 NON_NULLABLE_TYPES = { 1159 exp.DataType.Type.ARRAY, 1160 exp.DataType.Type.MAP, 1161 exp.DataType.Type.STRUCT, 1162 exp.DataType.Type.POINT, 1163 exp.DataType.Type.RING, 1164 exp.DataType.Type.LINESTRING, 1165 exp.DataType.Type.MULTILINESTRING, 1166 exp.DataType.Type.POLYGON, 1167 exp.DataType.Type.MULTIPOLYGON, 1168 } 1169 1170 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1171 strtodate_sql = self.function_fallback_sql(expression) 1172 1173 if not isinstance(expression.parent, exp.Cast): 1174 # StrToDate returns DATEs in other dialects (eg. 
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)
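
        # TRY_CAST sketch per the comment above (assumed output):
        #
        #   import sqlglot
        #   sqlglot.transpile("TRY_CAST(x AS TEXT)", read="duckdb", write="clickhouse")
        #   # -> ['CAST(x AS Nullable(String))']
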
        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype
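
        # Nullable wrapping sketch per the rules above (assumed output): value types
        # of a Map are wrapped, key types and composites are not, e.g.
        #
        #   import sqlglot
        #   sqlglot.transpile("CAST(m AS MAP(TEXT, TEXT))", read="duckdb", write="clickhouse")
        #   # -> ['CAST(m AS Map(String, Nullable(String)))']
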
        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
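
        # GLOBAL IN negation sketch, tying in_sql/not_sql together (assumed round trip):
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)", read="clickhouse")
        #   # -> ['SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)']
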
t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 723 724 if not cte: 725 # WITH <expression> AS <identifier> 726 cte = self.expression( 727 exp.CTE, 728 this=self._parse_assignment(), 729 alias=self._parse_table_alias(), 730 scalar=True, 731 ) 732 733 return cte 734 735 def _parse_join_parts( 736 self, 737 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 738 is_global = self._match(TokenType.GLOBAL) and self._prev 739 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 740 741 if kind_pre: 742 kind = self._match_set(self.JOIN_KINDS) and self._prev 743 side = self._match_set(self.JOIN_SIDES) and self._prev 744 return is_global, side, kind 745 746 return ( 747 is_global, 748 self._match_set(self.JOIN_SIDES) and self._prev, 749 self._match_set(self.JOIN_KINDS) and self._prev, 750 ) 751 752 def _parse_join( 753 self, skip_join_token: bool = False, parse_bracket: bool = False 754 ) -> t.Optional[exp.Join]: 755 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 756 if join: 757 join.set("global", join.args.pop("method", None)) 758 759 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 760 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 761 if join.kind == "ARRAY": 762 for table in join.find_all(exp.Table): 763 table.replace(table.to_column()) 764 765 return join 766 767 def _parse_function( 768 self, 769 functions: t.Optional[t.Dict[str, t.Callable]] = None, 770 anonymous: bool = False, 771 optional_parens: bool = True, 772 any_token: bool = False, 773 ) -> t.Optional[exp.Expression]: 774 expr = super()._parse_function( 775 functions=functions, 776 anonymous=anonymous, 777 optional_parens=optional_parens, 778 any_token=any_token, 779 ) 780 781 func = expr.this if isinstance(expr, exp.Window) else expr 782 783 # Aggregate functions can be split in 2 parts: <func_name><suffix> 784 parts = ( 785 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 786 ) 787 788 if parts: 789 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 790 params = self._parse_func_params(anon_func) 791 792 kwargs = { 793 "this": anon_func.this, 794 "expressions": anon_func.expressions, 795 } 796 if parts[1]: 797 exp_class: t.Type[exp.Expression] = ( 798 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 799 ) 800 else: 801 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 802 803 kwargs["exp_class"] = exp_class 804 if params: 805 kwargs["params"] = params 806 807 func = self.expression(**kwargs) 808 809 if isinstance(expr, exp.Window): 810 # The window's func was parsed as Anonymous in base parser, fix its 811 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 812 expr.set("this", func) 813 elif params: 814 # Params have blocked super()._parse_function() from parsing the following window 815 # (if that exists) as they're standing between the function call and the window spec 816 expr = self._parse_window(func) 817 else: 818 expr = func 819 820 return expr 821 822 def _parse_func_params( 823 self, this: t.Optional[exp.Func] = None 824 ) -> t.Optional[t.List[exp.Expression]]: 825 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 826 return self._parse_csv(self._parse_lambda) 827 828 if self._match(TokenType.L_PAREN): 829 params = self._parse_csv(self._parse_lambda) 830 self._match_r_paren(this) 831 return params 832 833 return None 834 835 def _parse_quantile(self) -> exp.Quantile: 836 this = 
self._parse_lambda() 837 params = self._parse_func_params() 838 if params: 839 return self.expression(exp.Quantile, this=params[0], quantile=this) 840 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 841 842 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 843 return super()._parse_wrapped_id_vars(optional=True) 844 845 def _parse_primary_key( 846 self, wrapped_optional: bool = False, in_props: bool = False 847 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 848 return super()._parse_primary_key( 849 wrapped_optional=wrapped_optional or in_props, in_props=in_props 850 ) 851 852 def _parse_on_property(self) -> t.Optional[exp.Expression]: 853 index = self._index 854 if self._match_text_seq("CLUSTER"): 855 this = self._parse_string() or self._parse_id_var() 856 if this: 857 return self.expression(exp.OnCluster, this=this) 858 else: 859 self._retreat(index) 860 return None 861 862 def _parse_index_constraint( 863 self, kind: t.Optional[str] = None 864 ) -> exp.IndexColumnConstraint: 865 # INDEX name1 expr TYPE type1(args) GRANULARITY value 866 this = self._parse_id_var() 867 expression = self._parse_assignment() 868 869 index_type = self._match_text_seq("TYPE") and ( 870 self._parse_function() or self._parse_var() 871 ) 872 873 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 874 875 return self.expression( 876 exp.IndexColumnConstraint, 877 this=this, 878 expression=expression, 879 index_type=index_type, 880 granularity=granularity, 881 ) 882 883 def _parse_partition(self) -> t.Optional[exp.Partition]: 884 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 885 if not self._match(TokenType.PARTITION): 886 return None 887 888 if self._match_text_seq("ID"): 889 # Corresponds to the PARTITION ID <string_value> syntax 890 expressions: t.List[exp.Expression] = [ 891 self.expression(exp.PartitionId, this=self._parse_string()) 892 ] 893 else: 894 expressions = self._parse_expressions() 895 896 return self.expression(exp.Partition, expressions=expressions) 897 898 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 899 partition = self._parse_partition() 900 901 if not partition or not self._match(TokenType.FROM): 902 return None 903 904 return self.expression( 905 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 906 ) 907 908 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 909 if not self._match_text_seq("PROJECTION"): 910 return None 911 912 return self.expression( 913 exp.ProjectionDef, 914 this=self._parse_id_var(), 915 expression=self._parse_wrapped(self._parse_statement), 916 ) 917 918 def _parse_constraint(self) -> t.Optional[exp.Expression]: 919 return super()._parse_constraint() or self._parse_projection_def() 920 921 def _parse_alias( 922 self, this: t.Optional[exp.Expression], explicit: bool = False 923 ) -> t.Optional[exp.Expression]: 924 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 925 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 926 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 927 return this 928 929 return super()._parse_alias(this=this, explicit=explicit) 930 931 def _parse_expression(self) -> t.Optional[exp.Expression]: 932 this = super()._parse_expression() 933 934 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 935 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 936 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 937 self._match(TokenType.R_PAREN) 938 939 return this 940 941 def _parse_columns(self) -> exp.Expression: 942 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 943 944 while self._next and self._match_text_seq(")", "APPLY", "("): 945 self._match(TokenType.R_PAREN) 946 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 947 return this 948 949 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 950 value = super()._parse_value(values=values) 951 if not value: 952 return None 953 954 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 955 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 956 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 957 # but the final result is not altered by the extra parentheses. 958 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 959 expressions = value.expressions 960 if values and not isinstance(expressions[-1], exp.Tuple): 961 value.set( 962 "expressions", 963 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 964 ) 965 966 return value 967 968 class Generator(generator.Generator): 969 QUERY_HINTS = False 970 STRUCT_DELIMITER = ("(", ")") 971 NVL2_SUPPORTED = False 972 TABLESAMPLE_REQUIRES_PARENS = False 973 TABLESAMPLE_SIZE_IS_ROWS = False 974 TABLESAMPLE_KEYWORDS = "SAMPLE" 975 LAST_DAY_SUPPORTS_DATE_PART = False 976 CAN_IMPLEMENT_ARRAY_ANY = True 977 SUPPORTS_TO_NUMBER = False 978 JOIN_HINTS = False 979 TABLE_HINTS = False 980 GROUPINGS_SEP = "" 981 SET_OP_MODIFIERS = False 982 ARRAY_SIZE_NAME = "LENGTH" 983 WRAP_DERIVED_VALUES = False 984 985 STRING_TYPE_MAPPING = { 986 exp.DataType.Type.BLOB: "String", 987 exp.DataType.Type.CHAR: "String", 988 exp.DataType.Type.LONGBLOB: "String", 989 exp.DataType.Type.LONGTEXT: "String", 990 exp.DataType.Type.MEDIUMBLOB: "String", 991 exp.DataType.Type.MEDIUMTEXT: "String", 992 exp.DataType.Type.TINYBLOB: "String", 993 exp.DataType.Type.TINYTEXT: "String", 994 exp.DataType.Type.TEXT: "String", 995 exp.DataType.Type.VARBINARY: "String", 996 exp.DataType.Type.VARCHAR: "String", 997 } 998 999 SUPPORTED_JSON_PATH_PARTS = { 1000 exp.JSONPathKey, 1001 exp.JSONPathRoot, 1002 exp.JSONPathSubscript, 1003 } 1004 1005 TYPE_MAPPING = { 1006 **generator.Generator.TYPE_MAPPING, 1007 **STRING_TYPE_MAPPING, 1008 exp.DataType.Type.ARRAY: "Array", 1009 exp.DataType.Type.BOOLEAN: "Bool", 1010 exp.DataType.Type.BIGINT: "Int64", 1011 exp.DataType.Type.DATE32: "Date32", 1012 exp.DataType.Type.DATETIME: "DateTime", 1013 exp.DataType.Type.DATETIME2: "DateTime", 1014 exp.DataType.Type.SMALLDATETIME: "DateTime", 1015 exp.DataType.Type.DATETIME64: "DateTime64", 1016 exp.DataType.Type.DECIMAL: "Decimal", 1017 exp.DataType.Type.DECIMAL32: "Decimal32", 1018 exp.DataType.Type.DECIMAL64: "Decimal64", 1019 
exp.DataType.Type.DECIMAL128: "Decimal128", 1020 exp.DataType.Type.DECIMAL256: "Decimal256", 1021 exp.DataType.Type.TIMESTAMP: "DateTime", 1022 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1023 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1024 exp.DataType.Type.DOUBLE: "Float64", 1025 exp.DataType.Type.ENUM: "Enum", 1026 exp.DataType.Type.ENUM8: "Enum8", 1027 exp.DataType.Type.ENUM16: "Enum16", 1028 exp.DataType.Type.FIXEDSTRING: "FixedString", 1029 exp.DataType.Type.FLOAT: "Float32", 1030 exp.DataType.Type.INT: "Int32", 1031 exp.DataType.Type.MEDIUMINT: "Int32", 1032 exp.DataType.Type.INT128: "Int128", 1033 exp.DataType.Type.INT256: "Int256", 1034 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1035 exp.DataType.Type.MAP: "Map", 1036 exp.DataType.Type.NESTED: "Nested", 1037 exp.DataType.Type.NOTHING: "Nothing", 1038 exp.DataType.Type.SMALLINT: "Int16", 1039 exp.DataType.Type.STRUCT: "Tuple", 1040 exp.DataType.Type.TINYINT: "Int8", 1041 exp.DataType.Type.UBIGINT: "UInt64", 1042 exp.DataType.Type.UINT: "UInt32", 1043 exp.DataType.Type.UINT128: "UInt128", 1044 exp.DataType.Type.UINT256: "UInt256", 1045 exp.DataType.Type.USMALLINT: "UInt16", 1046 exp.DataType.Type.UTINYINT: "UInt8", 1047 exp.DataType.Type.IPV4: "IPv4", 1048 exp.DataType.Type.IPV6: "IPv6", 1049 exp.DataType.Type.POINT: "Point", 1050 exp.DataType.Type.RING: "Ring", 1051 exp.DataType.Type.LINESTRING: "LineString", 1052 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1053 exp.DataType.Type.POLYGON: "Polygon", 1054 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1055 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1056 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1057 exp.DataType.Type.DYNAMIC: "Dynamic", 1058 } 1059 1060 TRANSFORMS = { 1061 **generator.Generator.TRANSFORMS, 1062 exp.AnyValue: rename_func("any"), 1063 exp.ApproxDistinct: rename_func("uniq"), 1064 exp.ArrayConcat: rename_func("arrayConcat"), 1065 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1066 exp.ArrayRemove: remove_from_array_using_filter, 1067 exp.ArraySum: rename_func("arraySum"), 1068 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1069 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1070 exp.Array: inline_array_sql, 1071 exp.CastToStrType: rename_func("CAST"), 1072 exp.CountIf: rename_func("countIf"), 1073 exp.CompressColumnConstraint: lambda self, 1074 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1075 exp.ComputedColumnConstraint: lambda self, 1076 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1077 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1078 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1079 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1080 exp.DateStrToDate: rename_func("toDate"), 1081 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1082 exp.Explode: rename_func("arrayJoin"), 1083 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1084 exp.IsNan: rename_func("isNaN"), 1085 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1086 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1087 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1088 exp.JSONPathKey: json_path_key_only_name, 1089 exp.JSONPathRoot: lambda *_: "", 1090 exp.Length: length_or_char_length_sql, 1091 exp.Map: _map_sql, 1092 exp.Median: rename_func("median"), 1093 exp.Nullif: rename_func("nullIf"), 1094 exp.PartitionedByProperty: lambda self, e: 
f"PARTITION BY {self.sql(e, 'this')}", 1095 exp.Pivot: no_pivot_sql, 1096 exp.Quantile: _quantile_sql, 1097 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1098 exp.Rand: rename_func("randCanonical"), 1099 exp.StartsWith: rename_func("startsWith"), 1100 exp.StrPosition: lambda self, e: strposition_sql( 1101 self, 1102 e, 1103 func_name="POSITION", 1104 supports_position=True, 1105 use_ansi_position=False, 1106 ), 1107 exp.TimeToStr: lambda self, e: self.func( 1108 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1109 ), 1110 exp.TimeStrToTime: _timestrtotime_sql, 1111 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1112 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1113 exp.VarMap: _map_sql, 1114 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1115 exp.MD5Digest: rename_func("MD5"), 1116 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1117 exp.SHA: rename_func("SHA1"), 1118 exp.SHA2: sha256_sql, 1119 exp.UnixToTime: _unix_to_time_sql, 1120 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1121 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1122 exp.Variance: rename_func("varSamp"), 1123 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1124 exp.Stddev: rename_func("stddevSamp"), 1125 exp.Chr: rename_func("CHAR"), 1126 exp.Lag: lambda self, e: self.func( 1127 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1128 ), 1129 exp.Lead: lambda self, e: self.func( 1130 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1131 ), 1132 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1133 rename_func("editDistance") 1134 ), 1135 } 1136 1137 PROPERTIES_LOCATION = { 1138 **generator.Generator.PROPERTIES_LOCATION, 1139 exp.OnCluster: exp.Properties.Location.POST_NAME, 1140 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1141 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1142 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1143 } 1144 1145 # There's no list in docs, but it can be found in Clickhouse code 1146 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1147 ON_CLUSTER_TARGETS = { 1148 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1149 "DATABASE", 1150 "TABLE", 1151 "VIEW", 1152 "DICTIONARY", 1153 "INDEX", 1154 "FUNCTION", 1155 "NAMED COLLECTION", 1156 } 1157 1158 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1159 NON_NULLABLE_TYPES = { 1160 exp.DataType.Type.ARRAY, 1161 exp.DataType.Type.MAP, 1162 exp.DataType.Type.STRUCT, 1163 exp.DataType.Type.POINT, 1164 exp.DataType.Type.RING, 1165 exp.DataType.Type.LINESTRING, 1166 exp.DataType.Type.MULTILINESTRING, 1167 exp.DataType.Type.POLYGON, 1168 exp.DataType.Type.MULTIPOLYGON, 1169 } 1170 1171 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1172 strtodate_sql = self.function_fallback_sql(expression) 1173 1174 if not isinstance(expression.parent, exp.Cast): 1175 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1176 # this branch aims to improve the transpilation to clickhouse 1177 return self.cast_sql(exp.cast(expression, "DATE")) 1178 1179 return strtodate_sql 1180 1181 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1182 this = expression.this 1183 1184 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1185 return self.sql(this) 1186 1187 return super().cast_sql(expression, safe_prefix=safe_prefix) 1188 1189 def trycast_sql(self, expression: exp.TryCast) -> str: 1190 dtype = expression.to 1191 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1192 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1193 dtype.set("nullable", True) 1194 1195 return super().cast_sql(expression) 1196 1197 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1198 this = self.json_path_part(expression.this) 1199 return str(int(this) + 1) if is_int(this) else this 1200 1201 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1202 return f"AS {self.sql(expression, 'this')}" 1203 1204 def _any_to_has( 1205 self, 1206 expression: exp.EQ | exp.NEQ, 1207 default: t.Callable[[t.Any], str], 1208 prefix: str = "", 1209 ) -> str: 1210 if isinstance(expression.left, exp.Any): 1211 arr = expression.left 1212 this = expression.right 1213 elif isinstance(expression.right, exp.Any): 1214 arr = expression.right 1215 this = expression.left 1216 else: 1217 return default(expression) 1218 1219 return prefix + self.func("has", arr.this.unnest(), this) 1220 1221 def eq_sql(self, expression: exp.EQ) -> str: 1222 return self._any_to_has(expression, super().eq_sql) 1223 1224 def neq_sql(self, expression: exp.NEQ) -> str: 1225 return self._any_to_has(expression, super().neq_sql, "NOT ") 1226 1227 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1228 # Manually add a flag to make the search case-insensitive 1229 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1230 return self.func("match", expression.this, regex) 1231 1232 def datatype_sql(self, expression: exp.DataType) -> str: 1233 # String is the standard ClickHouse type, every other variant is just an alias. 1234 # Additionally, any supplied length parameter will be ignored. 1235 # 1236 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1237 if expression.this in self.STRING_TYPE_MAPPING: 1238 dtype = "String" 1239 else: 1240 dtype = super().datatype_sql(expression) 1241 1242 # This section changes the type to `Nullable(...)` if the following conditions hold: 1243 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1244 # and change their semantics 1245 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1246 # constraint: "Type of Map key must be a type, that can be represented by integer or 1247 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1248 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1249 parent = expression.parent 1250 nullable = expression.args.get("nullable") 1251 if nullable is True or ( 1252 nullable is None 1253 and not ( 1254 isinstance(parent, exp.DataType) 1255 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1256 and expression.index in (None, 0) 1257 ) 1258 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1259 ): 1260 dtype = f"Nullable({dtype})" 1261 1262 return dtype 1263 1264 def cte_sql(self, expression: exp.CTE) -> str: 1265 if expression.args.get("scalar"): 1266 this = self.sql(expression, "this") 1267 alias = self.sql(expression, "alias") 1268 return f"{this} AS {alias}" 1269 1270 return super().cte_sql(expression) 1271 1272 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1273 return super().after_limit_modifiers(expression) + [ 1274 ( 1275 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1276 if expression.args.get("settings") 1277 else "" 1278 ), 1279 ( 1280 self.seg("FORMAT ") + self.sql(expression, "format") 1281 if expression.args.get("format") 1282 else "" 1283 ), 1284 ] 1285 1286 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1287 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1288 1289 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1290 return f"ON CLUSTER {self.sql(expression, 'this')}" 1291 1292 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1293 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1294 exp.Properties.Location.POST_NAME 1295 ): 1296 this_name = self.sql( 1297 expression.this if isinstance(expression.this, exp.Schema) else expression, 1298 "this", 1299 ) 1300 this_properties = " ".join( 1301 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1302 ) 1303 this_schema = self.schema_columns_sql(expression.this) 1304 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1305 1306 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1307 1308 return super().createable_sql(expression, locations) 1309 1310 def create_sql(self, expression: exp.Create) -> str: 1311 # The comment property comes last in CTAS statements, i.e. 
after the query 1312 query = expression.expression 1313 if isinstance(query, exp.Query): 1314 comment_prop = expression.find(exp.SchemaCommentProperty) 1315 if comment_prop: 1316 comment_prop.pop() 1317 query.replace(exp.paren(query)) 1318 else: 1319 comment_prop = None 1320 1321 create_sql = super().create_sql(expression) 1322 1323 comment_sql = self.sql(comment_prop) 1324 comment_sql = f" {comment_sql}" if comment_sql else "" 1325 1326 return f"{create_sql}{comment_sql}" 1327 1328 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1329 this = self.indent(self.sql(expression, "this")) 1330 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1331 1332 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1333 this = self.sql(expression, "this") 1334 this = f" {this}" if this else "" 1335 expr = self.sql(expression, "expression") 1336 expr = f" {expr}" if expr else "" 1337 index_type = self.sql(expression, "index_type") 1338 index_type = f" TYPE {index_type}" if index_type else "" 1339 granularity = self.sql(expression, "granularity") 1340 granularity = f" GRANULARITY {granularity}" if granularity else "" 1341 1342 return f"INDEX{this}{expr}{index_type}{granularity}" 1343 1344 def partition_sql(self, expression: exp.Partition) -> str: 1345 return f"PARTITION {self.expressions(expression, flat=True)}" 1346 1347 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1348 return f"ID {self.sql(expression.this)}" 1349 1350 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1351 return ( 1352 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1353 ) 1354 1355 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1356 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1357 1358 def is_sql(self, expression: exp.Is) -> str: 1359 is_sql = super().is_sql(expression) 1360 1361 if isinstance(expression.parent, exp.Not): 1362 # value IS NOT NULL -> NOT (value IS NULL) 1363 is_sql = self.wrap(is_sql) 1364 1365 return is_sql 1366 1367 def in_sql(self, expression: exp.In) -> str: 1368 in_sql = super().in_sql(expression) 1369 1370 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1371 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1372 1373 return in_sql 1374 1375 def not_sql(self, expression: exp.Not) -> str: 1376 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1377 # let `GLOBAL IN` child interpose `NOT` 1378 return self.sql(expression, "this") 1379 1380 return super().not_sql(expression) 1381 1382 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1383 # If the VALUES clause contains tuples of expressions, we need to treat it 1384 # as a table since Clickhouse will automatically alias it as such. 1385 alias = expression.args.get("alias") 1386 1387 if alias and alias.args.get("columns") and expression.expressions: 1388 values = expression.expressions[0].expressions 1389 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1390 else: 1391 values_as_table = True 1392 1393 return super().values_sql(expression, values_as_table=values_as_table)
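Taken together, the parser's nullable bookkeeping and the generator's datatype_sql logic determine how types are rendered when transpiling into ClickHouse. A minimal sketch; the expected output follows directly from the Nullable-wrapping comments in the source above:

import sqlglot

# Types parsed from other dialects stay nullable by default, so the ClickHouse
# generator wraps them in Nullable(...), per _parse_types and datatype_sql above.
print(sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])
# expected: CAST(x AS Nullable(String))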
NORMALIZE_FUNCTIONS — Determines how function names are going to be normalized. Possible values: "upper" or True (convert names to uppercase), "lower" (convert names to lowercase), False (disables function name normalization).
NULL_ORDERING — Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
LOG_BASE_FIRST — Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
FORCE_EARLY_ALIAS_REF_EXPANSION — Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which only forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
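A sketch of how this shows up in sqlglot's optimizer; qualify is the public qualification entry point, and the resolution asserted in the comment is the one described above:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)

# Under ClickHouse semantics the alias is expanded early, so each my_id
# reference should resolve back to the underlying "id" column.
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))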
PRESERVE_ORIGINAL_NAMES — Whether the name of the function should be preserved inside the node's metadata. This can be useful for round-tripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED — Whether number literals can include underscores for better readability.
HEX_STRING_IS_INTEGER_TYPE — Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
NORMALIZATION_STRATEGY — Specifies the strategy according to which identifiers should be normalized.
ESCAPED_SEQUENCES — Mapping of an escaped sequence (e.g. the two characters \n) to its unescaped version (the literal newline character).
CREATABLE_KIND_MAPPING — Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
SET_OP_DISTINCT_BY_DEFAULT — Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
213 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 214 # Clickhouse allows VALUES to have an embedded structure e.g: 215 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 216 # In this case, we don't want to qualify the columns 217 values = expression.expressions[0].expressions 218 219 structure = ( 220 values[0] 221 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 222 else None 223 ) 224 if structure: 225 # Split each column definition into the column name e.g: 226 # 'person String, place String' -> ['person', 'place'] 227 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 228 column_aliases = [ 229 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 230 ] 231 else: 232 # Default column aliases in CH are "c1", "c2", etc. 233 column_aliases = [ 234 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 235 ] 236 237 return column_aliases
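A sketch of exercising this helper directly; that SELECT * FROM VALUES(...) parses to an exp.Values node reachable via find() is an assumption about the parse tree shape:

from sqlglot import exp, parse_one
from sqlglot.dialects.clickhouse import ClickHouse

sql = "SELECT * FROM VALUES('person String, place String', ('Noah', 'Paris'))"
values = parse_one(sql, read="clickhouse").find(exp.Values)

# The embedded structure string supplies the aliases instead of c1, c2, ...
print([ident.name for ident in ClickHouse().generate_values_aliases(values)])
# expected: ['person', 'place']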
239 class Tokenizer(tokens.Tokenizer): 240 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 241 IDENTIFIERS = ['"', "`"] 242 IDENTIFIER_ESCAPES = ["\\"] 243 STRING_ESCAPES = ["'", "\\"] 244 BIT_STRINGS = [("0b", "")] 245 HEX_STRINGS = [("0x", ""), ("0X", "")] 246 HEREDOC_STRINGS = ["$"] 247 248 KEYWORDS = { 249 **tokens.Tokenizer.KEYWORDS, 250 ".:": TokenType.DOTCOLON, 251 "ATTACH": TokenType.COMMAND, 252 "DATE32": TokenType.DATE32, 253 "DATETIME64": TokenType.DATETIME64, 254 "DICTIONARY": TokenType.DICTIONARY, 255 "DYNAMIC": TokenType.DYNAMIC, 256 "ENUM8": TokenType.ENUM8, 257 "ENUM16": TokenType.ENUM16, 258 "EXCHANGE": TokenType.COMMAND, 259 "FINAL": TokenType.FINAL, 260 "FIXEDSTRING": TokenType.FIXEDSTRING, 261 "FLOAT32": TokenType.FLOAT, 262 "FLOAT64": TokenType.DOUBLE, 263 "GLOBAL": TokenType.GLOBAL, 264 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 265 "MAP": TokenType.MAP, 266 "NESTED": TokenType.NESTED, 267 "NOTHING": TokenType.NOTHING, 268 "SAMPLE": TokenType.TABLE_SAMPLE, 269 "TUPLE": TokenType.STRUCT, 270 "UINT16": TokenType.USMALLINT, 271 "UINT32": TokenType.UINT, 272 "UINT64": TokenType.UBIGINT, 273 "UINT8": TokenType.UTINYINT, 274 "IPV4": TokenType.IPV4, 275 "IPV6": TokenType.IPV6, 276 "POINT": TokenType.POINT, 277 "RING": TokenType.RING, 278 "LINESTRING": TokenType.LINESTRING, 279 "MULTILINESTRING": TokenType.MULTILINESTRING, 280 "POLYGON": TokenType.POLYGON, 281 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 282 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 283 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 284 "SYSTEM": TokenType.COMMAND, 285 "PREWHERE": TokenType.PREWHERE, 286 } 287 KEYWORDS.pop("/*+") 288 289 SINGLE_TOKENS = { 290 **tokens.Tokenizer.SINGLE_TOKENS, 291 "$": TokenType.HEREDOC_STRING, 292 }
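A small sketch of dialect-specific tokenization, using Dialect.tokenize as the entry point; the behavior noted in the comment follows from the COMMENTS and HEX_STRINGS settings above:

from sqlglot.dialects.clickhouse import ClickHouse

# '#' opens a line comment and 0x-prefixed literals tokenize as hex strings.
for token in ClickHouse().tokenize("SELECT 0x1F # trailing comment"):
    print(token.token_type, token.text)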
294 class Parser(parser.Parser): 295 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 296 # * select x from t1 union all select x from t2 limit 1; 297 # * select x from t1 union all (select x from t2 limit 1); 298 MODIFIERS_ATTACHED_TO_SET_OP = False 299 INTERVAL_SPANS = False 300 OPTIONAL_ALIAS_TOKEN_CTE = False 301 302 FUNCTIONS = { 303 **parser.Parser.FUNCTIONS, 304 "ANY": exp.AnyValue.from_arg_list, 305 "ARRAYSUM": exp.ArraySum.from_arg_list, 306 "COUNTIF": _build_count_if, 307 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 308 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 309 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 310 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 311 "DATE_FORMAT": _build_date_format, 312 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 313 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 314 "FORMATDATETIME": _build_date_format, 315 "JSONEXTRACTSTRING": build_json_extract_path( 316 exp.JSONExtractScalar, zero_based_indexing=False 317 ), 318 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 319 "MAP": parser.build_var_map, 320 "MATCH": exp.RegexpLike.from_arg_list, 321 "RANDCANONICAL": exp.Rand.from_arg_list, 322 "STR_TO_DATE": _build_str_to_date, 323 "TUPLE": exp.Struct.from_arg_list, 324 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 325 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 326 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 327 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 328 "UNIQ": exp.ApproxDistinct.from_arg_list, 329 "XOR": lambda args: exp.Xor(expressions=args), 330 "MD5": exp.MD5Digest.from_arg_list, 331 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 332 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 333 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 334 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 335 } 336 FUNCTIONS.pop("TRANSFORM") 337 338 AGG_FUNCTIONS = { 339 "count", 340 "min", 341 "max", 342 "sum", 343 "avg", 344 "any", 345 "stddevPop", 346 "stddevSamp", 347 "varPop", 348 "varSamp", 349 "corr", 350 "covarPop", 351 "covarSamp", 352 "entropy", 353 "exponentialMovingAverage", 354 "intervalLengthSum", 355 "kolmogorovSmirnovTest", 356 "mannWhitneyUTest", 357 "median", 358 "rankCorr", 359 "sumKahan", 360 "studentTTest", 361 "welchTTest", 362 "anyHeavy", 363 "anyLast", 364 "boundingRatio", 365 "first_value", 366 "last_value", 367 "argMin", 368 "argMax", 369 "avgWeighted", 370 "topK", 371 "topKWeighted", 372 "deltaSum", 373 "deltaSumTimestamp", 374 "groupArray", 375 "groupArrayLast", 376 "groupUniqArray", 377 "groupArrayInsertAt", 378 "groupArrayMovingAvg", 379 "groupArrayMovingSum", 380 "groupArraySample", 381 "groupBitAnd", 382 "groupBitOr", 383 "groupBitXor", 384 "groupBitmap", 385 "groupBitmapAnd", 386 "groupBitmapOr", 387 "groupBitmapXor", 388 "sumWithOverflow", 389 "sumMap", 390 "minMap", 391 "maxMap", 392 "skewSamp", 393 "skewPop", 394 "kurtSamp", 395 "kurtPop", 396 "uniq", 397 "uniqExact", 398 "uniqCombined", 399 "uniqCombined64", 400 "uniqHLL12", 401 "uniqTheta", 402 "quantile", 403 "quantiles", 404 "quantileExact", 405 "quantilesExact", 406 "quantileExactLow", 407 "quantilesExactLow", 408 "quantileExactHigh", 409 "quantilesExactHigh", 410 
"quantileExactWeighted", 411 "quantilesExactWeighted", 412 "quantileTiming", 413 "quantilesTiming", 414 "quantileTimingWeighted", 415 "quantilesTimingWeighted", 416 "quantileDeterministic", 417 "quantilesDeterministic", 418 "quantileTDigest", 419 "quantilesTDigest", 420 "quantileTDigestWeighted", 421 "quantilesTDigestWeighted", 422 "quantileBFloat16", 423 "quantilesBFloat16", 424 "quantileBFloat16Weighted", 425 "quantilesBFloat16Weighted", 426 "simpleLinearRegression", 427 "stochasticLinearRegression", 428 "stochasticLogisticRegression", 429 "categoricalInformationValue", 430 "contingency", 431 "cramersV", 432 "cramersVBiasCorrected", 433 "theilsU", 434 "maxIntersections", 435 "maxIntersectionsPosition", 436 "meanZTest", 437 "quantileInterpolatedWeighted", 438 "quantilesInterpolatedWeighted", 439 "quantileGK", 440 "quantilesGK", 441 "sparkBar", 442 "sumCount", 443 "largestTriangleThreeBuckets", 444 "histogram", 445 "sequenceMatch", 446 "sequenceCount", 447 "windowFunnel", 448 "retention", 449 "uniqUpTo", 450 "sequenceNextNode", 451 "exponentialTimeDecayedAvg", 452 } 453 454 AGG_FUNCTIONS_SUFFIXES = [ 455 "If", 456 "Array", 457 "ArrayIf", 458 "Map", 459 "SimpleState", 460 "State", 461 "Merge", 462 "MergeState", 463 "ForEach", 464 "Distinct", 465 "OrDefault", 466 "OrNull", 467 "Resample", 468 "ArgMin", 469 "ArgMax", 470 ] 471 472 FUNC_TOKENS = { 473 *parser.Parser.FUNC_TOKENS, 474 TokenType.AND, 475 TokenType.OR, 476 TokenType.SET, 477 } 478 479 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 480 481 ID_VAR_TOKENS = { 482 *parser.Parser.ID_VAR_TOKENS, 483 TokenType.LIKE, 484 } 485 486 AGG_FUNC_MAPPING = ( 487 lambda functions, suffixes: { 488 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 489 } 490 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 491 492 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 493 494 FUNCTION_PARSERS = { 495 **parser.Parser.FUNCTION_PARSERS, 496 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 497 "QUANTILE": lambda self: self._parse_quantile(), 498 "MEDIAN": lambda self: self._parse_quantile(), 499 "COLUMNS": lambda self: self._parse_columns(), 500 } 501 502 FUNCTION_PARSERS.pop("MATCH") 503 504 PROPERTY_PARSERS = { 505 **parser.Parser.PROPERTY_PARSERS, 506 "ENGINE": lambda self: self._parse_engine_property(), 507 } 508 PROPERTY_PARSERS.pop("DYNAMIC") 509 510 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 511 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 512 513 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 514 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 515 516 RANGE_PARSERS = { 517 **parser.Parser.RANGE_PARSERS, 518 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 519 } 520 521 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 522 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
523 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 524 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 525 526 JOIN_KINDS = { 527 *parser.Parser.JOIN_KINDS, 528 TokenType.ANY, 529 TokenType.ASOF, 530 TokenType.ARRAY, 531 } 532 533 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 534 TokenType.ANY, 535 TokenType.ARRAY, 536 TokenType.FINAL, 537 TokenType.FORMAT, 538 TokenType.SETTINGS, 539 } 540 541 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 542 TokenType.FORMAT, 543 } 544 545 LOG_DEFAULTS_TO_LN = True 546 547 QUERY_MODIFIER_PARSERS = { 548 **parser.Parser.QUERY_MODIFIER_PARSERS, 549 TokenType.SETTINGS: lambda self: ( 550 "settings", 551 self._advance() or self._parse_csv(self._parse_assignment), 552 ), 553 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 554 } 555 556 CONSTRAINT_PARSERS = { 557 **parser.Parser.CONSTRAINT_PARSERS, 558 "INDEX": lambda self: self._parse_index_constraint(), 559 "CODEC": lambda self: self._parse_compress(), 560 } 561 562 ALTER_PARSERS = { 563 **parser.Parser.ALTER_PARSERS, 564 "REPLACE": lambda self: self._parse_alter_table_replace(), 565 } 566 567 SCHEMA_UNNAMED_CONSTRAINTS = { 568 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 569 "INDEX", 570 } 571 572 PLACEHOLDER_PARSERS = { 573 **parser.Parser.PLACEHOLDER_PARSERS, 574 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 575 } 576 577 def _parse_engine_property(self) -> exp.EngineProperty: 578 self._match(TokenType.EQ) 579 return self.expression( 580 exp.EngineProperty, 581 this=self._parse_field(any_token=True, anonymous_func=True), 582 ) 583 584 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 585 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 586 return self._parse_lambda() 587 588 def _parse_types( 589 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 590 ) -> t.Optional[exp.Expression]: 591 dtype = super()._parse_types( 592 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 593 ) 594 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 595 # Mark every type as non-nullable which is ClickHouse's default, unless it's 596 # already marked as nullable. This marker helps us transpile types from other 597 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 598 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 599 # fail in ClickHouse without the `Nullable` type constructor. 600 dtype.set("nullable", False) 601 602 return dtype 603 604 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 605 index = self._index 606 this = self._parse_bitwise() 607 if self._match(TokenType.FROM): 608 self._retreat(index) 609 return super()._parse_extract() 610 611 # We return Anonymous here because extract and regexpExtract have different semantics, 612 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 613 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 614 # 615 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
616 self._match(TokenType.COMMA) 617 return self.expression( 618 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 619 ) 620 621 def _parse_assignment(self) -> t.Optional[exp.Expression]: 622 this = super()._parse_assignment() 623 624 if self._match(TokenType.PLACEHOLDER): 625 return self.expression( 626 exp.If, 627 this=this, 628 true=self._parse_assignment(), 629 false=self._match(TokenType.COLON) and self._parse_assignment(), 630 ) 631 632 return this 633 634 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 635 """ 636 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 637 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 638 """ 639 index = self._index 640 641 this = self._parse_id_var() 642 self._match(TokenType.COLON) 643 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 644 self._match_text_seq("IDENTIFIER") and "Identifier" 645 ) 646 647 if not kind: 648 self._retreat(index) 649 return None 650 elif not self._match(TokenType.R_BRACE): 651 self.raise_error("Expecting }") 652 653 if isinstance(this, exp.Identifier) and not this.quoted: 654 this = exp.var(this.name) 655 656 return self.expression(exp.Placeholder, this=this, kind=kind) 657 658 def _parse_bracket( 659 self, this: t.Optional[exp.Expression] = None 660 ) -> t.Optional[exp.Expression]: 661 l_brace = self._match(TokenType.L_BRACE, advance=False) 662 bracket = super()._parse_bracket(this) 663 664 if l_brace and isinstance(bracket, exp.Struct): 665 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 666 for expression in bracket.expressions: 667 if not isinstance(expression, exp.PropertyEQ): 668 break 669 670 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 671 varmap.args["values"].append("expressions", expression.expression) 672 673 return varmap 674 675 return bracket 676 677 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 678 this = super()._parse_in(this) 679 this.set("is_global", is_global) 680 return this 681 682 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 683 is_negated = self._match(TokenType.NOT) 684 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 685 return self.expression(exp.Not, this=this) if is_negated else this 686 687 def _parse_table( 688 self, 689 schema: bool = False, 690 joins: bool = False, 691 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 692 parse_bracket: bool = False, 693 is_db_reference: bool = False, 694 parse_partition: bool = False, 695 ) -> t.Optional[exp.Expression]: 696 this = super()._parse_table( 697 schema=schema, 698 joins=joins, 699 alias_tokens=alias_tokens, 700 parse_bracket=parse_bracket, 701 is_db_reference=is_db_reference, 702 ) 703 704 if isinstance(this, exp.Table): 705 inner = this.this 706 alias = this.args.get("alias") 707 708 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 709 alias.set("columns", [exp.to_identifier("generate_series")]) 710 711 if self._match(TokenType.FINAL): 712 this = self.expression(exp.Final, this=this) 713 714 return this 715 716 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 717 return super()._parse_position(haystack_first=True) 718 719 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 720 def _parse_cte(self) -> t.Optional[exp.CTE]: 721 # WITH <identifier> AS <subquery expression> 722 cte: 
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

            # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
            # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
            if join.kind == "ARRAY":
                for table in join.find_all(exp.Table):
                    table.replace(table.to_column())

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
            params = self._parse_func_params(anon_func)

            kwargs = {
                "this": anon_func.this,
                "expressions": anon_func.expressions,
            }
            if parts[1]:
                exp_class: t.Type[exp.Expression] = (
                    exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                )
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in the base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_string() or self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In ClickHouse "SELECT <expr> APPLY(...)" is a query modifier,
        # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
        if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
            return this

        return super()._parse_alias(this=this, explicit=explicit)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        this = super()._parse_expression()

        # ClickHouse allows "SELECT <expr> [APPLY(func)] [...]" modifiers
        while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            self._match(TokenType.R_PAREN)

        return this

    def _parse_columns(self) -> exp.Expression:
        this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

        while self._next and self._match_text_seq(")", "APPLY", "("):
            self._match(TokenType.R_PAREN)
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        return this

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        value = super()._parse_value(values=values)
        if not value:
            return None

        # In ClickHouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
        # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
        # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
        # but the final result is not altered by the extra parentheses.
        # Note: ClickHouse allows VALUES([structure], value, ...) so the branch checks the last expression
        expressions = value.expressions
        if values and not isinstance(expressions[-1], exp.Tuple):
            value.set(
                "expressions",
                [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
            )

        return value
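The parser overrides above can be exercised through sqlglot's public API. A minimal sketch follows; the table and column names (`t`, `a`, `b`, `latency`) are invented for illustration, and the assertions describe the behavior implemented above rather than a documented contract:

import sqlglot
from sqlglot import exp

# Scalar CTE: ClickHouse's "WITH <expression> AS <identifier>" form is parsed
# by _parse_cte into an exp.CTE carrying scalar=True.
ast = sqlglot.parse_one("WITH 10 AS x SELECT x", read="clickhouse")
assert ast.find(exp.CTE).args.get("scalar") is True

# GLOBAL join: _parse_join_parts captures the GLOBAL keyword, which _parse_join
# then stores under the "global" arg of the exp.Join node.
join = sqlglot.parse_one(
    "SELECT * FROM a GLOBAL LEFT JOIN b ON a.id = b.id", read="clickhouse"
).find(exp.Join)
assert join.args.get("global")

# Parameterized aggregate: quantile(0.5)(latency) flows through _parse_quantile
# and _parse_func_params, producing an exp.Quantile node.
q = sqlglot.parse_one("SELECT quantile(0.5)(latency) FROM t", read="clickhouse")
assert q.find(exp.Quantile) is not None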
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
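These error-handling options can be passed through sqlglot's top-level helpers, which forward extra keyword arguments to the dialect's Parser. A small sketch:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # An incomplete expression should trip the parser; with RAISE, up to
    # max_errors messages are collected before the ParseError is thrown.
    sqlglot.parse_one("SELECT 1 +", read="clickhouse", error_level=ErrorLevel.RAISE)
except ParseError as err:
    print(err)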
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
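One behavior that spans this Parser and the Generator below is ClickHouse's typed query parameter syntax, which should round-trip unchanged (see placeholder_sql further down; `user_id` here is an invented parameter name):

import sqlglot

sql = sqlglot.transpile("SELECT {user_id: UInt32}", read="clickhouse", write="clickhouse")[0]
print(sql)  # SELECT {user_id: UInt32}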
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in the docs, but it can be found in ClickHouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (e.g. Postgres), so
            # this branch aims to improve the transpilation to ClickHouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let the `GLOBAL IN` child interpose the `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since ClickHouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
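A hedged sketch of how the tables and methods above combine during transpilation; the schemas are invented, and the outputs shown in comments are indicative of what TRANSFORMS, TYPE_MAPPING, datatype_sql and trycast_sql should produce rather than verbatim guarantees:

import sqlglot

# Function renames from TRANSFORMS: Stddev -> stddevSamp, Median -> median
print(sqlglot.transpile("SELECT STDDEV(x), MEDIAN(y) FROM t", write="clickhouse")[0])
# SELECT stddevSamp(x), median(y) FROM t

# datatype_sql: string types collapse to String, nullable scalar types get
# wrapped in Nullable(...), and composite types such as Array stay bare.
print(
    sqlglot.transpile(
        "CREATE TABLE t (a TEXT, b INT, c INT[])", read="postgres", write="clickhouse"
    )[0]
)
# CREATE TABLE t (a Nullable(String), b Nullable(Int32), c Array(Nullable(Int32)))

# trycast_sql: TRY_CAST is expressed as a plain cast to a Nullable type
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", write="clickhouse")[0])
# SELECT CAST(x AS Nullable(Int32)) FROM t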
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
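These options are accepted as keyword arguments by sqlglot's top-level helpers, which hand them to the Generator; for example:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1",
        write="clickhouse",
        pretty=True,    # format the output across multiple lines
        identify=True,  # quote every identifier
    )[0]
)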
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_WINDOW_EXCLUDE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql