sqlglot.dialects.clickhouse
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot.dialects.dialect import ( 6 Dialect, 7 NormalizationStrategy, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 length_or_char_length_sql, 15 no_pivot_sql, 16 build_json_extract_path, 17 rename_func, 18 sha256_sql, 19 strposition_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28from sqlglot.generator import unsupported_args 29 30DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 31 32 33def _build_date_format(args: t.List) -> exp.TimeToStr: 34 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 35 36 timezone = seq_get(args, 2) 37 if timezone: 38 expr.set("zone", timezone) 39 40 return expr 41 42 43def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 44 scale = expression.args.get("scale") 45 timestamp = expression.this 46 47 if scale in (None, exp.UnixToTime.SECONDS): 48 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 49 if scale == exp.UnixToTime.MILLIS: 50 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 51 if scale == exp.UnixToTime.MICROS: 52 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 53 if scale == exp.UnixToTime.NANOS: 54 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 55 56 return self.func( 57 "fromUnixTimestamp", 58 exp.cast( 59 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 60 ), 61 ) 62 63 64def _lower_func(sql: str) -> str: 65 index = sql.index("(") 66 return sql[:index].lower() + sql[index:] 67 68 69def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 70 quantile = expression.args["quantile"] 71 args = f"({self.sql(expression, 'this')})" 72 73 if isinstance(quantile, exp.Array): 74 func = self.func("quantiles", *quantile) 75 else: 76 func = self.func("quantile", quantile) 77 78 return func + args 79 80 81def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 82 if len(args) == 1: 83 return exp.CountIf(this=seq_get(args, 0)) 84 85 return exp.CombinedAggFunc(this="countIf", expressions=args) 86 87 88def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 89 if len(args) == 3: 90 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 91 92 strtodate = exp.StrToDate.from_arg_list(args) 93 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 94 95 96def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 97 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 98 if not expression.unit: 99 return rename_func(name)(self, expression) 100 101 return self.func( 102 name, 103 unit_to_var(expression), 104 expression.expression, 105 expression.this, 106 expression.args.get("zone"), 107 ) 108 109 return _delta_sql 110 111 112def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 113 ts = expression.this 114 115 tz = expression.args.get("zone") 116 if tz and isinstance(ts, exp.Literal): 117 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 118 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 119 # 120 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 121 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 122 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 123 ts_string = ts.name.strip() 124 125 # separate [date and time] from [fractional seconds and UTC offset] 126 ts_parts = ts_string.split(".") 127 if len(ts_parts) == 2: 128 # separate fractional seconds and UTC offset 129 offset_sep = "+" if "+" in ts_parts[1] else "-" 130 ts_frac_parts = ts_parts[1].split(offset_sep) 131 num_frac_parts = len(ts_frac_parts) 132 133 # pad to 6 digits if fractional seconds present 134 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 135 ts_string = "".join( 136 [ 137 ts_parts[0], # date and time 138 ".", 139 ts_frac_parts[0], # fractional seconds 140 offset_sep if num_frac_parts > 1 else "", 141 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 142 ] 143 ) 144 145 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 146 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 147 # it's part of the timestamp string 148 ts_without_tz = ( 149 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 150 ) 151 ts = exp.Literal.string(ts_without_tz) 152 153 # Non-nullable DateTime64 with microsecond precision 154 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 155 datatype = exp.DataType.build( 156 exp.DataType.Type.DATETIME64, 157 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 158 nullable=False, 159 ) 160 161 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 162 163 164def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str: 165 if not (expression.parent and expression.parent.arg_key == "settings"): 166 return _lower_func(var_map_sql(self, expression)) 167 168 keys = expression.args.get("keys") 169 values = expression.args.get("values") 170 171 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 172 self.unsupported("Cannot convert array columns into map.") 173 return "" 174 175 args = [] 176 for key, value in zip(keys.expressions, values.expressions): 177 args.append(f"{self.sql(key)}: {self.sql(value)}") 178 179 csv_args = ", ".join(args) 180 181 return f"{{{csv_args}}}" 182 183 184class ClickHouse(Dialect): 185 NORMALIZE_FUNCTIONS: bool | str = False 186 NULL_ORDERING = "nulls_are_last" 187 SUPPORTS_USER_DEFINED_TYPES = False 188 SAFE_DIVISION = True 189 LOG_BASE_FIRST: t.Optional[bool] = None 190 FORCE_EARLY_ALIAS_REF_EXPANSION = True 191 PRESERVE_ORIGINAL_NAMES = True 192 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 193 IDENTIFIERS_CAN_START_WITH_DIGIT = True 194 HEX_STRING_IS_INTEGER_TYPE = True 195 196 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 197 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 198 199 UNESCAPED_SEQUENCES = { 200 "\\0": "\0", 201 } 202 203 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 204 205 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 206 exp.Except: False, 207 exp.Intersect: False, 208 exp.Union: None, 209 } 210 211 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 212 # Clickhouse allows VALUES to have an embedded structure e.g: 213 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 214 # In this case, we don't want to qualify the columns 215 values = expression.expressions[0].expressions 216 217 structure = ( 218 values[0] 219 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 220 else None 221 ) 222 if structure: 223 # Split each column definition into the column name e.g: 224 # 'person String, place String' -> ['person', 'place'] 225 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 226 column_aliases = [ 227 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 228 ] 229 else: 230 # Default column aliases in CH are "c1", "c2", etc. 231 column_aliases = [ 232 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 233 ] 234 235 return column_aliases 236 237 class Tokenizer(tokens.Tokenizer): 238 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 239 IDENTIFIERS = ['"', "`"] 240 IDENTIFIER_ESCAPES = ["\\"] 241 STRING_ESCAPES = ["'", "\\"] 242 BIT_STRINGS = [("0b", "")] 243 HEX_STRINGS = [("0x", ""), ("0X", "")] 244 HEREDOC_STRINGS = ["$"] 245 246 KEYWORDS = { 247 **tokens.Tokenizer.KEYWORDS, 248 ".:": TokenType.DOTCOLON, 249 "ATTACH": TokenType.COMMAND, 250 "DATE32": TokenType.DATE32, 251 "DATETIME64": TokenType.DATETIME64, 252 "DICTIONARY": TokenType.DICTIONARY, 253 "DYNAMIC": TokenType.DYNAMIC, 254 "ENUM8": TokenType.ENUM8, 255 "ENUM16": TokenType.ENUM16, 256 "FINAL": TokenType.FINAL, 257 "FIXEDSTRING": TokenType.FIXEDSTRING, 258 "FLOAT32": TokenType.FLOAT, 259 "FLOAT64": TokenType.DOUBLE, 260 "GLOBAL": TokenType.GLOBAL, 261 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 262 "MAP": TokenType.MAP, 263 "NESTED": TokenType.NESTED, 264 "NOTHING": TokenType.NOTHING, 265 "SAMPLE": TokenType.TABLE_SAMPLE, 266 "TUPLE": TokenType.STRUCT, 267 "UINT16": TokenType.USMALLINT, 268 "UINT32": TokenType.UINT, 269 "UINT64": TokenType.UBIGINT, 270 "UINT8": TokenType.UTINYINT, 271 "IPV4": TokenType.IPV4, 272 "IPV6": TokenType.IPV6, 273 "POINT": TokenType.POINT, 274 "RING": TokenType.RING, 275 "LINESTRING": TokenType.LINESTRING, 276 "MULTILINESTRING": TokenType.MULTILINESTRING, 277 "POLYGON": TokenType.POLYGON, 278 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 279 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 280 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 281 "SYSTEM": TokenType.COMMAND, 282 "PREWHERE": TokenType.PREWHERE, 283 } 284 KEYWORDS.pop("/*+") 285 286 SINGLE_TOKENS = { 287 **tokens.Tokenizer.SINGLE_TOKENS, 288 "$": TokenType.HEREDOC_STRING, 289 } 290 291 class Parser(parser.Parser): 292 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 293 # * select x from t1 union all select x from t2 limit 1; 294 # * select x from t1 union all (select x from t2 limit 1); 295 MODIFIERS_ATTACHED_TO_SET_OP = False 296 INTERVAL_SPANS = False 297 OPTIONAL_ALIAS_TOKEN_CTE = False 298 299 FUNCTIONS = { 300 **parser.Parser.FUNCTIONS, 301 "ANY": exp.AnyValue.from_arg_list, 302 "ARRAYSUM": exp.ArraySum.from_arg_list, 303 "COUNTIF": _build_count_if, 304 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 305 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 306 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 307 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 308 "DATE_FORMAT": _build_date_format, 309 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 310 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 311 "FORMATDATETIME": _build_date_format, 312 "JSONEXTRACTSTRING": build_json_extract_path( 313 exp.JSONExtractScalar, zero_based_indexing=False 314 ), 315 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 316 "MAP": parser.build_var_map, 317 "MATCH": exp.RegexpLike.from_arg_list, 318 "RANDCANONICAL": exp.Rand.from_arg_list, 319 "STR_TO_DATE": _build_str_to_date, 320 "TUPLE": exp.Struct.from_arg_list, 321 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 322 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 323 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 324 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 325 "UNIQ": exp.ApproxDistinct.from_arg_list, 326 "XOR": lambda args: exp.Xor(expressions=args), 327 "MD5": exp.MD5Digest.from_arg_list, 328 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 329 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 330 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 331 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 332 } 333 FUNCTIONS.pop("TRANSFORM") 334 335 AGG_FUNCTIONS = { 336 "count", 337 "min", 338 "max", 339 "sum", 340 "avg", 341 "any", 342 "stddevPop", 343 "stddevSamp", 344 "varPop", 345 "varSamp", 346 "corr", 347 "covarPop", 348 "covarSamp", 349 "entropy", 350 "exponentialMovingAverage", 351 "intervalLengthSum", 352 "kolmogorovSmirnovTest", 353 "mannWhitneyUTest", 354 "median", 355 "rankCorr", 356 "sumKahan", 357 "studentTTest", 358 "welchTTest", 359 "anyHeavy", 360 "anyLast", 361 "boundingRatio", 362 "first_value", 363 "last_value", 364 "argMin", 365 "argMax", 366 "avgWeighted", 367 "topK", 368 "topKWeighted", 369 "deltaSum", 370 "deltaSumTimestamp", 371 "groupArray", 372 "groupArrayLast", 373 "groupUniqArray", 374 "groupArrayInsertAt", 375 "groupArrayMovingAvg", 376 "groupArrayMovingSum", 377 "groupArraySample", 378 "groupBitAnd", 379 "groupBitOr", 380 "groupBitXor", 381 "groupBitmap", 382 "groupBitmapAnd", 383 "groupBitmapOr", 384 "groupBitmapXor", 385 "sumWithOverflow", 386 "sumMap", 387 "minMap", 388 "maxMap", 389 "skewSamp", 390 "skewPop", 391 "kurtSamp", 392 "kurtPop", 393 "uniq", 394 "uniqExact", 395 "uniqCombined", 396 "uniqCombined64", 397 "uniqHLL12", 398 "uniqTheta", 399 "quantile", 400 "quantiles", 401 "quantileExact", 402 "quantilesExact", 403 "quantileExactLow", 404 "quantilesExactLow", 405 "quantileExactHigh", 406 "quantilesExactHigh", 407 "quantileExactWeighted", 408 "quantilesExactWeighted", 409 "quantileTiming", 410 "quantilesTiming", 411 "quantileTimingWeighted", 412 "quantilesTimingWeighted", 413 "quantileDeterministic", 414 "quantilesDeterministic", 415 "quantileTDigest", 416 "quantilesTDigest", 417 "quantileTDigestWeighted", 418 "quantilesTDigestWeighted", 419 "quantileBFloat16", 420 "quantilesBFloat16", 421 "quantileBFloat16Weighted", 422 "quantilesBFloat16Weighted", 423 "simpleLinearRegression", 424 "stochasticLinearRegression", 425 "stochasticLogisticRegression", 426 "categoricalInformationValue", 427 "contingency", 428 "cramersV", 429 "cramersVBiasCorrected", 430 "theilsU", 431 "maxIntersections", 432 "maxIntersectionsPosition", 433 "meanZTest", 434 "quantileInterpolatedWeighted", 435 "quantilesInterpolatedWeighted", 436 "quantileGK", 437 "quantilesGK", 438 "sparkBar", 439 "sumCount", 440 "largestTriangleThreeBuckets", 441 "histogram", 442 "sequenceMatch", 443 "sequenceCount", 444 "windowFunnel", 445 "retention", 446 "uniqUpTo", 447 "sequenceNextNode", 448 "exponentialTimeDecayedAvg", 449 } 450 451 AGG_FUNCTIONS_SUFFIXES = [ 452 "If", 453 "Array", 454 "ArrayIf", 455 "Map", 456 "SimpleState", 457 "State", 458 "Merge", 459 "MergeState", 460 "ForEach", 461 "Distinct", 462 "OrDefault", 463 "OrNull", 464 "Resample", 465 "ArgMin", 466 "ArgMax", 467 ] 468 469 FUNC_TOKENS = { 470 *parser.Parser.FUNC_TOKENS, 471 TokenType.AND, 472 TokenType.OR, 473 TokenType.SET, 474 } 475 476 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 477 478 ID_VAR_TOKENS = { 479 *parser.Parser.ID_VAR_TOKENS, 480 TokenType.LIKE, 481 } 482 483 AGG_FUNC_MAPPING = ( 484 lambda functions, suffixes: { 485 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 486 } 487 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 488 489 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 490 491 FUNCTION_PARSERS = { 492 **parser.Parser.FUNCTION_PARSERS, 493 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 494 "QUANTILE": lambda self: self._parse_quantile(), 495 "MEDIAN": lambda self: self._parse_quantile(), 496 "COLUMNS": lambda self: self._parse_columns(), 497 } 498 499 FUNCTION_PARSERS.pop("MATCH") 500 501 PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy() 502 PROPERTY_PARSERS.pop("DYNAMIC") 503 504 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 505 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 506 507 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 508 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 509 510 RANGE_PARSERS = { 511 **parser.Parser.RANGE_PARSERS, 512 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 513 } 514 515 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 516 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 517 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 518 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 519 520 JOIN_KINDS = { 521 *parser.Parser.JOIN_KINDS, 522 TokenType.ANY, 523 TokenType.ASOF, 524 TokenType.ARRAY, 525 } 526 527 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 528 TokenType.ANY, 529 TokenType.ARRAY, 530 TokenType.FINAL, 531 TokenType.FORMAT, 532 TokenType.SETTINGS, 533 } 534 535 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 536 TokenType.FORMAT, 537 } 538 539 LOG_DEFAULTS_TO_LN = True 540 541 QUERY_MODIFIER_PARSERS = { 542 **parser.Parser.QUERY_MODIFIER_PARSERS, 543 TokenType.SETTINGS: lambda self: ( 544 "settings", 545 self._advance() or self._parse_csv(self._parse_assignment), 546 ), 547 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 548 } 549 550 CONSTRAINT_PARSERS = { 551 **parser.Parser.CONSTRAINT_PARSERS, 552 "INDEX": lambda self: self._parse_index_constraint(), 553 "CODEC": lambda self: self._parse_compress(), 554 } 555 556 ALTER_PARSERS = { 557 **parser.Parser.ALTER_PARSERS, 558 "REPLACE": lambda self: self._parse_alter_table_replace(), 559 } 560 561 SCHEMA_UNNAMED_CONSTRAINTS = { 562 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 563 "INDEX", 564 } 565 566 PLACEHOLDER_PARSERS = { 567 **parser.Parser.PLACEHOLDER_PARSERS, 568 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 569 } 570 571 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 572 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 573 return self._parse_lambda() 574 575 def _parse_types( 576 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 577 ) -> t.Optional[exp.Expression]: 578 dtype = super()._parse_types( 579 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 580 ) 581 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 582 # Mark every type as non-nullable which is ClickHouse's default, unless it's 583 # already marked as nullable. This marker helps us transpile types from other 584 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 585 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 586 # fail in ClickHouse without the `Nullable` type constructor. 587 dtype.set("nullable", False) 588 589 return dtype 590 591 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 592 index = self._index 593 this = self._parse_bitwise() 594 if self._match(TokenType.FROM): 595 self._retreat(index) 596 return super()._parse_extract() 597 598 # We return Anonymous here because extract and regexpExtract have different semantics, 599 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 600 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 601 # 602 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 603 self._match(TokenType.COMMA) 604 return self.expression( 605 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 606 ) 607 608 def _parse_assignment(self) -> t.Optional[exp.Expression]: 609 this = super()._parse_assignment() 610 611 if self._match(TokenType.PLACEHOLDER): 612 return self.expression( 613 exp.If, 614 this=this, 615 true=self._parse_assignment(), 616 false=self._match(TokenType.COLON) and self._parse_assignment(), 617 ) 618 619 return this 620 621 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 622 """ 623 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 624 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 625 """ 626 index = self._index 627 628 this = self._parse_id_var() 629 self._match(TokenType.COLON) 630 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 631 self._match_text_seq("IDENTIFIER") and "Identifier" 632 ) 633 634 if not kind: 635 self._retreat(index) 636 return None 637 elif not self._match(TokenType.R_BRACE): 638 self.raise_error("Expecting }") 639 640 if isinstance(this, exp.Identifier) and not this.quoted: 641 this = exp.var(this.name) 642 643 return self.expression(exp.Placeholder, this=this, kind=kind) 644 645 def _parse_bracket( 646 self, this: t.Optional[exp.Expression] = None 647 ) -> t.Optional[exp.Expression]: 648 l_brace = self._match(TokenType.L_BRACE, advance=False) 649 bracket = super()._parse_bracket(this) 650 651 if l_brace and isinstance(bracket, exp.Struct): 652 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 653 for expression in bracket.expressions: 654 if not isinstance(expression, exp.PropertyEQ): 655 break 656 657 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 658 varmap.args["values"].append("expressions", expression.expression) 659 660 return varmap 661 662 return bracket 663 664 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 665 this = super()._parse_in(this) 666 this.set("is_global", is_global) 667 return this 668 669 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 670 is_negated = self._match(TokenType.NOT) 671 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 672 return self.expression(exp.Not, this=this) if is_negated else this 673 674 def _parse_table( 675 self, 676 schema: bool = False, 677 joins: bool = False, 678 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 679 parse_bracket: bool = False, 680 is_db_reference: bool = False, 681 parse_partition: bool = False, 682 ) -> t.Optional[exp.Expression]: 683 this = super()._parse_table( 684 schema=schema, 685 joins=joins, 686 alias_tokens=alias_tokens, 687 parse_bracket=parse_bracket, 688 is_db_reference=is_db_reference, 689 ) 690 691 if isinstance(this, exp.Table): 692 inner = this.this 693 alias = this.args.get("alias") 694 695 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 696 alias.set("columns", [exp.to_identifier("generate_series")]) 697 698 if self._match(TokenType.FINAL): 699 this = self.expression(exp.Final, this=this) 700 701 return this 702 703 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 704 return super()._parse_position(haystack_first=True) 705 706 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 707 def _parse_cte(self) -> t.Optional[exp.CTE]: 708 # WITH <identifier> AS <subquery expression> 709 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 710 711 if not cte: 712 # WITH <expression> AS <identifier> 713 cte = self.expression( 714 exp.CTE, 715 this=self._parse_assignment(), 716 alias=self._parse_table_alias(), 717 scalar=True, 718 ) 719 720 return cte 721 722 def _parse_join_parts( 723 self, 724 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 725 is_global = self._match(TokenType.GLOBAL) and self._prev 726 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 727 728 if kind_pre: 729 kind = self._match_set(self.JOIN_KINDS) and self._prev 730 side = self._match_set(self.JOIN_SIDES) and self._prev 731 return is_global, side, kind 732 733 return ( 734 is_global, 735 self._match_set(self.JOIN_SIDES) and self._prev, 736 self._match_set(self.JOIN_KINDS) and self._prev, 737 ) 738 739 def _parse_join( 740 self, skip_join_token: bool = False, parse_bracket: bool = False 741 ) -> t.Optional[exp.Join]: 742 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 743 if join: 744 join.set("global", join.args.pop("method", None)) 745 746 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 747 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 748 if join.kind == "ARRAY": 749 for table in join.find_all(exp.Table): 750 table.replace(table.to_column()) 751 752 return join 753 754 def _parse_function( 755 self, 756 functions: t.Optional[t.Dict[str, t.Callable]] = None, 757 anonymous: bool = False, 758 optional_parens: bool = True, 759 any_token: bool = False, 760 ) -> t.Optional[exp.Expression]: 761 expr = super()._parse_function( 762 functions=functions, 763 anonymous=anonymous, 764 optional_parens=optional_parens, 765 any_token=any_token, 766 ) 767 768 func = expr.this if isinstance(expr, exp.Window) else expr 769 770 # Aggregate functions can be split in 2 parts: <func_name><suffix> 771 parts = ( 772 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 773 ) 774 775 if parts: 776 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 777 params = self._parse_func_params(anon_func) 778 779 kwargs = { 780 "this": anon_func.this, 781 "expressions": anon_func.expressions, 782 } 783 if parts[1]: 784 exp_class: t.Type[exp.Expression] = ( 785 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 786 ) 787 else: 788 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 789 790 kwargs["exp_class"] = exp_class 791 if params: 792 kwargs["params"] = params 793 794 func = self.expression(**kwargs) 795 796 if isinstance(expr, exp.Window): 797 # The window's func was parsed as Anonymous in base parser, fix its 798 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 799 expr.set("this", func) 800 elif params: 801 # Params have blocked super()._parse_function() from parsing the following window 802 # (if that exists) as they're standing between the function call and the window spec 803 expr = self._parse_window(func) 804 else: 805 expr = func 806 807 return expr 808 809 def _parse_func_params( 810 self, this: t.Optional[exp.Func] = None 811 ) -> t.Optional[t.List[exp.Expression]]: 812 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 813 return self._parse_csv(self._parse_lambda) 814 815 if self._match(TokenType.L_PAREN): 816 params = self._parse_csv(self._parse_lambda) 817 self._match_r_paren(this) 818 return params 819 820 return None 821 822 def _parse_quantile(self) -> exp.Quantile: 823 this = self._parse_lambda() 824 params = self._parse_func_params() 825 if params: 826 return self.expression(exp.Quantile, this=params[0], quantile=this) 827 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 828 829 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 830 return super()._parse_wrapped_id_vars(optional=True) 831 832 def _parse_primary_key( 833 self, wrapped_optional: bool = False, in_props: bool = False 834 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 835 return super()._parse_primary_key( 836 wrapped_optional=wrapped_optional or in_props, in_props=in_props 837 ) 838 839 def _parse_on_property(self) -> t.Optional[exp.Expression]: 840 index = self._index 841 if self._match_text_seq("CLUSTER"): 842 this = self._parse_string() or self._parse_id_var() 843 if this: 844 return self.expression(exp.OnCluster, this=this) 845 else: 846 self._retreat(index) 847 return None 848 849 def _parse_index_constraint( 850 self, kind: t.Optional[str] = None 851 ) -> exp.IndexColumnConstraint: 852 # INDEX name1 expr TYPE type1(args) GRANULARITY value 853 this = self._parse_id_var() 854 expression = self._parse_assignment() 855 856 index_type = self._match_text_seq("TYPE") and ( 857 self._parse_function() or self._parse_var() 858 ) 859 860 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 861 862 return self.expression( 863 exp.IndexColumnConstraint, 864 this=this, 865 expression=expression, 866 index_type=index_type, 867 granularity=granularity, 868 ) 869 870 def _parse_partition(self) -> t.Optional[exp.Partition]: 871 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 872 if not self._match(TokenType.PARTITION): 873 return None 874 875 if self._match_text_seq("ID"): 876 # Corresponds to the PARTITION ID <string_value> syntax 877 expressions: t.List[exp.Expression] = [ 878 self.expression(exp.PartitionId, this=self._parse_string()) 879 ] 880 else: 881 expressions = self._parse_expressions() 882 883 return self.expression(exp.Partition, expressions=expressions) 884 885 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 886 partition = self._parse_partition() 887 888 if not partition or not self._match(TokenType.FROM): 889 return None 890 891 return self.expression( 892 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 893 ) 894 895 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 896 if not self._match_text_seq("PROJECTION"): 897 return None 898 899 return self.expression( 900 exp.ProjectionDef, 901 this=self._parse_id_var(), 902 expression=self._parse_wrapped(self._parse_statement), 903 ) 904 905 def _parse_constraint(self) -> t.Optional[exp.Expression]: 906 return super()._parse_constraint() or self._parse_projection_def() 907 908 def _parse_alias( 909 self, this: t.Optional[exp.Expression], explicit: bool = False 910 ) -> t.Optional[exp.Expression]: 911 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 912 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 913 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 914 return this 915 916 return super()._parse_alias(this=this, explicit=explicit) 917 918 def _parse_expression(self) -> t.Optional[exp.Expression]: 919 this = super()._parse_expression() 920 921 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 922 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 923 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 924 self._match(TokenType.R_PAREN) 925 926 return this 927 928 def _parse_columns(self) -> exp.Expression: 929 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 930 931 while self._next and self._match_text_seq(")", "APPLY", "("): 932 self._match(TokenType.R_PAREN) 933 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 934 return this 935 936 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 937 value = super()._parse_value(values=values) 938 if not value: 939 return None 940 941 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 942 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 943 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 944 # but the final result is not altered by the extra parentheses. 945 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 946 expressions = value.expressions 947 if values and not isinstance(expressions[-1], exp.Tuple): 948 value.set( 949 "expressions", 950 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 951 ) 952 953 return value 954 955 class Generator(generator.Generator): 956 QUERY_HINTS = False 957 STRUCT_DELIMITER = ("(", ")") 958 NVL2_SUPPORTED = False 959 TABLESAMPLE_REQUIRES_PARENS = False 960 TABLESAMPLE_SIZE_IS_ROWS = False 961 TABLESAMPLE_KEYWORDS = "SAMPLE" 962 LAST_DAY_SUPPORTS_DATE_PART = False 963 CAN_IMPLEMENT_ARRAY_ANY = True 964 SUPPORTS_TO_NUMBER = False 965 JOIN_HINTS = False 966 TABLE_HINTS = False 967 GROUPINGS_SEP = "" 968 SET_OP_MODIFIERS = False 969 ARRAY_SIZE_NAME = "LENGTH" 970 WRAP_DERIVED_VALUES = False 971 972 STRING_TYPE_MAPPING = { 973 exp.DataType.Type.BLOB: "String", 974 exp.DataType.Type.CHAR: "String", 975 exp.DataType.Type.LONGBLOB: "String", 976 exp.DataType.Type.LONGTEXT: "String", 977 exp.DataType.Type.MEDIUMBLOB: "String", 978 exp.DataType.Type.MEDIUMTEXT: "String", 979 exp.DataType.Type.TINYBLOB: "String", 980 exp.DataType.Type.TINYTEXT: "String", 981 exp.DataType.Type.TEXT: "String", 982 exp.DataType.Type.VARBINARY: "String", 983 exp.DataType.Type.VARCHAR: "String", 984 } 985 986 SUPPORTED_JSON_PATH_PARTS = { 987 exp.JSONPathKey, 988 exp.JSONPathRoot, 989 exp.JSONPathSubscript, 990 } 991 992 TYPE_MAPPING = { 993 **generator.Generator.TYPE_MAPPING, 994 **STRING_TYPE_MAPPING, 995 exp.DataType.Type.ARRAY: "Array", 996 exp.DataType.Type.BOOLEAN: "Bool", 997 exp.DataType.Type.BIGINT: "Int64", 998 exp.DataType.Type.DATE32: "Date32", 999 exp.DataType.Type.DATETIME: "DateTime", 1000 exp.DataType.Type.DATETIME2: "DateTime", 1001 exp.DataType.Type.SMALLDATETIME: "DateTime", 1002 exp.DataType.Type.DATETIME64: "DateTime64", 1003 exp.DataType.Type.DECIMAL: "Decimal", 1004 exp.DataType.Type.DECIMAL32: "Decimal32", 1005 exp.DataType.Type.DECIMAL64: "Decimal64", 1006 exp.DataType.Type.DECIMAL128: "Decimal128", 1007 exp.DataType.Type.DECIMAL256: "Decimal256", 1008 exp.DataType.Type.TIMESTAMP: "DateTime", 1009 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1010 exp.DataType.Type.DOUBLE: "Float64", 1011 exp.DataType.Type.ENUM: "Enum", 1012 exp.DataType.Type.ENUM8: "Enum8", 1013 exp.DataType.Type.ENUM16: "Enum16", 1014 exp.DataType.Type.FIXEDSTRING: "FixedString", 1015 exp.DataType.Type.FLOAT: "Float32", 1016 exp.DataType.Type.INT: "Int32", 1017 exp.DataType.Type.MEDIUMINT: "Int32", 1018 exp.DataType.Type.INT128: "Int128", 1019 exp.DataType.Type.INT256: "Int256", 1020 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1021 exp.DataType.Type.MAP: "Map", 1022 exp.DataType.Type.NESTED: "Nested", 1023 exp.DataType.Type.NOTHING: "Nothing", 1024 exp.DataType.Type.SMALLINT: "Int16", 1025 exp.DataType.Type.STRUCT: "Tuple", 1026 exp.DataType.Type.TINYINT: "Int8", 1027 exp.DataType.Type.UBIGINT: "UInt64", 1028 exp.DataType.Type.UINT: "UInt32", 1029 exp.DataType.Type.UINT128: "UInt128", 1030 exp.DataType.Type.UINT256: "UInt256", 1031 exp.DataType.Type.USMALLINT: "UInt16", 1032 exp.DataType.Type.UTINYINT: "UInt8", 1033 exp.DataType.Type.IPV4: "IPv4", 1034 exp.DataType.Type.IPV6: "IPv6", 1035 exp.DataType.Type.POINT: "Point", 1036 exp.DataType.Type.RING: "Ring", 1037 exp.DataType.Type.LINESTRING: "LineString", 1038 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1039 exp.DataType.Type.POLYGON: "Polygon", 1040 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1041 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1042 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1043 exp.DataType.Type.DYNAMIC: "Dynamic", 1044 } 1045 1046 TRANSFORMS = { 1047 **generator.Generator.TRANSFORMS, 1048 exp.AnyValue: rename_func("any"), 1049 exp.ApproxDistinct: rename_func("uniq"), 1050 exp.ArrayConcat: rename_func("arrayConcat"), 1051 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1052 exp.ArraySum: rename_func("arraySum"), 1053 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1054 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1055 exp.Array: inline_array_sql, 1056 exp.CastToStrType: rename_func("CAST"), 1057 exp.CountIf: rename_func("countIf"), 1058 exp.CompressColumnConstraint: lambda self, 1059 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1060 exp.ComputedColumnConstraint: lambda self, 1061 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1062 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1063 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1064 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1065 exp.DateStrToDate: rename_func("toDate"), 1066 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1067 exp.Explode: rename_func("arrayJoin"), 1068 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1069 exp.IsNan: rename_func("isNaN"), 1070 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1071 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1072 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1073 exp.JSONPathKey: json_path_key_only_name, 1074 exp.JSONPathRoot: lambda *_: "", 1075 exp.Length: length_or_char_length_sql, 1076 exp.Map: _map_sql, 1077 exp.Median: rename_func("median"), 1078 exp.Nullif: rename_func("nullIf"), 1079 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1080 exp.Pivot: no_pivot_sql, 1081 exp.Quantile: _quantile_sql, 1082 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1083 exp.Rand: rename_func("randCanonical"), 1084 exp.StartsWith: rename_func("startsWith"), 1085 exp.StrPosition: lambda self, e: strposition_sql( 1086 self, 1087 e, 1088 func_name="POSITION", 1089 supports_position=True, 1090 use_ansi_position=False, 1091 ), 1092 exp.TimeToStr: lambda self, e: self.func( 1093 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1094 ), 1095 exp.TimeStrToTime: _timestrtotime_sql, 1096 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1097 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1098 exp.VarMap: _map_sql, 1099 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1100 exp.MD5Digest: rename_func("MD5"), 1101 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1102 exp.SHA: rename_func("SHA1"), 1103 exp.SHA2: sha256_sql, 1104 exp.UnixToTime: _unix_to_time_sql, 1105 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1106 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1107 exp.Variance: rename_func("varSamp"), 1108 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1109 exp.Stddev: rename_func("stddevSamp"), 1110 exp.Chr: rename_func("CHAR"), 1111 exp.Lag: lambda self, e: self.func( 1112 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1113 ), 1114 exp.Lead: lambda self, e: self.func( 1115 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1116 ), 1117 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1118 rename_func("editDistance") 1119 ), 1120 } 1121 1122 PROPERTIES_LOCATION = { 1123 **generator.Generator.PROPERTIES_LOCATION, 1124 exp.OnCluster: exp.Properties.Location.POST_NAME, 1125 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1126 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1127 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1128 } 1129 1130 # There's no list in docs, but it can be found in Clickhouse code 1131 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1132 ON_CLUSTER_TARGETS = { 1133 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1134 "DATABASE", 1135 "TABLE", 1136 "VIEW", 1137 "DICTIONARY", 1138 "INDEX", 1139 "FUNCTION", 1140 "NAMED COLLECTION", 1141 } 1142 1143 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1144 NON_NULLABLE_TYPES = { 1145 exp.DataType.Type.ARRAY, 1146 exp.DataType.Type.MAP, 1147 exp.DataType.Type.STRUCT, 1148 exp.DataType.Type.POINT, 1149 exp.DataType.Type.RING, 1150 exp.DataType.Type.LINESTRING, 1151 exp.DataType.Type.MULTILINESTRING, 1152 exp.DataType.Type.POLYGON, 1153 exp.DataType.Type.MULTIPOLYGON, 1154 } 1155 1156 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1157 strtodate_sql = self.function_fallback_sql(expression) 1158 1159 if not isinstance(expression.parent, exp.Cast): 1160 # StrToDate returns DATEs in other dialects (eg. postgres), so 1161 # this branch aims to improve the transpilation to clickhouse 1162 return self.cast_sql(exp.cast(expression, "DATE")) 1163 1164 return strtodate_sql 1165 1166 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1167 this = expression.this 1168 1169 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1170 return self.sql(this) 1171 1172 return super().cast_sql(expression, safe_prefix=safe_prefix) 1173 1174 def trycast_sql(self, expression: exp.TryCast) -> str: 1175 dtype = expression.to 1176 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1177 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1178 dtype.set("nullable", True) 1179 1180 return super().cast_sql(expression) 1181 1182 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1183 this = self.json_path_part(expression.this) 1184 return str(int(this) + 1) if is_int(this) else this 1185 1186 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1187 return f"AS {self.sql(expression, 'this')}" 1188 1189 def _any_to_has( 1190 self, 1191 expression: exp.EQ | exp.NEQ, 1192 default: t.Callable[[t.Any], str], 1193 prefix: str = "", 1194 ) -> str: 1195 if isinstance(expression.left, exp.Any): 1196 arr = expression.left 1197 this = expression.right 1198 elif isinstance(expression.right, exp.Any): 1199 arr = expression.right 1200 this = expression.left 1201 else: 1202 return default(expression) 1203 1204 return prefix + self.func("has", arr.this.unnest(), this) 1205 1206 def eq_sql(self, expression: exp.EQ) -> str: 1207 return self._any_to_has(expression, super().eq_sql) 1208 1209 def neq_sql(self, expression: exp.NEQ) -> str: 1210 return self._any_to_has(expression, super().neq_sql, "NOT ") 1211 1212 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1213 # Manually add a flag to make the search case-insensitive 1214 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1215 return self.func("match", expression.this, regex) 1216 1217 def datatype_sql(self, expression: exp.DataType) -> str: 1218 # String is the standard ClickHouse type, every other variant is just an alias. 1219 # Additionally, any supplied length parameter will be ignored. 1220 # 1221 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1222 if expression.this in self.STRING_TYPE_MAPPING: 1223 dtype = "String" 1224 else: 1225 dtype = super().datatype_sql(expression) 1226 1227 # This section changes the type to `Nullable(...)` if the following conditions hold: 1228 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1229 # and change their semantics 1230 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1231 # constraint: "Type of Map key must be a type, that can be represented by integer or 1232 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1233 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1234 parent = expression.parent 1235 nullable = expression.args.get("nullable") 1236 if nullable is True or ( 1237 nullable is None 1238 and not ( 1239 isinstance(parent, exp.DataType) 1240 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1241 and expression.index in (None, 0) 1242 ) 1243 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1244 ): 1245 dtype = f"Nullable({dtype})" 1246 1247 return dtype 1248 1249 def cte_sql(self, expression: exp.CTE) -> str: 1250 if expression.args.get("scalar"): 1251 this = self.sql(expression, "this") 1252 alias = self.sql(expression, "alias") 1253 return f"{this} AS {alias}" 1254 1255 return super().cte_sql(expression) 1256 1257 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1258 return super().after_limit_modifiers(expression) + [ 1259 ( 1260 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1261 if expression.args.get("settings") 1262 else "" 1263 ), 1264 ( 1265 self.seg("FORMAT ") + self.sql(expression, "format") 1266 if expression.args.get("format") 1267 else "" 1268 ), 1269 ] 1270 1271 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1272 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1273 1274 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1275 return f"ON CLUSTER {self.sql(expression, 'this')}" 1276 1277 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1278 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1279 exp.Properties.Location.POST_NAME 1280 ): 1281 this_name = self.sql( 1282 expression.this if isinstance(expression.this, exp.Schema) else expression, 1283 "this", 1284 ) 1285 this_properties = " ".join( 1286 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1287 ) 1288 this_schema = self.schema_columns_sql(expression.this) 1289 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1290 1291 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1292 1293 return super().createable_sql(expression, locations) 1294 1295 def create_sql(self, expression: exp.Create) -> str: 1296 # The comment property comes last in CTAS statements, i.e. after the query 1297 query = expression.expression 1298 if isinstance(query, exp.Query): 1299 comment_prop = expression.find(exp.SchemaCommentProperty) 1300 if comment_prop: 1301 comment_prop.pop() 1302 query.replace(exp.paren(query)) 1303 else: 1304 comment_prop = None 1305 1306 create_sql = super().create_sql(expression) 1307 1308 comment_sql = self.sql(comment_prop) 1309 comment_sql = f" {comment_sql}" if comment_sql else "" 1310 1311 return f"{create_sql}{comment_sql}" 1312 1313 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1314 this = self.indent(self.sql(expression, "this")) 1315 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1316 1317 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1318 this = self.sql(expression, "this") 1319 this = f" {this}" if this else "" 1320 expr = self.sql(expression, "expression") 1321 expr = f" {expr}" if expr else "" 1322 index_type = self.sql(expression, "index_type") 1323 index_type = f" TYPE {index_type}" if index_type else "" 1324 granularity = self.sql(expression, "granularity") 1325 granularity = f" GRANULARITY {granularity}" if granularity else "" 1326 1327 return f"INDEX{this}{expr}{index_type}{granularity}" 1328 1329 def partition_sql(self, expression: exp.Partition) -> str: 1330 return f"PARTITION {self.expressions(expression, flat=True)}" 1331 1332 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1333 return f"ID {self.sql(expression.this)}" 1334 1335 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1336 return ( 1337 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1338 ) 1339 1340 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1341 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1342 1343 def is_sql(self, expression: exp.Is) -> str: 1344 is_sql = super().is_sql(expression) 1345 1346 if isinstance(expression.parent, exp.Not): 1347 # value IS NOT NULL -> NOT (value IS NULL) 1348 is_sql = self.wrap(is_sql) 1349 1350 return is_sql 1351 1352 def in_sql(self, expression: exp.In) -> str: 1353 in_sql = super().in_sql(expression) 1354 1355 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1356 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1357 1358 return in_sql 1359 1360 def not_sql(self, expression: exp.Not) -> str: 1361 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1362 # let `GLOBAL IN` child interpose `NOT` 1363 return self.sql(expression, "this") 1364 1365 return super().not_sql(expression) 1366 1367 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1368 # If the VALUES clause contains tuples of expressions, we need to treat it 1369 # as a table since Clickhouse will automatically alias it as such. 1370 alias = expression.args.get("alias") 1371 1372 if alias and alias.args.get("columns") and expression.expressions: 1373 values = expression.expressions[0].expressions 1374 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1375 else: 1376 values_as_table = True 1377 1378 return super().values_sql(expression, values_as_table=values_as_table)
185class ClickHouse(Dialect): 186 NORMALIZE_FUNCTIONS: bool | str = False 187 NULL_ORDERING = "nulls_are_last" 188 SUPPORTS_USER_DEFINED_TYPES = False 189 SAFE_DIVISION = True 190 LOG_BASE_FIRST: t.Optional[bool] = None 191 FORCE_EARLY_ALIAS_REF_EXPANSION = True 192 PRESERVE_ORIGINAL_NAMES = True 193 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 194 IDENTIFIERS_CAN_START_WITH_DIGIT = True 195 HEX_STRING_IS_INTEGER_TYPE = True 196 197 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 198 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 199 200 UNESCAPED_SEQUENCES = { 201 "\\0": "\0", 202 } 203 204 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 205 206 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 207 exp.Except: False, 208 exp.Intersect: False, 209 exp.Union: None, 210 } 211 212 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 213 # Clickhouse allows VALUES to have an embedded structure e.g: 214 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 215 # In this case, we don't want to qualify the columns 216 values = expression.expressions[0].expressions 217 218 structure = ( 219 values[0] 220 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 221 else None 222 ) 223 if structure: 224 # Split each column definition into the column name e.g: 225 # 'person String, place String' -> ['person', 'place'] 226 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 227 column_aliases = [ 228 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 229 ] 230 else: 231 # Default column aliases in CH are "c1", "c2", etc. 232 column_aliases = [ 233 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 234 ] 235 236 return column_aliases 237 238 class Tokenizer(tokens.Tokenizer): 239 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 240 IDENTIFIERS = ['"', "`"] 241 IDENTIFIER_ESCAPES = ["\\"] 242 STRING_ESCAPES = ["'", "\\"] 243 BIT_STRINGS = [("0b", "")] 244 HEX_STRINGS = [("0x", ""), ("0X", "")] 245 HEREDOC_STRINGS = ["$"] 246 247 KEYWORDS = { 248 **tokens.Tokenizer.KEYWORDS, 249 ".:": TokenType.DOTCOLON, 250 "ATTACH": TokenType.COMMAND, 251 "DATE32": TokenType.DATE32, 252 "DATETIME64": TokenType.DATETIME64, 253 "DICTIONARY": TokenType.DICTIONARY, 254 "DYNAMIC": TokenType.DYNAMIC, 255 "ENUM8": TokenType.ENUM8, 256 "ENUM16": TokenType.ENUM16, 257 "FINAL": TokenType.FINAL, 258 "FIXEDSTRING": TokenType.FIXEDSTRING, 259 "FLOAT32": TokenType.FLOAT, 260 "FLOAT64": TokenType.DOUBLE, 261 "GLOBAL": TokenType.GLOBAL, 262 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 263 "MAP": TokenType.MAP, 264 "NESTED": TokenType.NESTED, 265 "NOTHING": TokenType.NOTHING, 266 "SAMPLE": TokenType.TABLE_SAMPLE, 267 "TUPLE": TokenType.STRUCT, 268 "UINT16": TokenType.USMALLINT, 269 "UINT32": TokenType.UINT, 270 "UINT64": TokenType.UBIGINT, 271 "UINT8": TokenType.UTINYINT, 272 "IPV4": TokenType.IPV4, 273 "IPV6": TokenType.IPV6, 274 "POINT": TokenType.POINT, 275 "RING": TokenType.RING, 276 "LINESTRING": TokenType.LINESTRING, 277 "MULTILINESTRING": TokenType.MULTILINESTRING, 278 "POLYGON": TokenType.POLYGON, 279 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 280 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 281 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 282 "SYSTEM": TokenType.COMMAND, 283 "PREWHERE": TokenType.PREWHERE, 284 } 285 KEYWORDS.pop("/*+") 286 287 SINGLE_TOKENS = { 288 **tokens.Tokenizer.SINGLE_TOKENS, 289 "$": TokenType.HEREDOC_STRING, 290 } 291 292 class Parser(parser.Parser): 293 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 294 # * select x from t1 union all select x from t2 limit 1; 295 # * select x from t1 union all (select x from t2 limit 1); 296 MODIFIERS_ATTACHED_TO_SET_OP = False 297 INTERVAL_SPANS = False 298 OPTIONAL_ALIAS_TOKEN_CTE = False 299 300 FUNCTIONS = { 301 **parser.Parser.FUNCTIONS, 302 "ANY": exp.AnyValue.from_arg_list, 303 "ARRAYSUM": exp.ArraySum.from_arg_list, 304 "COUNTIF": _build_count_if, 305 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 306 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 307 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 308 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 309 "DATE_FORMAT": _build_date_format, 310 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 311 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 312 "FORMATDATETIME": _build_date_format, 313 "JSONEXTRACTSTRING": build_json_extract_path( 314 exp.JSONExtractScalar, zero_based_indexing=False 315 ), 316 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 317 "MAP": parser.build_var_map, 318 "MATCH": exp.RegexpLike.from_arg_list, 319 "RANDCANONICAL": exp.Rand.from_arg_list, 320 "STR_TO_DATE": _build_str_to_date, 321 "TUPLE": exp.Struct.from_arg_list, 322 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 323 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 324 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 325 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 326 "UNIQ": exp.ApproxDistinct.from_arg_list, 327 "XOR": lambda args: exp.Xor(expressions=args), 328 "MD5": exp.MD5Digest.from_arg_list, 329 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 330 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 331 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 332 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 333 } 334 FUNCTIONS.pop("TRANSFORM") 335 336 AGG_FUNCTIONS = { 337 "count", 338 "min", 339 "max", 340 "sum", 341 "avg", 342 "any", 343 "stddevPop", 344 "stddevSamp", 345 "varPop", 346 "varSamp", 347 "corr", 348 "covarPop", 349 "covarSamp", 350 "entropy", 351 "exponentialMovingAverage", 352 "intervalLengthSum", 353 "kolmogorovSmirnovTest", 354 "mannWhitneyUTest", 355 "median", 356 "rankCorr", 357 "sumKahan", 358 "studentTTest", 359 "welchTTest", 360 "anyHeavy", 361 "anyLast", 362 "boundingRatio", 363 "first_value", 364 "last_value", 365 "argMin", 366 "argMax", 367 "avgWeighted", 368 "topK", 369 "topKWeighted", 370 "deltaSum", 371 "deltaSumTimestamp", 372 "groupArray", 373 "groupArrayLast", 374 "groupUniqArray", 375 "groupArrayInsertAt", 376 "groupArrayMovingAvg", 377 "groupArrayMovingSum", 378 "groupArraySample", 379 "groupBitAnd", 380 "groupBitOr", 381 "groupBitXor", 382 "groupBitmap", 383 "groupBitmapAnd", 384 "groupBitmapOr", 385 "groupBitmapXor", 386 "sumWithOverflow", 387 "sumMap", 388 "minMap", 389 "maxMap", 390 "skewSamp", 391 "skewPop", 392 "kurtSamp", 393 "kurtPop", 394 "uniq", 395 "uniqExact", 396 "uniqCombined", 397 "uniqCombined64", 398 "uniqHLL12", 399 "uniqTheta", 400 "quantile", 401 "quantiles", 402 "quantileExact", 403 "quantilesExact", 404 "quantileExactLow", 405 "quantilesExactLow", 406 "quantileExactHigh", 407 "quantilesExactHigh", 408 "quantileExactWeighted", 409 "quantilesExactWeighted", 410 "quantileTiming", 411 "quantilesTiming", 412 "quantileTimingWeighted", 413 "quantilesTimingWeighted", 414 "quantileDeterministic", 415 "quantilesDeterministic", 416 "quantileTDigest", 417 "quantilesTDigest", 418 "quantileTDigestWeighted", 419 "quantilesTDigestWeighted", 420 "quantileBFloat16", 421 "quantilesBFloat16", 422 "quantileBFloat16Weighted", 423 "quantilesBFloat16Weighted", 424 "simpleLinearRegression", 425 "stochasticLinearRegression", 426 "stochasticLogisticRegression", 427 "categoricalInformationValue", 428 "contingency", 429 "cramersV", 430 "cramersVBiasCorrected", 431 "theilsU", 432 "maxIntersections", 433 "maxIntersectionsPosition", 434 "meanZTest", 435 "quantileInterpolatedWeighted", 436 "quantilesInterpolatedWeighted", 437 "quantileGK", 438 "quantilesGK", 439 "sparkBar", 440 "sumCount", 441 "largestTriangleThreeBuckets", 442 "histogram", 443 "sequenceMatch", 444 "sequenceCount", 445 "windowFunnel", 446 "retention", 447 "uniqUpTo", 448 "sequenceNextNode", 449 "exponentialTimeDecayedAvg", 450 } 451 452 AGG_FUNCTIONS_SUFFIXES = [ 453 "If", 454 "Array", 455 "ArrayIf", 456 "Map", 457 "SimpleState", 458 "State", 459 "Merge", 460 "MergeState", 461 "ForEach", 462 "Distinct", 463 "OrDefault", 464 "OrNull", 465 "Resample", 466 "ArgMin", 467 "ArgMax", 468 ] 469 470 FUNC_TOKENS = { 471 *parser.Parser.FUNC_TOKENS, 472 TokenType.AND, 473 TokenType.OR, 474 TokenType.SET, 475 } 476 477 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 478 479 ID_VAR_TOKENS = { 480 *parser.Parser.ID_VAR_TOKENS, 481 TokenType.LIKE, 482 } 483 484 AGG_FUNC_MAPPING = ( 485 lambda functions, suffixes: { 486 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 487 } 488 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 489 490 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 491 492 FUNCTION_PARSERS = { 493 **parser.Parser.FUNCTION_PARSERS, 494 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 495 "QUANTILE": lambda self: self._parse_quantile(), 496 "MEDIAN": lambda self: self._parse_quantile(), 497 "COLUMNS": lambda self: self._parse_columns(), 498 } 499 500 FUNCTION_PARSERS.pop("MATCH") 501 502 PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy() 503 PROPERTY_PARSERS.pop("DYNAMIC") 504 505 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 506 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 507 508 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 509 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 510 511 RANGE_PARSERS = { 512 **parser.Parser.RANGE_PARSERS, 513 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 514 } 515 516 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 517 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 518 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 519 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 520 521 JOIN_KINDS = { 522 *parser.Parser.JOIN_KINDS, 523 TokenType.ANY, 524 TokenType.ASOF, 525 TokenType.ARRAY, 526 } 527 528 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 529 TokenType.ANY, 530 TokenType.ARRAY, 531 TokenType.FINAL, 532 TokenType.FORMAT, 533 TokenType.SETTINGS, 534 } 535 536 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 537 TokenType.FORMAT, 538 } 539 540 LOG_DEFAULTS_TO_LN = True 541 542 QUERY_MODIFIER_PARSERS = { 543 **parser.Parser.QUERY_MODIFIER_PARSERS, 544 TokenType.SETTINGS: lambda self: ( 545 "settings", 546 self._advance() or self._parse_csv(self._parse_assignment), 547 ), 548 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 549 } 550 551 CONSTRAINT_PARSERS = { 552 **parser.Parser.CONSTRAINT_PARSERS, 553 "INDEX": lambda self: self._parse_index_constraint(), 554 "CODEC": lambda self: self._parse_compress(), 555 } 556 557 ALTER_PARSERS = { 558 **parser.Parser.ALTER_PARSERS, 559 "REPLACE": lambda self: self._parse_alter_table_replace(), 560 } 561 562 SCHEMA_UNNAMED_CONSTRAINTS = { 563 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 564 "INDEX", 565 } 566 567 PLACEHOLDER_PARSERS = { 568 **parser.Parser.PLACEHOLDER_PARSERS, 569 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 570 } 571 572 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 573 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 574 return self._parse_lambda() 575 576 def _parse_types( 577 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 578 ) -> t.Optional[exp.Expression]: 579 dtype = super()._parse_types( 580 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 581 ) 582 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 583 # Mark every type as non-nullable which is ClickHouse's default, unless it's 584 # already marked as nullable. This marker helps us transpile types from other 585 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 586 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 587 # fail in ClickHouse without the `Nullable` type constructor. 588 dtype.set("nullable", False) 589 590 return dtype 591 592 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 593 index = self._index 594 this = self._parse_bitwise() 595 if self._match(TokenType.FROM): 596 self._retreat(index) 597 return super()._parse_extract() 598 599 # We return Anonymous here because extract and regexpExtract have different semantics, 600 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 601 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 602 # 603 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 604 self._match(TokenType.COMMA) 605 return self.expression( 606 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 607 ) 608 609 def _parse_assignment(self) -> t.Optional[exp.Expression]: 610 this = super()._parse_assignment() 611 612 if self._match(TokenType.PLACEHOLDER): 613 return self.expression( 614 exp.If, 615 this=this, 616 true=self._parse_assignment(), 617 false=self._match(TokenType.COLON) and self._parse_assignment(), 618 ) 619 620 return this 621 622 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 623 """ 624 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 625 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 626 """ 627 index = self._index 628 629 this = self._parse_id_var() 630 self._match(TokenType.COLON) 631 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 632 self._match_text_seq("IDENTIFIER") and "Identifier" 633 ) 634 635 if not kind: 636 self._retreat(index) 637 return None 638 elif not self._match(TokenType.R_BRACE): 639 self.raise_error("Expecting }") 640 641 if isinstance(this, exp.Identifier) and not this.quoted: 642 this = exp.var(this.name) 643 644 return self.expression(exp.Placeholder, this=this, kind=kind) 645 646 def _parse_bracket( 647 self, this: t.Optional[exp.Expression] = None 648 ) -> t.Optional[exp.Expression]: 649 l_brace = self._match(TokenType.L_BRACE, advance=False) 650 bracket = super()._parse_bracket(this) 651 652 if l_brace and isinstance(bracket, exp.Struct): 653 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 654 for expression in bracket.expressions: 655 if not isinstance(expression, exp.PropertyEQ): 656 break 657 658 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 659 varmap.args["values"].append("expressions", expression.expression) 660 661 return varmap 662 663 return bracket 664 665 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 666 this = super()._parse_in(this) 667 this.set("is_global", is_global) 668 return this 669 670 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 671 is_negated = self._match(TokenType.NOT) 672 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 673 return self.expression(exp.Not, this=this) if is_negated else this 674 675 def _parse_table( 676 self, 677 schema: bool = False, 678 joins: bool = False, 679 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 680 parse_bracket: bool = False, 681 is_db_reference: bool = False, 682 parse_partition: bool = False, 683 ) -> t.Optional[exp.Expression]: 684 this = super()._parse_table( 685 schema=schema, 686 joins=joins, 687 alias_tokens=alias_tokens, 688 parse_bracket=parse_bracket, 689 is_db_reference=is_db_reference, 690 ) 691 692 if isinstance(this, exp.Table): 693 inner = this.this 694 alias = this.args.get("alias") 695 696 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 697 alias.set("columns", [exp.to_identifier("generate_series")]) 698 699 if self._match(TokenType.FINAL): 700 this = self.expression(exp.Final, this=this) 701 702 return this 703 704 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 705 return super()._parse_position(haystack_first=True) 706 707 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 708 def _parse_cte(self) -> t.Optional[exp.CTE]: 709 # WITH <identifier> AS <subquery expression> 710 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 711 712 if not cte: 713 # WITH <expression> AS <identifier> 714 cte = self.expression( 715 exp.CTE, 716 this=self._parse_assignment(), 717 alias=self._parse_table_alias(), 718 scalar=True, 719 ) 720 721 return cte 722 723 def _parse_join_parts( 724 self, 725 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 726 is_global = self._match(TokenType.GLOBAL) and self._prev 727 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 728 729 if kind_pre: 730 kind = self._match_set(self.JOIN_KINDS) and self._prev 731 side = self._match_set(self.JOIN_SIDES) and self._prev 732 return is_global, side, kind 733 734 return ( 735 is_global, 736 self._match_set(self.JOIN_SIDES) and self._prev, 737 self._match_set(self.JOIN_KINDS) and self._prev, 738 ) 739 740 def _parse_join( 741 self, skip_join_token: bool = False, parse_bracket: bool = False 742 ) -> t.Optional[exp.Join]: 743 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 744 if join: 745 join.set("global", join.args.pop("method", None)) 746 747 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 748 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 749 if join.kind == "ARRAY": 750 for table in join.find_all(exp.Table): 751 table.replace(table.to_column()) 752 753 return join 754 755 def _parse_function( 756 self, 757 functions: t.Optional[t.Dict[str, t.Callable]] = None, 758 anonymous: bool = False, 759 optional_parens: bool = True, 760 any_token: bool = False, 761 ) -> t.Optional[exp.Expression]: 762 expr = super()._parse_function( 763 functions=functions, 764 anonymous=anonymous, 765 optional_parens=optional_parens, 766 any_token=any_token, 767 ) 768 769 func = expr.this if isinstance(expr, exp.Window) else expr 770 771 # Aggregate functions can be split in 2 parts: <func_name><suffix> 772 parts = ( 773 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 774 ) 775 776 if parts: 777 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 778 params = self._parse_func_params(anon_func) 779 780 kwargs = { 781 "this": anon_func.this, 782 "expressions": anon_func.expressions, 783 } 784 if parts[1]: 785 exp_class: t.Type[exp.Expression] = ( 786 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 787 ) 788 else: 789 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 790 791 kwargs["exp_class"] = exp_class 792 if params: 793 kwargs["params"] = params 794 795 func = self.expression(**kwargs) 796 797 if isinstance(expr, exp.Window): 798 # The window's func was parsed as Anonymous in base parser, fix its 799 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 800 expr.set("this", func) 801 elif params: 802 # Params have blocked super()._parse_function() from parsing the following window 803 # (if that exists) as they're standing between the function call and the window spec 804 expr = self._parse_window(func) 805 else: 806 expr = func 807 808 return expr 809 810 def _parse_func_params( 811 self, this: t.Optional[exp.Func] = None 812 ) -> t.Optional[t.List[exp.Expression]]: 813 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 814 return self._parse_csv(self._parse_lambda) 815 816 if self._match(TokenType.L_PAREN): 817 params = self._parse_csv(self._parse_lambda) 818 self._match_r_paren(this) 819 return params 820 821 return None 822 823 def _parse_quantile(self) -> exp.Quantile: 824 this = self._parse_lambda() 825 params = self._parse_func_params() 826 if params: 827 return self.expression(exp.Quantile, this=params[0], quantile=this) 828 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 829 830 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 831 return super()._parse_wrapped_id_vars(optional=True) 832 833 def _parse_primary_key( 834 self, wrapped_optional: bool = False, in_props: bool = False 835 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 836 return super()._parse_primary_key( 837 wrapped_optional=wrapped_optional or in_props, in_props=in_props 838 ) 839 840 def _parse_on_property(self) -> t.Optional[exp.Expression]: 841 index = self._index 842 if self._match_text_seq("CLUSTER"): 843 this = self._parse_string() or self._parse_id_var() 844 if this: 845 return self.expression(exp.OnCluster, this=this) 846 else: 847 self._retreat(index) 848 return None 849 850 def _parse_index_constraint( 851 self, kind: t.Optional[str] = None 852 ) -> exp.IndexColumnConstraint: 853 # INDEX name1 expr TYPE type1(args) GRANULARITY value 854 this = self._parse_id_var() 855 expression = self._parse_assignment() 856 857 index_type = self._match_text_seq("TYPE") and ( 858 self._parse_function() or self._parse_var() 859 ) 860 861 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 862 863 return self.expression( 864 exp.IndexColumnConstraint, 865 this=this, 866 expression=expression, 867 index_type=index_type, 868 granularity=granularity, 869 ) 870 871 def _parse_partition(self) -> t.Optional[exp.Partition]: 872 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 873 if not self._match(TokenType.PARTITION): 874 return None 875 876 if self._match_text_seq("ID"): 877 # Corresponds to the PARTITION ID <string_value> syntax 878 expressions: t.List[exp.Expression] = [ 879 self.expression(exp.PartitionId, this=self._parse_string()) 880 ] 881 else: 882 expressions = self._parse_expressions() 883 884 return self.expression(exp.Partition, expressions=expressions) 885 886 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 887 partition = self._parse_partition() 888 889 if not partition or not self._match(TokenType.FROM): 890 return None 891 892 return self.expression( 893 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 894 ) 895 896 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 897 if not self._match_text_seq("PROJECTION"): 898 return None 899 900 return self.expression( 901 exp.ProjectionDef, 902 this=self._parse_id_var(), 903 expression=self._parse_wrapped(self._parse_statement), 904 ) 905 906 def _parse_constraint(self) -> t.Optional[exp.Expression]: 907 return super()._parse_constraint() or self._parse_projection_def() 908 909 def _parse_alias( 910 self, this: t.Optional[exp.Expression], explicit: bool = False 911 ) -> t.Optional[exp.Expression]: 912 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 913 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 914 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 915 return this 916 917 return super()._parse_alias(this=this, explicit=explicit) 918 919 def _parse_expression(self) -> t.Optional[exp.Expression]: 920 this = super()._parse_expression() 921 922 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 923 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 924 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 925 self._match(TokenType.R_PAREN) 926 927 return this 928 929 def _parse_columns(self) -> exp.Expression: 930 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 931 932 while self._next and self._match_text_seq(")", "APPLY", "("): 933 self._match(TokenType.R_PAREN) 934 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 935 return this 936 937 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 938 value = super()._parse_value(values=values) 939 if not value: 940 return None 941 942 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 943 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 944 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 945 # but the final result is not altered by the extra parentheses. 946 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 947 expressions = value.expressions 948 if values and not isinstance(expressions[-1], exp.Tuple): 949 value.set( 950 "expressions", 951 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 952 ) 953 954 return value 955 956 class Generator(generator.Generator): 957 QUERY_HINTS = False 958 STRUCT_DELIMITER = ("(", ")") 959 NVL2_SUPPORTED = False 960 TABLESAMPLE_REQUIRES_PARENS = False 961 TABLESAMPLE_SIZE_IS_ROWS = False 962 TABLESAMPLE_KEYWORDS = "SAMPLE" 963 LAST_DAY_SUPPORTS_DATE_PART = False 964 CAN_IMPLEMENT_ARRAY_ANY = True 965 SUPPORTS_TO_NUMBER = False 966 JOIN_HINTS = False 967 TABLE_HINTS = False 968 GROUPINGS_SEP = "" 969 SET_OP_MODIFIERS = False 970 ARRAY_SIZE_NAME = "LENGTH" 971 WRAP_DERIVED_VALUES = False 972 973 STRING_TYPE_MAPPING = { 974 exp.DataType.Type.BLOB: "String", 975 exp.DataType.Type.CHAR: "String", 976 exp.DataType.Type.LONGBLOB: "String", 977 exp.DataType.Type.LONGTEXT: "String", 978 exp.DataType.Type.MEDIUMBLOB: "String", 979 exp.DataType.Type.MEDIUMTEXT: "String", 980 exp.DataType.Type.TINYBLOB: "String", 981 exp.DataType.Type.TINYTEXT: "String", 982 exp.DataType.Type.TEXT: "String", 983 exp.DataType.Type.VARBINARY: "String", 984 exp.DataType.Type.VARCHAR: "String", 985 } 986 987 SUPPORTED_JSON_PATH_PARTS = { 988 exp.JSONPathKey, 989 exp.JSONPathRoot, 990 exp.JSONPathSubscript, 991 } 992 993 TYPE_MAPPING = { 994 **generator.Generator.TYPE_MAPPING, 995 **STRING_TYPE_MAPPING, 996 exp.DataType.Type.ARRAY: "Array", 997 exp.DataType.Type.BOOLEAN: "Bool", 998 exp.DataType.Type.BIGINT: "Int64", 999 exp.DataType.Type.DATE32: "Date32", 1000 exp.DataType.Type.DATETIME: "DateTime", 1001 exp.DataType.Type.DATETIME2: "DateTime", 1002 exp.DataType.Type.SMALLDATETIME: "DateTime", 1003 exp.DataType.Type.DATETIME64: "DateTime64", 1004 exp.DataType.Type.DECIMAL: "Decimal", 1005 exp.DataType.Type.DECIMAL32: "Decimal32", 1006 exp.DataType.Type.DECIMAL64: "Decimal64", 1007 exp.DataType.Type.DECIMAL128: "Decimal128", 1008 exp.DataType.Type.DECIMAL256: "Decimal256", 1009 exp.DataType.Type.TIMESTAMP: "DateTime", 1010 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1011 exp.DataType.Type.DOUBLE: "Float64", 1012 exp.DataType.Type.ENUM: "Enum", 1013 exp.DataType.Type.ENUM8: "Enum8", 1014 exp.DataType.Type.ENUM16: "Enum16", 1015 exp.DataType.Type.FIXEDSTRING: "FixedString", 1016 exp.DataType.Type.FLOAT: "Float32", 1017 exp.DataType.Type.INT: "Int32", 1018 exp.DataType.Type.MEDIUMINT: "Int32", 1019 exp.DataType.Type.INT128: "Int128", 1020 exp.DataType.Type.INT256: "Int256", 1021 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1022 exp.DataType.Type.MAP: "Map", 1023 exp.DataType.Type.NESTED: "Nested", 1024 exp.DataType.Type.NOTHING: "Nothing", 1025 exp.DataType.Type.SMALLINT: "Int16", 1026 exp.DataType.Type.STRUCT: "Tuple", 1027 exp.DataType.Type.TINYINT: "Int8", 1028 exp.DataType.Type.UBIGINT: "UInt64", 1029 exp.DataType.Type.UINT: "UInt32", 1030 exp.DataType.Type.UINT128: "UInt128", 1031 exp.DataType.Type.UINT256: "UInt256", 1032 exp.DataType.Type.USMALLINT: "UInt16", 1033 exp.DataType.Type.UTINYINT: "UInt8", 1034 exp.DataType.Type.IPV4: "IPv4", 1035 exp.DataType.Type.IPV6: "IPv6", 1036 exp.DataType.Type.POINT: "Point", 1037 exp.DataType.Type.RING: "Ring", 1038 exp.DataType.Type.LINESTRING: "LineString", 1039 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1040 exp.DataType.Type.POLYGON: "Polygon", 1041 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1042 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1043 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1044 exp.DataType.Type.DYNAMIC: "Dynamic", 1045 } 1046 1047 TRANSFORMS = { 1048 **generator.Generator.TRANSFORMS, 1049 exp.AnyValue: rename_func("any"), 1050 exp.ApproxDistinct: rename_func("uniq"), 1051 exp.ArrayConcat: rename_func("arrayConcat"), 1052 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1053 exp.ArraySum: rename_func("arraySum"), 1054 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1055 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1056 exp.Array: inline_array_sql, 1057 exp.CastToStrType: rename_func("CAST"), 1058 exp.CountIf: rename_func("countIf"), 1059 exp.CompressColumnConstraint: lambda self, 1060 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1061 exp.ComputedColumnConstraint: lambda self, 1062 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1063 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1064 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1065 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1066 exp.DateStrToDate: rename_func("toDate"), 1067 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1068 exp.Explode: rename_func("arrayJoin"), 1069 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1070 exp.IsNan: rename_func("isNaN"), 1071 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1072 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1073 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1074 exp.JSONPathKey: json_path_key_only_name, 1075 exp.JSONPathRoot: lambda *_: "", 1076 exp.Length: length_or_char_length_sql, 1077 exp.Map: _map_sql, 1078 exp.Median: rename_func("median"), 1079 exp.Nullif: rename_func("nullIf"), 1080 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1081 exp.Pivot: no_pivot_sql, 1082 exp.Quantile: _quantile_sql, 1083 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1084 exp.Rand: rename_func("randCanonical"), 1085 exp.StartsWith: rename_func("startsWith"), 1086 exp.StrPosition: lambda self, e: strposition_sql( 1087 self, 1088 e, 1089 func_name="POSITION", 1090 supports_position=True, 1091 use_ansi_position=False, 1092 ), 1093 exp.TimeToStr: lambda self, e: self.func( 1094 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1095 ), 1096 exp.TimeStrToTime: _timestrtotime_sql, 1097 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1098 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1099 exp.VarMap: _map_sql, 1100 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1101 exp.MD5Digest: rename_func("MD5"), 1102 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1103 exp.SHA: rename_func("SHA1"), 1104 exp.SHA2: sha256_sql, 1105 exp.UnixToTime: _unix_to_time_sql, 1106 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1107 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1108 exp.Variance: rename_func("varSamp"), 1109 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1110 exp.Stddev: rename_func("stddevSamp"), 1111 exp.Chr: rename_func("CHAR"), 1112 exp.Lag: lambda self, e: self.func( 1113 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1114 ), 1115 exp.Lead: lambda self, e: self.func( 1116 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1117 ), 1118 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1119 rename_func("editDistance") 1120 ), 1121 } 1122 1123 PROPERTIES_LOCATION = { 1124 **generator.Generator.PROPERTIES_LOCATION, 1125 exp.OnCluster: exp.Properties.Location.POST_NAME, 1126 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1127 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1128 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1129 } 1130 1131 # There's no list in docs, but it can be found in Clickhouse code 1132 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1133 ON_CLUSTER_TARGETS = { 1134 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1135 "DATABASE", 1136 "TABLE", 1137 "VIEW", 1138 "DICTIONARY", 1139 "INDEX", 1140 "FUNCTION", 1141 "NAMED COLLECTION", 1142 } 1143 1144 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1145 NON_NULLABLE_TYPES = { 1146 exp.DataType.Type.ARRAY, 1147 exp.DataType.Type.MAP, 1148 exp.DataType.Type.STRUCT, 1149 exp.DataType.Type.POINT, 1150 exp.DataType.Type.RING, 1151 exp.DataType.Type.LINESTRING, 1152 exp.DataType.Type.MULTILINESTRING, 1153 exp.DataType.Type.POLYGON, 1154 exp.DataType.Type.MULTIPOLYGON, 1155 } 1156 1157 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1158 strtodate_sql = self.function_fallback_sql(expression) 1159 1160 if not isinstance(expression.parent, exp.Cast): 1161 # StrToDate returns DATEs in other dialects (eg. postgres), so 1162 # this branch aims to improve the transpilation to clickhouse 1163 return self.cast_sql(exp.cast(expression, "DATE")) 1164 1165 return strtodate_sql 1166 1167 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1168 this = expression.this 1169 1170 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1171 return self.sql(this) 1172 1173 return super().cast_sql(expression, safe_prefix=safe_prefix) 1174 1175 def trycast_sql(self, expression: exp.TryCast) -> str: 1176 dtype = expression.to 1177 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1178 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1179 dtype.set("nullable", True) 1180 1181 return super().cast_sql(expression) 1182 1183 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1184 this = self.json_path_part(expression.this) 1185 return str(int(this) + 1) if is_int(this) else this 1186 1187 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1188 return f"AS {self.sql(expression, 'this')}" 1189 1190 def _any_to_has( 1191 self, 1192 expression: exp.EQ | exp.NEQ, 1193 default: t.Callable[[t.Any], str], 1194 prefix: str = "", 1195 ) -> str: 1196 if isinstance(expression.left, exp.Any): 1197 arr = expression.left 1198 this = expression.right 1199 elif isinstance(expression.right, exp.Any): 1200 arr = expression.right 1201 this = expression.left 1202 else: 1203 return default(expression) 1204 1205 return prefix + self.func("has", arr.this.unnest(), this) 1206 1207 def eq_sql(self, expression: exp.EQ) -> str: 1208 return self._any_to_has(expression, super().eq_sql) 1209 1210 def neq_sql(self, expression: exp.NEQ) -> str: 1211 return self._any_to_has(expression, super().neq_sql, "NOT ") 1212 1213 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1214 # Manually add a flag to make the search case-insensitive 1215 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1216 return self.func("match", expression.this, regex) 1217 1218 def datatype_sql(self, expression: exp.DataType) -> str: 1219 # String is the standard ClickHouse type, every other variant is just an alias. 1220 # Additionally, any supplied length parameter will be ignored. 1221 # 1222 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1223 if expression.this in self.STRING_TYPE_MAPPING: 1224 dtype = "String" 1225 else: 1226 dtype = super().datatype_sql(expression) 1227 1228 # This section changes the type to `Nullable(...)` if the following conditions hold: 1229 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1230 # and change their semantics 1231 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1232 # constraint: "Type of Map key must be a type, that can be represented by integer or 1233 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1234 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1235 parent = expression.parent 1236 nullable = expression.args.get("nullable") 1237 if nullable is True or ( 1238 nullable is None 1239 and not ( 1240 isinstance(parent, exp.DataType) 1241 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1242 and expression.index in (None, 0) 1243 ) 1244 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1245 ): 1246 dtype = f"Nullable({dtype})" 1247 1248 return dtype 1249 1250 def cte_sql(self, expression: exp.CTE) -> str: 1251 if expression.args.get("scalar"): 1252 this = self.sql(expression, "this") 1253 alias = self.sql(expression, "alias") 1254 return f"{this} AS {alias}" 1255 1256 return super().cte_sql(expression) 1257 1258 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1259 return super().after_limit_modifiers(expression) + [ 1260 ( 1261 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1262 if expression.args.get("settings") 1263 else "" 1264 ), 1265 ( 1266 self.seg("FORMAT ") + self.sql(expression, "format") 1267 if expression.args.get("format") 1268 else "" 1269 ), 1270 ] 1271 1272 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1273 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1274 1275 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1276 return f"ON CLUSTER {self.sql(expression, 'this')}" 1277 1278 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1279 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1280 exp.Properties.Location.POST_NAME 1281 ): 1282 this_name = self.sql( 1283 expression.this if isinstance(expression.this, exp.Schema) else expression, 1284 "this", 1285 ) 1286 this_properties = " ".join( 1287 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1288 ) 1289 this_schema = self.schema_columns_sql(expression.this) 1290 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1291 1292 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1293 1294 return super().createable_sql(expression, locations) 1295 1296 def create_sql(self, expression: exp.Create) -> str: 1297 # The comment property comes last in CTAS statements, i.e. after the query 1298 query = expression.expression 1299 if isinstance(query, exp.Query): 1300 comment_prop = expression.find(exp.SchemaCommentProperty) 1301 if comment_prop: 1302 comment_prop.pop() 1303 query.replace(exp.paren(query)) 1304 else: 1305 comment_prop = None 1306 1307 create_sql = super().create_sql(expression) 1308 1309 comment_sql = self.sql(comment_prop) 1310 comment_sql = f" {comment_sql}" if comment_sql else "" 1311 1312 return f"{create_sql}{comment_sql}" 1313 1314 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1315 this = self.indent(self.sql(expression, "this")) 1316 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1317 1318 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1319 this = self.sql(expression, "this") 1320 this = f" {this}" if this else "" 1321 expr = self.sql(expression, "expression") 1322 expr = f" {expr}" if expr else "" 1323 index_type = self.sql(expression, "index_type") 1324 index_type = f" TYPE {index_type}" if index_type else "" 1325 granularity = self.sql(expression, "granularity") 1326 granularity = f" GRANULARITY {granularity}" if granularity else "" 1327 1328 return f"INDEX{this}{expr}{index_type}{granularity}" 1329 1330 def partition_sql(self, expression: exp.Partition) -> str: 1331 return f"PARTITION {self.expressions(expression, flat=True)}" 1332 1333 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1334 return f"ID {self.sql(expression.this)}" 1335 1336 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1337 return ( 1338 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1339 ) 1340 1341 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1342 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1343 1344 def is_sql(self, expression: exp.Is) -> str: 1345 is_sql = super().is_sql(expression) 1346 1347 if isinstance(expression.parent, exp.Not): 1348 # value IS NOT NULL -> NOT (value IS NULL) 1349 is_sql = self.wrap(is_sql) 1350 1351 return is_sql 1352 1353 def in_sql(self, expression: exp.In) -> str: 1354 in_sql = super().in_sql(expression) 1355 1356 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1357 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1358 1359 return in_sql 1360 1361 def not_sql(self, expression: exp.Not) -> str: 1362 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1363 # let `GLOBAL IN` child interpose `NOT` 1364 return self.sql(expression, "this") 1365 1366 return super().not_sql(expression) 1367 1368 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1369 # If the VALUES clause contains tuples of expressions, we need to treat it 1370 # as a table since Clickhouse will automatically alias it as such. 1371 alias = expression.args.get("alias") 1372 1373 if alias and alias.args.get("columns") and expression.expressions: 1374 values = expression.expressions[0].expressions 1375 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1376 else: 1377 values_as_table = True 1378 1379 return super().values_sql(expression, values_as_table=values_as_table)
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
212 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 213 # Clickhouse allows VALUES to have an embedded structure e.g: 214 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 215 # In this case, we don't want to qualify the columns 216 values = expression.expressions[0].expressions 217 218 structure = ( 219 values[0] 220 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 221 else None 222 ) 223 if structure: 224 # Split each column definition into the column name e.g: 225 # 'person String, place String' -> ['person', 'place'] 226 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 227 column_aliases = [ 228 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 229 ] 230 else: 231 # Default column aliases in CH are "c1", "c2", etc. 232 column_aliases = [ 233 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 234 ] 235 236 return column_aliases
238 class Tokenizer(tokens.Tokenizer): 239 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 240 IDENTIFIERS = ['"', "`"] 241 IDENTIFIER_ESCAPES = ["\\"] 242 STRING_ESCAPES = ["'", "\\"] 243 BIT_STRINGS = [("0b", "")] 244 HEX_STRINGS = [("0x", ""), ("0X", "")] 245 HEREDOC_STRINGS = ["$"] 246 247 KEYWORDS = { 248 **tokens.Tokenizer.KEYWORDS, 249 ".:": TokenType.DOTCOLON, 250 "ATTACH": TokenType.COMMAND, 251 "DATE32": TokenType.DATE32, 252 "DATETIME64": TokenType.DATETIME64, 253 "DICTIONARY": TokenType.DICTIONARY, 254 "DYNAMIC": TokenType.DYNAMIC, 255 "ENUM8": TokenType.ENUM8, 256 "ENUM16": TokenType.ENUM16, 257 "FINAL": TokenType.FINAL, 258 "FIXEDSTRING": TokenType.FIXEDSTRING, 259 "FLOAT32": TokenType.FLOAT, 260 "FLOAT64": TokenType.DOUBLE, 261 "GLOBAL": TokenType.GLOBAL, 262 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 263 "MAP": TokenType.MAP, 264 "NESTED": TokenType.NESTED, 265 "NOTHING": TokenType.NOTHING, 266 "SAMPLE": TokenType.TABLE_SAMPLE, 267 "TUPLE": TokenType.STRUCT, 268 "UINT16": TokenType.USMALLINT, 269 "UINT32": TokenType.UINT, 270 "UINT64": TokenType.UBIGINT, 271 "UINT8": TokenType.UTINYINT, 272 "IPV4": TokenType.IPV4, 273 "IPV6": TokenType.IPV6, 274 "POINT": TokenType.POINT, 275 "RING": TokenType.RING, 276 "LINESTRING": TokenType.LINESTRING, 277 "MULTILINESTRING": TokenType.MULTILINESTRING, 278 "POLYGON": TokenType.POLYGON, 279 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 280 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 281 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 282 "SYSTEM": TokenType.COMMAND, 283 "PREWHERE": TokenType.PREWHERE, 284 } 285 KEYWORDS.pop("/*+") 286 287 SINGLE_TOKENS = { 288 **tokens.Tokenizer.SINGLE_TOKENS, 289 "$": TokenType.HEREDOC_STRING, 290 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
292 class Parser(parser.Parser): 293 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 294 # * select x from t1 union all select x from t2 limit 1; 295 # * select x from t1 union all (select x from t2 limit 1); 296 MODIFIERS_ATTACHED_TO_SET_OP = False 297 INTERVAL_SPANS = False 298 OPTIONAL_ALIAS_TOKEN_CTE = False 299 300 FUNCTIONS = { 301 **parser.Parser.FUNCTIONS, 302 "ANY": exp.AnyValue.from_arg_list, 303 "ARRAYSUM": exp.ArraySum.from_arg_list, 304 "COUNTIF": _build_count_if, 305 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 306 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 307 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 308 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 309 "DATE_FORMAT": _build_date_format, 310 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 311 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 312 "FORMATDATETIME": _build_date_format, 313 "JSONEXTRACTSTRING": build_json_extract_path( 314 exp.JSONExtractScalar, zero_based_indexing=False 315 ), 316 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 317 "MAP": parser.build_var_map, 318 "MATCH": exp.RegexpLike.from_arg_list, 319 "RANDCANONICAL": exp.Rand.from_arg_list, 320 "STR_TO_DATE": _build_str_to_date, 321 "TUPLE": exp.Struct.from_arg_list, 322 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 323 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 324 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 325 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 326 "UNIQ": exp.ApproxDistinct.from_arg_list, 327 "XOR": lambda args: exp.Xor(expressions=args), 328 "MD5": exp.MD5Digest.from_arg_list, 329 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 330 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 331 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 332 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 333 } 334 FUNCTIONS.pop("TRANSFORM") 335 336 AGG_FUNCTIONS = { 337 "count", 338 "min", 339 "max", 340 "sum", 341 "avg", 342 "any", 343 "stddevPop", 344 "stddevSamp", 345 "varPop", 346 "varSamp", 347 "corr", 348 "covarPop", 349 "covarSamp", 350 "entropy", 351 "exponentialMovingAverage", 352 "intervalLengthSum", 353 "kolmogorovSmirnovTest", 354 "mannWhitneyUTest", 355 "median", 356 "rankCorr", 357 "sumKahan", 358 "studentTTest", 359 "welchTTest", 360 "anyHeavy", 361 "anyLast", 362 "boundingRatio", 363 "first_value", 364 "last_value", 365 "argMin", 366 "argMax", 367 "avgWeighted", 368 "topK", 369 "topKWeighted", 370 "deltaSum", 371 "deltaSumTimestamp", 372 "groupArray", 373 "groupArrayLast", 374 "groupUniqArray", 375 "groupArrayInsertAt", 376 "groupArrayMovingAvg", 377 "groupArrayMovingSum", 378 "groupArraySample", 379 "groupBitAnd", 380 "groupBitOr", 381 "groupBitXor", 382 "groupBitmap", 383 "groupBitmapAnd", 384 "groupBitmapOr", 385 "groupBitmapXor", 386 "sumWithOverflow", 387 "sumMap", 388 "minMap", 389 "maxMap", 390 "skewSamp", 391 "skewPop", 392 "kurtSamp", 393 "kurtPop", 394 "uniq", 395 "uniqExact", 396 "uniqCombined", 397 "uniqCombined64", 398 "uniqHLL12", 399 "uniqTheta", 400 "quantile", 401 "quantiles", 402 "quantileExact", 403 "quantilesExact", 404 "quantileExactLow", 405 "quantilesExactLow", 406 "quantileExactHigh", 407 "quantilesExactHigh", 408 "quantileExactWeighted", 409 "quantilesExactWeighted", 410 "quantileTiming", 411 "quantilesTiming", 412 "quantileTimingWeighted", 413 "quantilesTimingWeighted", 414 "quantileDeterministic", 415 "quantilesDeterministic", 416 "quantileTDigest", 417 "quantilesTDigest", 418 "quantileTDigestWeighted", 419 "quantilesTDigestWeighted", 420 "quantileBFloat16", 421 "quantilesBFloat16", 422 "quantileBFloat16Weighted", 423 "quantilesBFloat16Weighted", 424 "simpleLinearRegression", 425 "stochasticLinearRegression", 426 "stochasticLogisticRegression", 427 "categoricalInformationValue", 428 "contingency", 429 "cramersV", 430 "cramersVBiasCorrected", 431 "theilsU", 432 "maxIntersections", 433 "maxIntersectionsPosition", 434 "meanZTest", 435 "quantileInterpolatedWeighted", 436 "quantilesInterpolatedWeighted", 437 "quantileGK", 438 "quantilesGK", 439 "sparkBar", 440 "sumCount", 441 "largestTriangleThreeBuckets", 442 "histogram", 443 "sequenceMatch", 444 "sequenceCount", 445 "windowFunnel", 446 "retention", 447 "uniqUpTo", 448 "sequenceNextNode", 449 "exponentialTimeDecayedAvg", 450 } 451 452 AGG_FUNCTIONS_SUFFIXES = [ 453 "If", 454 "Array", 455 "ArrayIf", 456 "Map", 457 "SimpleState", 458 "State", 459 "Merge", 460 "MergeState", 461 "ForEach", 462 "Distinct", 463 "OrDefault", 464 "OrNull", 465 "Resample", 466 "ArgMin", 467 "ArgMax", 468 ] 469 470 FUNC_TOKENS = { 471 *parser.Parser.FUNC_TOKENS, 472 TokenType.AND, 473 TokenType.OR, 474 TokenType.SET, 475 } 476 477 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 478 479 ID_VAR_TOKENS = { 480 *parser.Parser.ID_VAR_TOKENS, 481 TokenType.LIKE, 482 } 483 484 AGG_FUNC_MAPPING = ( 485 lambda functions, suffixes: { 486 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 487 } 488 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 489 490 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 491 492 FUNCTION_PARSERS = { 493 **parser.Parser.FUNCTION_PARSERS, 494 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 495 "QUANTILE": lambda self: self._parse_quantile(), 496 "MEDIAN": lambda self: self._parse_quantile(), 497 "COLUMNS": lambda self: self._parse_columns(), 498 } 499 500 FUNCTION_PARSERS.pop("MATCH") 501 502 PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy() 503 PROPERTY_PARSERS.pop("DYNAMIC") 504 505 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 506 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 507 508 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 509 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 510 511 RANGE_PARSERS = { 512 **parser.Parser.RANGE_PARSERS, 513 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 514 } 515 516 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 517 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 518 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 519 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 520 521 JOIN_KINDS = { 522 *parser.Parser.JOIN_KINDS, 523 TokenType.ANY, 524 TokenType.ASOF, 525 TokenType.ARRAY, 526 } 527 528 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 529 TokenType.ANY, 530 TokenType.ARRAY, 531 TokenType.FINAL, 532 TokenType.FORMAT, 533 TokenType.SETTINGS, 534 } 535 536 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 537 TokenType.FORMAT, 538 } 539 540 LOG_DEFAULTS_TO_LN = True 541 542 QUERY_MODIFIER_PARSERS = { 543 **parser.Parser.QUERY_MODIFIER_PARSERS, 544 TokenType.SETTINGS: lambda self: ( 545 "settings", 546 self._advance() or self._parse_csv(self._parse_assignment), 547 ), 548 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 549 } 550 551 CONSTRAINT_PARSERS = { 552 **parser.Parser.CONSTRAINT_PARSERS, 553 "INDEX": lambda self: self._parse_index_constraint(), 554 "CODEC": lambda self: self._parse_compress(), 555 } 556 557 ALTER_PARSERS = { 558 **parser.Parser.ALTER_PARSERS, 559 "REPLACE": lambda self: self._parse_alter_table_replace(), 560 } 561 562 SCHEMA_UNNAMED_CONSTRAINTS = { 563 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 564 "INDEX", 565 } 566 567 PLACEHOLDER_PARSERS = { 568 **parser.Parser.PLACEHOLDER_PARSERS, 569 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 570 } 571 572 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 573 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 574 return self._parse_lambda() 575 576 def _parse_types( 577 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 578 ) -> t.Optional[exp.Expression]: 579 dtype = super()._parse_types( 580 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 581 ) 582 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 583 # Mark every type as non-nullable which is ClickHouse's default, unless it's 584 # already marked as nullable. This marker helps us transpile types from other 585 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 586 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 587 # fail in ClickHouse without the `Nullable` type constructor. 588 dtype.set("nullable", False) 589 590 return dtype 591 592 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 593 index = self._index 594 this = self._parse_bitwise() 595 if self._match(TokenType.FROM): 596 self._retreat(index) 597 return super()._parse_extract() 598 599 # We return Anonymous here because extract and regexpExtract have different semantics, 600 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 601 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 602 # 603 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 604 self._match(TokenType.COMMA) 605 return self.expression( 606 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 607 ) 608 609 def _parse_assignment(self) -> t.Optional[exp.Expression]: 610 this = super()._parse_assignment() 611 612 if self._match(TokenType.PLACEHOLDER): 613 return self.expression( 614 exp.If, 615 this=this, 616 true=self._parse_assignment(), 617 false=self._match(TokenType.COLON) and self._parse_assignment(), 618 ) 619 620 return this 621 622 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 623 """ 624 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 625 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 626 """ 627 index = self._index 628 629 this = self._parse_id_var() 630 self._match(TokenType.COLON) 631 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 632 self._match_text_seq("IDENTIFIER") and "Identifier" 633 ) 634 635 if not kind: 636 self._retreat(index) 637 return None 638 elif not self._match(TokenType.R_BRACE): 639 self.raise_error("Expecting }") 640 641 if isinstance(this, exp.Identifier) and not this.quoted: 642 this = exp.var(this.name) 643 644 return self.expression(exp.Placeholder, this=this, kind=kind) 645 646 def _parse_bracket( 647 self, this: t.Optional[exp.Expression] = None 648 ) -> t.Optional[exp.Expression]: 649 l_brace = self._match(TokenType.L_BRACE, advance=False) 650 bracket = super()._parse_bracket(this) 651 652 if l_brace and isinstance(bracket, exp.Struct): 653 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 654 for expression in bracket.expressions: 655 if not isinstance(expression, exp.PropertyEQ): 656 break 657 658 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 659 varmap.args["values"].append("expressions", expression.expression) 660 661 return varmap 662 663 return bracket 664 665 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 666 this = super()._parse_in(this) 667 this.set("is_global", is_global) 668 return this 669 670 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 671 is_negated = self._match(TokenType.NOT) 672 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 673 return self.expression(exp.Not, this=this) if is_negated else this 674 675 def _parse_table( 676 self, 677 schema: bool = False, 678 joins: bool = False, 679 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 680 parse_bracket: bool = False, 681 is_db_reference: bool = False, 682 parse_partition: bool = False, 683 ) -> t.Optional[exp.Expression]: 684 this = super()._parse_table( 685 schema=schema, 686 joins=joins, 687 alias_tokens=alias_tokens, 688 parse_bracket=parse_bracket, 689 is_db_reference=is_db_reference, 690 ) 691 692 if isinstance(this, exp.Table): 693 inner = this.this 694 alias = this.args.get("alias") 695 696 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 697 alias.set("columns", [exp.to_identifier("generate_series")]) 698 699 if self._match(TokenType.FINAL): 700 this = self.expression(exp.Final, this=this) 701 702 return this 703 704 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 705 return super()._parse_position(haystack_first=True) 706 707 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 708 def _parse_cte(self) -> t.Optional[exp.CTE]: 709 # WITH <identifier> AS <subquery expression> 710 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 711 712 if not cte: 713 # WITH <expression> AS <identifier> 714 cte = self.expression( 715 exp.CTE, 716 this=self._parse_assignment(), 717 alias=self._parse_table_alias(), 718 scalar=True, 719 ) 720 721 return cte 722 723 def _parse_join_parts( 724 self, 725 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 726 is_global = self._match(TokenType.GLOBAL) and self._prev 727 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 728 729 if kind_pre: 730 kind = self._match_set(self.JOIN_KINDS) and self._prev 731 side = self._match_set(self.JOIN_SIDES) and self._prev 732 return is_global, side, kind 733 734 return ( 735 is_global, 736 self._match_set(self.JOIN_SIDES) and self._prev, 737 self._match_set(self.JOIN_KINDS) and self._prev, 738 ) 739 740 def _parse_join( 741 self, skip_join_token: bool = False, parse_bracket: bool = False 742 ) -> t.Optional[exp.Join]: 743 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 744 if join: 745 join.set("global", join.args.pop("method", None)) 746 747 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 748 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 749 if join.kind == "ARRAY": 750 for table in join.find_all(exp.Table): 751 table.replace(table.to_column()) 752 753 return join 754 755 def _parse_function( 756 self, 757 functions: t.Optional[t.Dict[str, t.Callable]] = None, 758 anonymous: bool = False, 759 optional_parens: bool = True, 760 any_token: bool = False, 761 ) -> t.Optional[exp.Expression]: 762 expr = super()._parse_function( 763 functions=functions, 764 anonymous=anonymous, 765 optional_parens=optional_parens, 766 any_token=any_token, 767 ) 768 769 func = expr.this if isinstance(expr, exp.Window) else expr 770 771 # Aggregate functions can be split in 2 parts: <func_name><suffix> 772 parts = ( 773 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 774 ) 775 776 if parts: 777 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 778 params = self._parse_func_params(anon_func) 779 780 kwargs = { 781 "this": anon_func.this, 782 "expressions": anon_func.expressions, 783 } 784 if parts[1]: 785 exp_class: t.Type[exp.Expression] = ( 786 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 787 ) 788 else: 789 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 790 791 kwargs["exp_class"] = exp_class 792 if params: 793 kwargs["params"] = params 794 795 func = self.expression(**kwargs) 796 797 if isinstance(expr, exp.Window): 798 # The window's func was parsed as Anonymous in base parser, fix its 799 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 800 expr.set("this", func) 801 elif params: 802 # Params have blocked super()._parse_function() from parsing the following window 803 # (if that exists) as they're standing between the function call and the window spec 804 expr = self._parse_window(func) 805 else: 806 expr = func 807 808 return expr 809 810 def _parse_func_params( 811 self, this: t.Optional[exp.Func] = None 812 ) -> t.Optional[t.List[exp.Expression]]: 813 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 814 return self._parse_csv(self._parse_lambda) 815 816 if self._match(TokenType.L_PAREN): 817 params = self._parse_csv(self._parse_lambda) 818 self._match_r_paren(this) 819 return params 820 821 return None 822 823 def _parse_quantile(self) -> exp.Quantile: 824 this = self._parse_lambda() 825 params = self._parse_func_params() 826 if params: 827 return self.expression(exp.Quantile, this=params[0], quantile=this) 828 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 829 830 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 831 return super()._parse_wrapped_id_vars(optional=True) 832 833 def _parse_primary_key( 834 self, wrapped_optional: bool = False, in_props: bool = False 835 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 836 return super()._parse_primary_key( 837 wrapped_optional=wrapped_optional or in_props, in_props=in_props 838 ) 839 840 def _parse_on_property(self) -> t.Optional[exp.Expression]: 841 index = self._index 842 if self._match_text_seq("CLUSTER"): 843 this = self._parse_string() or self._parse_id_var() 844 if this: 845 return self.expression(exp.OnCluster, this=this) 846 else: 847 self._retreat(index) 848 return None 849 850 def _parse_index_constraint( 851 self, kind: t.Optional[str] = None 852 ) -> exp.IndexColumnConstraint: 853 # INDEX name1 expr TYPE type1(args) GRANULARITY value 854 this = self._parse_id_var() 855 expression = self._parse_assignment() 856 857 index_type = self._match_text_seq("TYPE") and ( 858 self._parse_function() or self._parse_var() 859 ) 860 861 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 862 863 return self.expression( 864 exp.IndexColumnConstraint, 865 this=this, 866 expression=expression, 867 index_type=index_type, 868 granularity=granularity, 869 ) 870 871 def _parse_partition(self) -> t.Optional[exp.Partition]: 872 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 873 if not self._match(TokenType.PARTITION): 874 return None 875 876 if self._match_text_seq("ID"): 877 # Corresponds to the PARTITION ID <string_value> syntax 878 expressions: t.List[exp.Expression] = [ 879 self.expression(exp.PartitionId, this=self._parse_string()) 880 ] 881 else: 882 expressions = self._parse_expressions() 883 884 return self.expression(exp.Partition, expressions=expressions) 885 886 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 887 partition = self._parse_partition() 888 889 if not partition or not self._match(TokenType.FROM): 890 return None 891 892 return self.expression( 893 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 894 ) 895 896 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 897 if not self._match_text_seq("PROJECTION"): 898 return None 899 900 return self.expression( 901 exp.ProjectionDef, 902 this=self._parse_id_var(), 903 expression=self._parse_wrapped(self._parse_statement), 904 ) 905 906 def _parse_constraint(self) -> t.Optional[exp.Expression]: 907 return super()._parse_constraint() or self._parse_projection_def() 908 909 def _parse_alias( 910 self, this: t.Optional[exp.Expression], explicit: bool = False 911 ) -> t.Optional[exp.Expression]: 912 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 913 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 914 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 915 return this 916 917 return super()._parse_alias(this=this, explicit=explicit) 918 919 def _parse_expression(self) -> t.Optional[exp.Expression]: 920 this = super()._parse_expression() 921 922 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 923 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 924 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 925 self._match(TokenType.R_PAREN) 926 927 return this 928 929 def _parse_columns(self) -> exp.Expression: 930 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 931 932 while self._next and self._match_text_seq(")", "APPLY", "("): 933 self._match(TokenType.R_PAREN) 934 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 935 return this 936 937 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 938 value = super()._parse_value(values=values) 939 if not value: 940 return None 941 942 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 943 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 944 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 945 # but the final result is not altered by the extra parentheses. 946 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 947 expressions = value.expressions 948 if values and not isinstance(expressions[-1], exp.Tuple): 949 value.set( 950 "expressions", 951 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 952 ) 953 954 return value
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
956 class Generator(generator.Generator): 957 QUERY_HINTS = False 958 STRUCT_DELIMITER = ("(", ")") 959 NVL2_SUPPORTED = False 960 TABLESAMPLE_REQUIRES_PARENS = False 961 TABLESAMPLE_SIZE_IS_ROWS = False 962 TABLESAMPLE_KEYWORDS = "SAMPLE" 963 LAST_DAY_SUPPORTS_DATE_PART = False 964 CAN_IMPLEMENT_ARRAY_ANY = True 965 SUPPORTS_TO_NUMBER = False 966 JOIN_HINTS = False 967 TABLE_HINTS = False 968 GROUPINGS_SEP = "" 969 SET_OP_MODIFIERS = False 970 ARRAY_SIZE_NAME = "LENGTH" 971 WRAP_DERIVED_VALUES = False 972 973 STRING_TYPE_MAPPING = { 974 exp.DataType.Type.BLOB: "String", 975 exp.DataType.Type.CHAR: "String", 976 exp.DataType.Type.LONGBLOB: "String", 977 exp.DataType.Type.LONGTEXT: "String", 978 exp.DataType.Type.MEDIUMBLOB: "String", 979 exp.DataType.Type.MEDIUMTEXT: "String", 980 exp.DataType.Type.TINYBLOB: "String", 981 exp.DataType.Type.TINYTEXT: "String", 982 exp.DataType.Type.TEXT: "String", 983 exp.DataType.Type.VARBINARY: "String", 984 exp.DataType.Type.VARCHAR: "String", 985 } 986 987 SUPPORTED_JSON_PATH_PARTS = { 988 exp.JSONPathKey, 989 exp.JSONPathRoot, 990 exp.JSONPathSubscript, 991 } 992 993 TYPE_MAPPING = { 994 **generator.Generator.TYPE_MAPPING, 995 **STRING_TYPE_MAPPING, 996 exp.DataType.Type.ARRAY: "Array", 997 exp.DataType.Type.BOOLEAN: "Bool", 998 exp.DataType.Type.BIGINT: "Int64", 999 exp.DataType.Type.DATE32: "Date32", 1000 exp.DataType.Type.DATETIME: "DateTime", 1001 exp.DataType.Type.DATETIME2: "DateTime", 1002 exp.DataType.Type.SMALLDATETIME: "DateTime", 1003 exp.DataType.Type.DATETIME64: "DateTime64", 1004 exp.DataType.Type.DECIMAL: "Decimal", 1005 exp.DataType.Type.DECIMAL32: "Decimal32", 1006 exp.DataType.Type.DECIMAL64: "Decimal64", 1007 exp.DataType.Type.DECIMAL128: "Decimal128", 1008 exp.DataType.Type.DECIMAL256: "Decimal256", 1009 exp.DataType.Type.TIMESTAMP: "DateTime", 1010 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1011 exp.DataType.Type.DOUBLE: "Float64", 1012 exp.DataType.Type.ENUM: "Enum", 1013 exp.DataType.Type.ENUM8: "Enum8", 1014 exp.DataType.Type.ENUM16: "Enum16", 1015 exp.DataType.Type.FIXEDSTRING: "FixedString", 1016 exp.DataType.Type.FLOAT: "Float32", 1017 exp.DataType.Type.INT: "Int32", 1018 exp.DataType.Type.MEDIUMINT: "Int32", 1019 exp.DataType.Type.INT128: "Int128", 1020 exp.DataType.Type.INT256: "Int256", 1021 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1022 exp.DataType.Type.MAP: "Map", 1023 exp.DataType.Type.NESTED: "Nested", 1024 exp.DataType.Type.NOTHING: "Nothing", 1025 exp.DataType.Type.SMALLINT: "Int16", 1026 exp.DataType.Type.STRUCT: "Tuple", 1027 exp.DataType.Type.TINYINT: "Int8", 1028 exp.DataType.Type.UBIGINT: "UInt64", 1029 exp.DataType.Type.UINT: "UInt32", 1030 exp.DataType.Type.UINT128: "UInt128", 1031 exp.DataType.Type.UINT256: "UInt256", 1032 exp.DataType.Type.USMALLINT: "UInt16", 1033 exp.DataType.Type.UTINYINT: "UInt8", 1034 exp.DataType.Type.IPV4: "IPv4", 1035 exp.DataType.Type.IPV6: "IPv6", 1036 exp.DataType.Type.POINT: "Point", 1037 exp.DataType.Type.RING: "Ring", 1038 exp.DataType.Type.LINESTRING: "LineString", 1039 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1040 exp.DataType.Type.POLYGON: "Polygon", 1041 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1042 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1043 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1044 exp.DataType.Type.DYNAMIC: "Dynamic", 1045 } 1046 1047 TRANSFORMS = { 1048 **generator.Generator.TRANSFORMS, 1049 exp.AnyValue: rename_func("any"), 1050 exp.ApproxDistinct: rename_func("uniq"), 1051 exp.ArrayConcat: rename_func("arrayConcat"), 1052 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1053 exp.ArraySum: rename_func("arraySum"), 1054 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1055 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1056 exp.Array: inline_array_sql, 1057 exp.CastToStrType: rename_func("CAST"), 1058 exp.CountIf: rename_func("countIf"), 1059 exp.CompressColumnConstraint: lambda self, 1060 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1061 exp.ComputedColumnConstraint: lambda self, 1062 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1063 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1064 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1065 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1066 exp.DateStrToDate: rename_func("toDate"), 1067 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1068 exp.Explode: rename_func("arrayJoin"), 1069 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1070 exp.IsNan: rename_func("isNaN"), 1071 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1072 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1073 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1074 exp.JSONPathKey: json_path_key_only_name, 1075 exp.JSONPathRoot: lambda *_: "", 1076 exp.Length: length_or_char_length_sql, 1077 exp.Map: _map_sql, 1078 exp.Median: rename_func("median"), 1079 exp.Nullif: rename_func("nullIf"), 1080 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1081 exp.Pivot: no_pivot_sql, 1082 exp.Quantile: _quantile_sql, 1083 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1084 exp.Rand: rename_func("randCanonical"), 1085 exp.StartsWith: rename_func("startsWith"), 1086 exp.StrPosition: lambda self, e: strposition_sql( 1087 self, 1088 e, 1089 func_name="POSITION", 1090 supports_position=True, 1091 use_ansi_position=False, 1092 ), 1093 exp.TimeToStr: lambda self, e: self.func( 1094 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1095 ), 1096 exp.TimeStrToTime: _timestrtotime_sql, 1097 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1098 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1099 exp.VarMap: _map_sql, 1100 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1101 exp.MD5Digest: rename_func("MD5"), 1102 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1103 exp.SHA: rename_func("SHA1"), 1104 exp.SHA2: sha256_sql, 1105 exp.UnixToTime: _unix_to_time_sql, 1106 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1107 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1108 exp.Variance: rename_func("varSamp"), 1109 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1110 exp.Stddev: rename_func("stddevSamp"), 1111 exp.Chr: rename_func("CHAR"), 1112 exp.Lag: lambda self, e: self.func( 1113 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1114 ), 1115 exp.Lead: lambda self, e: self.func( 1116 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1117 ), 1118 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1119 rename_func("editDistance") 1120 ), 1121 } 1122 1123 PROPERTIES_LOCATION = { 1124 **generator.Generator.PROPERTIES_LOCATION, 1125 exp.OnCluster: exp.Properties.Location.POST_NAME, 1126 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1127 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1128 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1129 } 1130 1131 # There's no list in docs, but it can be found in Clickhouse code 1132 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1133 ON_CLUSTER_TARGETS = { 1134 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1135 "DATABASE", 1136 "TABLE", 1137 "VIEW", 1138 "DICTIONARY", 1139 "INDEX", 1140 "FUNCTION", 1141 "NAMED COLLECTION", 1142 } 1143 1144 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1145 NON_NULLABLE_TYPES = { 1146 exp.DataType.Type.ARRAY, 1147 exp.DataType.Type.MAP, 1148 exp.DataType.Type.STRUCT, 1149 exp.DataType.Type.POINT, 1150 exp.DataType.Type.RING, 1151 exp.DataType.Type.LINESTRING, 1152 exp.DataType.Type.MULTILINESTRING, 1153 exp.DataType.Type.POLYGON, 1154 exp.DataType.Type.MULTIPOLYGON, 1155 } 1156 1157 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1158 strtodate_sql = self.function_fallback_sql(expression) 1159 1160 if not isinstance(expression.parent, exp.Cast): 1161 # StrToDate returns DATEs in other dialects (eg. postgres), so 1162 # this branch aims to improve the transpilation to clickhouse 1163 return self.cast_sql(exp.cast(expression, "DATE")) 1164 1165 return strtodate_sql 1166 1167 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1168 this = expression.this 1169 1170 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1171 return self.sql(this) 1172 1173 return super().cast_sql(expression, safe_prefix=safe_prefix) 1174 1175 def trycast_sql(self, expression: exp.TryCast) -> str: 1176 dtype = expression.to 1177 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1178 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1179 dtype.set("nullable", True) 1180 1181 return super().cast_sql(expression) 1182 1183 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1184 this = self.json_path_part(expression.this) 1185 return str(int(this) + 1) if is_int(this) else this 1186 1187 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1188 return f"AS {self.sql(expression, 'this')}" 1189 1190 def _any_to_has( 1191 self, 1192 expression: exp.EQ | exp.NEQ, 1193 default: t.Callable[[t.Any], str], 1194 prefix: str = "", 1195 ) -> str: 1196 if isinstance(expression.left, exp.Any): 1197 arr = expression.left 1198 this = expression.right 1199 elif isinstance(expression.right, exp.Any): 1200 arr = expression.right 1201 this = expression.left 1202 else: 1203 return default(expression) 1204 1205 return prefix + self.func("has", arr.this.unnest(), this) 1206 1207 def eq_sql(self, expression: exp.EQ) -> str: 1208 return self._any_to_has(expression, super().eq_sql) 1209 1210 def neq_sql(self, expression: exp.NEQ) -> str: 1211 return self._any_to_has(expression, super().neq_sql, "NOT ") 1212 1213 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1214 # Manually add a flag to make the search case-insensitive 1215 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1216 return self.func("match", expression.this, regex) 1217 1218 def datatype_sql(self, expression: exp.DataType) -> str: 1219 # String is the standard ClickHouse type, every other variant is just an alias. 1220 # Additionally, any supplied length parameter will be ignored. 1221 # 1222 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1223 if expression.this in self.STRING_TYPE_MAPPING: 1224 dtype = "String" 1225 else: 1226 dtype = super().datatype_sql(expression) 1227 1228 # This section changes the type to `Nullable(...)` if the following conditions hold: 1229 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1230 # and change their semantics 1231 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1232 # constraint: "Type of Map key must be a type, that can be represented by integer or 1233 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1234 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1235 parent = expression.parent 1236 nullable = expression.args.get("nullable") 1237 if nullable is True or ( 1238 nullable is None 1239 and not ( 1240 isinstance(parent, exp.DataType) 1241 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1242 and expression.index in (None, 0) 1243 ) 1244 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1245 ): 1246 dtype = f"Nullable({dtype})" 1247 1248 return dtype 1249 1250 def cte_sql(self, expression: exp.CTE) -> str: 1251 if expression.args.get("scalar"): 1252 this = self.sql(expression, "this") 1253 alias = self.sql(expression, "alias") 1254 return f"{this} AS {alias}" 1255 1256 return super().cte_sql(expression) 1257 1258 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1259 return super().after_limit_modifiers(expression) + [ 1260 ( 1261 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1262 if expression.args.get("settings") 1263 else "" 1264 ), 1265 ( 1266 self.seg("FORMAT ") + self.sql(expression, "format") 1267 if expression.args.get("format") 1268 else "" 1269 ), 1270 ] 1271 1272 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1273 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1274 1275 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1276 return f"ON CLUSTER {self.sql(expression, 'this')}" 1277 1278 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1279 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1280 exp.Properties.Location.POST_NAME 1281 ): 1282 this_name = self.sql( 1283 expression.this if isinstance(expression.this, exp.Schema) else expression, 1284 "this", 1285 ) 1286 this_properties = " ".join( 1287 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1288 ) 1289 this_schema = self.schema_columns_sql(expression.this) 1290 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1291 1292 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1293 1294 return super().createable_sql(expression, locations) 1295 1296 def create_sql(self, expression: exp.Create) -> str: 1297 # The comment property comes last in CTAS statements, i.e. after the query 1298 query = expression.expression 1299 if isinstance(query, exp.Query): 1300 comment_prop = expression.find(exp.SchemaCommentProperty) 1301 if comment_prop: 1302 comment_prop.pop() 1303 query.replace(exp.paren(query)) 1304 else: 1305 comment_prop = None 1306 1307 create_sql = super().create_sql(expression) 1308 1309 comment_sql = self.sql(comment_prop) 1310 comment_sql = f" {comment_sql}" if comment_sql else "" 1311 1312 return f"{create_sql}{comment_sql}" 1313 1314 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1315 this = self.indent(self.sql(expression, "this")) 1316 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1317 1318 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1319 this = self.sql(expression, "this") 1320 this = f" {this}" if this else "" 1321 expr = self.sql(expression, "expression") 1322 expr = f" {expr}" if expr else "" 1323 index_type = self.sql(expression, "index_type") 1324 index_type = f" TYPE {index_type}" if index_type else "" 1325 granularity = self.sql(expression, "granularity") 1326 granularity = f" GRANULARITY {granularity}" if granularity else "" 1327 1328 return f"INDEX{this}{expr}{index_type}{granularity}" 1329 1330 def partition_sql(self, expression: exp.Partition) -> str: 1331 return f"PARTITION {self.expressions(expression, flat=True)}" 1332 1333 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1334 return f"ID {self.sql(expression.this)}" 1335 1336 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1337 return ( 1338 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1339 ) 1340 1341 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1342 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1343 1344 def is_sql(self, expression: exp.Is) -> str: 1345 is_sql = super().is_sql(expression) 1346 1347 if isinstance(expression.parent, exp.Not): 1348 # value IS NOT NULL -> NOT (value IS NULL) 1349 is_sql = self.wrap(is_sql) 1350 1351 return is_sql 1352 1353 def in_sql(self, expression: exp.In) -> str: 1354 in_sql = super().in_sql(expression) 1355 1356 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1357 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1358 1359 return in_sql 1360 1361 def not_sql(self, expression: exp.Not) -> str: 1362 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1363 # let `GLOBAL IN` child interpose `NOT` 1364 return self.sql(expression, "this") 1365 1366 return super().not_sql(expression) 1367 1368 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1369 # If the VALUES clause contains tuples of expressions, we need to treat it 1370 # as a table since Clickhouse will automatically alias it as such. 1371 alias = expression.args.get("alias") 1372 1373 if alias and alias.args.get("columns") and expression.expressions: 1374 values = expression.expressions[0].expressions 1375 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1376 else: 1377 values_as_table = True 1378 1379 return super().values_sql(expression, values_as_table=values_as_table)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1157 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1158 strtodate_sql = self.function_fallback_sql(expression) 1159 1160 if not isinstance(expression.parent, exp.Cast): 1161 # StrToDate returns DATEs in other dialects (eg. postgres), so 1162 # this branch aims to improve the transpilation to clickhouse 1163 return self.cast_sql(exp.cast(expression, "DATE")) 1164 1165 return strtodate_sql
1167 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1168 this = expression.this 1169 1170 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1171 return self.sql(this) 1172 1173 return super().cast_sql(expression, safe_prefix=safe_prefix)
1175 def trycast_sql(self, expression: exp.TryCast) -> str: 1176 dtype = expression.to 1177 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1178 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1179 dtype.set("nullable", True) 1180 1181 return super().cast_sql(expression)
1218 def datatype_sql(self, expression: exp.DataType) -> str: 1219 # String is the standard ClickHouse type, every other variant is just an alias. 1220 # Additionally, any supplied length parameter will be ignored. 1221 # 1222 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1223 if expression.this in self.STRING_TYPE_MAPPING: 1224 dtype = "String" 1225 else: 1226 dtype = super().datatype_sql(expression) 1227 1228 # This section changes the type to `Nullable(...)` if the following conditions hold: 1229 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1230 # and change their semantics 1231 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1232 # constraint: "Type of Map key must be a type, that can be represented by integer or 1233 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1234 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1235 parent = expression.parent 1236 nullable = expression.args.get("nullable") 1237 if nullable is True or ( 1238 nullable is None 1239 and not ( 1240 isinstance(parent, exp.DataType) 1241 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1242 and expression.index in (None, 0) 1243 ) 1244 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1245 ): 1246 dtype = f"Nullable({dtype})" 1247 1248 return dtype
1258 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1259 return super().after_limit_modifiers(expression) + [ 1260 ( 1261 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1262 if expression.args.get("settings") 1263 else "" 1264 ), 1265 ( 1266 self.seg("FORMAT ") + self.sql(expression, "format") 1267 if expression.args.get("format") 1268 else "" 1269 ), 1270 ]
1278 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1279 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1280 exp.Properties.Location.POST_NAME 1281 ): 1282 this_name = self.sql( 1283 expression.this if isinstance(expression.this, exp.Schema) else expression, 1284 "this", 1285 ) 1286 this_properties = " ".join( 1287 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1288 ) 1289 this_schema = self.schema_columns_sql(expression.this) 1290 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1291 1292 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1293 1294 return super().createable_sql(expression, locations)
1296 def create_sql(self, expression: exp.Create) -> str: 1297 # The comment property comes last in CTAS statements, i.e. after the query 1298 query = expression.expression 1299 if isinstance(query, exp.Query): 1300 comment_prop = expression.find(exp.SchemaCommentProperty) 1301 if comment_prop: 1302 comment_prop.pop() 1303 query.replace(exp.paren(query)) 1304 else: 1305 comment_prop = None 1306 1307 create_sql = super().create_sql(expression) 1308 1309 comment_sql = self.sql(comment_prop) 1310 comment_sql = f" {comment_sql}" if comment_sql else "" 1311 1312 return f"{create_sql}{comment_sql}"
1318 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1319 this = self.sql(expression, "this") 1320 this = f" {this}" if this else "" 1321 expr = self.sql(expression, "expression") 1322 expr = f" {expr}" if expr else "" 1323 index_type = self.sql(expression, "index_type") 1324 index_type = f" TYPE {index_type}" if index_type else "" 1325 granularity = self.sql(expression, "granularity") 1326 granularity = f" GRANULARITY {granularity}" if granularity else "" 1327 1328 return f"INDEX{this}{expr}{index_type}{granularity}"
1368 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1369 # If the VALUES clause contains tuples of expressions, we need to treat it 1370 # as a table since Clickhouse will automatically alias it as such. 1371 alias = expression.args.get("alias") 1372 1373 if alias and alias.args.get("columns") and expression.expressions: 1374 values = expression.expressions[0].expressions 1375 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1376 else: 1377 values_as_table = True 1378 1379 return super().values_sql(expression, values_as_table=values_as_table)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql