sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    remove_from_array_using_filter,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd
]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql
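# A hedged illustration of `_quantile_sql` above (output strings are indicative,
# derived from the function's branches on `exp.Array` rather than verified against
# ClickHouse):
#
#   exp.Quantile(this=x, quantile=0.5)          -> quantile(0.5)(x)
#   exp.Quantile(this=x, quantile=[0.25, 0.75]) -> quantiles(0.25, 0.75)(x)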
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"
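# A hedged sketch of `_timestrtotime_sql` above for a zoned literal (the zone name is
# an illustrative placeholder; the output shape follows the comments in the function):
#
#   TimeStrToTime('2020-01-01 12:13:14-08:00', zone='US/Pacific')
#   -> CAST('2020-01-01 12:13:14' AS DateTime64(6, 'US/Pacific'))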
class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure e.g:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name e.g:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
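    # A hedged illustration of `generate_values_aliases` above (aliases taken from
    # the comments in the method; inputs indicative):
    #
    #   VALUES('person String, place String', ('Noah', 'Paris')) -> person, place
    #   VALUES((1, 2), (3, 4))                                   -> c1, c2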
    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
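        # A hedged note on `_build_count_if` (registered as "COUNTIF" above): a single
        # argument parses to the portable node, while extra arguments keep the
        # ClickHouse combinator form, e.g.
        #
        #   countIf(cond)    -> exp.CountIf
        #   countIf(x, cond) -> exp.CombinedAggFunc(this="countIf", ...)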
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }
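        # A hedged illustration of AGG_FUNC_MAPPING above, which pairs every
        # "<function><suffix>" spelling with its parts (values follow directly from
        # the comprehension):
        #
        #   AGG_FUNC_MAPPING["sumIf"]     == ("sum", "If")
        #   AGG_FUNC_MAPPING["uniqMerge"] == ("uniq", "Merge")
        #   AGG_FUNC_MAPPING["quantile"]  == ("quantile", "")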
        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
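        # A hedged example of the nullable marking in `_parse_types` above; the output
        # string is the one the comment itself promises:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0]
        #   'SELECT CAST(x AS Nullable(String))'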
        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)
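        # A hedged sketch of `_parse_query_parameter` above, per the linked docs
        # (node shapes indicative):
        #
        #   SELECT {abc: UInt32}     -> exp.Placeholder(this="abc", kind=UInt32)
        #   FROM {table: Identifier} -> exp.Placeholder whose kind is "Identifier"
        #
        # `Generator.placeholder_sql` below renders these back as {name: Kind}.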
        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None
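        # A hedged sketch of what `_parse_function` and `_parse_func_params` above do
        # with combinators and parameters (node classes traced from the branches, not
        # verified at runtime):
        #
        #   topK(10)(x)           -> exp.ParameterizedAgg
        #   sumIf(x, cond)        -> exp.CombinedAggFunc
        #   quantileIf(0.5)(x, c) -> exp.CombinedParameterizedAgg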
        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
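        # A hedged example of the constraint shape `_parse_index_constraint` above
        # accepts (shape from the comment in the method; names indicative):
        #
        #   INDEX idx_name my_expr TYPE minmax GRANULARITY 4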
        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However,
            # "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }
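        # A hedged note on STRING_TYPE_MAPPING above: every string/blob variant
        # collapses to ClickHouse `String`, and `datatype_sql` below also drops any
        # length parameter (outputs indicative):
        #
        #   VARCHAR(255) -> String    TEXT -> String    LONGBLOB -> String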
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self, e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self, e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.EndsWith: rename_func("endsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }
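        # Hedged examples of a few TRANSFORMS above (read off the lambdas; output
        # strings indicative):
        #
        #   exp.ApproxDistinct -> uniq(x)
        #   exp.MD5            -> LOWER(HEX(MD5(x)))
        #   exp.Lag            -> lagInFrame(x, offset, default)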
        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
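        # A hedged illustration of the ANY-to-has rewrite in `_any_to_has` above
        # (outputs indicative; the "NOT " prefix comes from `neq_sql`):
        #
        #   x = ANY(arr)  -> has(arr, x)
        #   x <> ANY(arr) -> NOT has(arr, x)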
        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
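        # A hedged example of the scalar CTE form `cte_sql` above emits, i.e.
        # ClickHouse's "WITH <expression> AS <identifier>" syntax (query indicative):
        #
        #   WITH 1 + 1 AS two SELECT two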
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
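        # A hedged example of the trailing query modifiers emitted by
        # `after_limit_modifiers` above (query shape follows the SETTINGS/FORMAT
        # parsers in this dialect):
        #
        #   SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSON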
<subquery expression> 724 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 725 726 if not cte: 727 # WITH <expression> AS <identifier> 728 cte = self.expression( 729 exp.CTE, 730 this=self._parse_assignment(), 731 alias=self._parse_table_alias(), 732 scalar=True, 733 ) 734 735 return cte 736 737 def _parse_join_parts( 738 self, 739 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 740 is_global = self._match(TokenType.GLOBAL) and self._prev 741 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 742 743 if kind_pre: 744 kind = self._match_set(self.JOIN_KINDS) and self._prev 745 side = self._match_set(self.JOIN_SIDES) and self._prev 746 return is_global, side, kind 747 748 return ( 749 is_global, 750 self._match_set(self.JOIN_SIDES) and self._prev, 751 self._match_set(self.JOIN_KINDS) and self._prev, 752 ) 753 754 def _parse_join( 755 self, skip_join_token: bool = False, parse_bracket: bool = False 756 ) -> t.Optional[exp.Join]: 757 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 758 if join: 759 join.set("global", join.args.pop("method", None)) 760 761 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 762 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 763 if join.kind == "ARRAY": 764 for table in join.find_all(exp.Table): 765 table.replace(table.to_column()) 766 767 return join 768 769 def _parse_function( 770 self, 771 functions: t.Optional[t.Dict[str, t.Callable]] = None, 772 anonymous: bool = False, 773 optional_parens: bool = True, 774 any_token: bool = False, 775 ) -> t.Optional[exp.Expression]: 776 expr = super()._parse_function( 777 functions=functions, 778 anonymous=anonymous, 779 optional_parens=optional_parens, 780 any_token=any_token, 781 ) 782 783 func = expr.this if isinstance(expr, exp.Window) else expr 784 785 # Aggregate functions can be split in 2 parts: <func_name><suffix> 786 parts = ( 787 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 788 ) 789 790 if parts: 791 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 792 params = self._parse_func_params(anon_func) 793 794 kwargs = { 795 "this": anon_func.this, 796 "expressions": anon_func.expressions, 797 } 798 if parts[1]: 799 exp_class: t.Type[exp.Expression] = ( 800 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 801 ) 802 else: 803 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 804 805 kwargs["exp_class"] = exp_class 806 if params: 807 kwargs["params"] = params 808 809 func = self.expression(**kwargs) 810 811 if isinstance(expr, exp.Window): 812 # The window's func was parsed as Anonymous in base parser, fix its 813 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 814 expr.set("this", func) 815 elif params: 816 # Params have blocked super()._parse_function() from parsing the following window 817 # (if that exists) as they're standing between the function call and the window spec 818 expr = self._parse_window(func) 819 else: 820 expr = func 821 822 return expr 823 824 def _parse_func_params( 825 self, this: t.Optional[exp.Func] = None 826 ) -> t.Optional[t.List[exp.Expression]]: 827 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 828 return self._parse_csv(self._parse_lambda) 829 830 if self._match(TokenType.L_PAREN): 831 params = self._parse_csv(self._parse_lambda) 832 self._match_r_paren(this) 833 return params 834 835 return None 836 837 def _parse_quantile(self) -> 
exp.Quantile: 838 this = self._parse_lambda() 839 params = self._parse_func_params() 840 if params: 841 return self.expression(exp.Quantile, this=params[0], quantile=this) 842 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 843 844 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 845 return super()._parse_wrapped_id_vars(optional=True) 846 847 def _parse_primary_key( 848 self, wrapped_optional: bool = False, in_props: bool = False 849 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 850 return super()._parse_primary_key( 851 wrapped_optional=wrapped_optional or in_props, in_props=in_props 852 ) 853 854 def _parse_on_property(self) -> t.Optional[exp.Expression]: 855 index = self._index 856 if self._match_text_seq("CLUSTER"): 857 this = self._parse_string() or self._parse_id_var() 858 if this: 859 return self.expression(exp.OnCluster, this=this) 860 else: 861 self._retreat(index) 862 return None 863 864 def _parse_index_constraint( 865 self, kind: t.Optional[str] = None 866 ) -> exp.IndexColumnConstraint: 867 # INDEX name1 expr TYPE type1(args) GRANULARITY value 868 this = self._parse_id_var() 869 expression = self._parse_assignment() 870 871 index_type = self._match_text_seq("TYPE") and ( 872 self._parse_function() or self._parse_var() 873 ) 874 875 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 876 877 return self.expression( 878 exp.IndexColumnConstraint, 879 this=this, 880 expression=expression, 881 index_type=index_type, 882 granularity=granularity, 883 ) 884 885 def _parse_partition(self) -> t.Optional[exp.Partition]: 886 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 887 if not self._match(TokenType.PARTITION): 888 return None 889 890 if self._match_text_seq("ID"): 891 # Corresponds to the PARTITION ID <string_value> syntax 892 expressions: t.List[exp.Expression] = [ 893 self.expression(exp.PartitionId, this=self._parse_string()) 894 ] 895 else: 896 expressions = self._parse_expressions() 897 898 return self.expression(exp.Partition, expressions=expressions) 899 900 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 901 partition = self._parse_partition() 902 903 if not partition or not self._match(TokenType.FROM): 904 return None 905 906 return self.expression( 907 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 908 ) 909 910 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 911 if not self._match_text_seq("PROJECTION"): 912 return None 913 914 return self.expression( 915 exp.ProjectionDef, 916 this=self._parse_id_var(), 917 expression=self._parse_wrapped(self._parse_statement), 918 ) 919 920 def _parse_constraint(self) -> t.Optional[exp.Expression]: 921 return super()._parse_constraint() or self._parse_projection_def() 922 923 def _parse_alias( 924 self, this: t.Optional[exp.Expression], explicit: bool = False 925 ) -> t.Optional[exp.Expression]: 926 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 927 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 928 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 929 return this 930 931 return super()._parse_alias(this=this, explicit=explicit) 932 933 def _parse_expression(self) -> t.Optional[exp.Expression]: 934 this = super()._parse_expression() 935 936 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 937 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 938 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 939 self._match(TokenType.R_PAREN) 940 941 return this 942 943 def _parse_columns(self) -> exp.Expression: 944 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 945 946 while self._next and self._match_text_seq(")", "APPLY", "("): 947 self._match(TokenType.R_PAREN) 948 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 949 return this 950 951 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 952 value = super()._parse_value(values=values) 953 if not value: 954 return None 955 956 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 957 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 958 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 959 # but the final result is not altered by the extra parentheses. 960 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 961 expressions = value.expressions 962 if values and not isinstance(expressions[-1], exp.Tuple): 963 value.set( 964 "expressions", 965 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 966 ) 967 968 return value 969 970 class Generator(generator.Generator): 971 QUERY_HINTS = False 972 STRUCT_DELIMITER = ("(", ")") 973 NVL2_SUPPORTED = False 974 TABLESAMPLE_REQUIRES_PARENS = False 975 TABLESAMPLE_SIZE_IS_ROWS = False 976 TABLESAMPLE_KEYWORDS = "SAMPLE" 977 LAST_DAY_SUPPORTS_DATE_PART = False 978 CAN_IMPLEMENT_ARRAY_ANY = True 979 SUPPORTS_TO_NUMBER = False 980 JOIN_HINTS = False 981 TABLE_HINTS = False 982 GROUPINGS_SEP = "" 983 SET_OP_MODIFIERS = False 984 ARRAY_SIZE_NAME = "LENGTH" 985 WRAP_DERIVED_VALUES = False 986 987 STRING_TYPE_MAPPING = { 988 exp.DataType.Type.BLOB: "String", 989 exp.DataType.Type.CHAR: "String", 990 exp.DataType.Type.LONGBLOB: "String", 991 exp.DataType.Type.LONGTEXT: "String", 992 exp.DataType.Type.MEDIUMBLOB: "String", 993 exp.DataType.Type.MEDIUMTEXT: "String", 994 exp.DataType.Type.TINYBLOB: "String", 995 exp.DataType.Type.TINYTEXT: "String", 996 exp.DataType.Type.TEXT: "String", 997 exp.DataType.Type.VARBINARY: "String", 998 exp.DataType.Type.VARCHAR: "String", 999 } 1000 1001 SUPPORTED_JSON_PATH_PARTS = { 1002 exp.JSONPathKey, 1003 exp.JSONPathRoot, 1004 exp.JSONPathSubscript, 1005 } 1006 1007 TYPE_MAPPING = { 1008 **generator.Generator.TYPE_MAPPING, 1009 **STRING_TYPE_MAPPING, 1010 exp.DataType.Type.ARRAY: "Array", 1011 exp.DataType.Type.BOOLEAN: "Bool", 1012 exp.DataType.Type.BIGINT: "Int64", 1013 exp.DataType.Type.DATE32: "Date32", 1014 exp.DataType.Type.DATETIME: "DateTime", 1015 exp.DataType.Type.DATETIME2: "DateTime", 1016 exp.DataType.Type.SMALLDATETIME: "DateTime", 1017 exp.DataType.Type.DATETIME64: "DateTime64", 1018 exp.DataType.Type.DECIMAL: "Decimal", 1019 exp.DataType.Type.DECIMAL32: "Decimal32", 1020 exp.DataType.Type.DECIMAL64: "Decimal64", 1021 
exp.DataType.Type.DECIMAL128: "Decimal128", 1022 exp.DataType.Type.DECIMAL256: "Decimal256", 1023 exp.DataType.Type.TIMESTAMP: "DateTime", 1024 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1025 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1026 exp.DataType.Type.DOUBLE: "Float64", 1027 exp.DataType.Type.ENUM: "Enum", 1028 exp.DataType.Type.ENUM8: "Enum8", 1029 exp.DataType.Type.ENUM16: "Enum16", 1030 exp.DataType.Type.FIXEDSTRING: "FixedString", 1031 exp.DataType.Type.FLOAT: "Float32", 1032 exp.DataType.Type.INT: "Int32", 1033 exp.DataType.Type.MEDIUMINT: "Int32", 1034 exp.DataType.Type.INT128: "Int128", 1035 exp.DataType.Type.INT256: "Int256", 1036 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1037 exp.DataType.Type.MAP: "Map", 1038 exp.DataType.Type.NESTED: "Nested", 1039 exp.DataType.Type.NOTHING: "Nothing", 1040 exp.DataType.Type.SMALLINT: "Int16", 1041 exp.DataType.Type.STRUCT: "Tuple", 1042 exp.DataType.Type.TINYINT: "Int8", 1043 exp.DataType.Type.UBIGINT: "UInt64", 1044 exp.DataType.Type.UINT: "UInt32", 1045 exp.DataType.Type.UINT128: "UInt128", 1046 exp.DataType.Type.UINT256: "UInt256", 1047 exp.DataType.Type.USMALLINT: "UInt16", 1048 exp.DataType.Type.UTINYINT: "UInt8", 1049 exp.DataType.Type.IPV4: "IPv4", 1050 exp.DataType.Type.IPV6: "IPv6", 1051 exp.DataType.Type.POINT: "Point", 1052 exp.DataType.Type.RING: "Ring", 1053 exp.DataType.Type.LINESTRING: "LineString", 1054 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1055 exp.DataType.Type.POLYGON: "Polygon", 1056 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1057 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1058 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1059 exp.DataType.Type.DYNAMIC: "Dynamic", 1060 } 1061 1062 TRANSFORMS = { 1063 **generator.Generator.TRANSFORMS, 1064 exp.AnyValue: rename_func("any"), 1065 exp.ApproxDistinct: rename_func("uniq"), 1066 exp.ArrayConcat: rename_func("arrayConcat"), 1067 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1068 exp.ArrayRemove: remove_from_array_using_filter, 1069 exp.ArraySum: rename_func("arraySum"), 1070 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1071 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1072 exp.Array: inline_array_sql, 1073 exp.CastToStrType: rename_func("CAST"), 1074 exp.CountIf: rename_func("countIf"), 1075 exp.CompressColumnConstraint: lambda self, 1076 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1077 exp.ComputedColumnConstraint: lambda self, 1078 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1079 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1080 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1081 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1082 exp.DateStrToDate: rename_func("toDate"), 1083 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1084 exp.Explode: rename_func("arrayJoin"), 1085 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1086 exp.IsNan: rename_func("isNaN"), 1087 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1088 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1089 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1090 exp.JSONPathKey: json_path_key_only_name, 1091 exp.JSONPathRoot: lambda *_: "", 1092 exp.Length: length_or_char_length_sql, 1093 exp.Map: _map_sql, 1094 exp.Median: rename_func("median"), 1095 exp.Nullif: rename_func("nullIf"), 1096 exp.PartitionedByProperty: lambda self, e: 
f"PARTITION BY {self.sql(e, 'this')}", 1097 exp.Pivot: no_pivot_sql, 1098 exp.Quantile: _quantile_sql, 1099 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1100 exp.Rand: rename_func("randCanonical"), 1101 exp.StartsWith: rename_func("startsWith"), 1102 exp.EndsWith: rename_func("endsWith"), 1103 exp.StrPosition: lambda self, e: strposition_sql( 1104 self, 1105 e, 1106 func_name="POSITION", 1107 supports_position=True, 1108 use_ansi_position=False, 1109 ), 1110 exp.TimeToStr: lambda self, e: self.func( 1111 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1112 ), 1113 exp.TimeStrToTime: _timestrtotime_sql, 1114 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1115 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1116 exp.VarMap: _map_sql, 1117 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1118 exp.MD5Digest: rename_func("MD5"), 1119 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1120 exp.SHA: rename_func("SHA1"), 1121 exp.SHA2: sha256_sql, 1122 exp.UnixToTime: _unix_to_time_sql, 1123 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1124 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1125 exp.Variance: rename_func("varSamp"), 1126 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1127 exp.Stddev: rename_func("stddevSamp"), 1128 exp.Chr: rename_func("CHAR"), 1129 exp.Lag: lambda self, e: self.func( 1130 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1131 ), 1132 exp.Lead: lambda self, e: self.func( 1133 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1134 ), 1135 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1136 rename_func("editDistance") 1137 ), 1138 } 1139 1140 PROPERTIES_LOCATION = { 1141 **generator.Generator.PROPERTIES_LOCATION, 1142 exp.OnCluster: exp.Properties.Location.POST_NAME, 1143 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1144 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1145 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1146 } 1147 1148 # There's no list in docs, but it can be found in Clickhouse code 1149 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1150 ON_CLUSTER_TARGETS = { 1151 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1152 "DATABASE", 1153 "TABLE", 1154 "VIEW", 1155 "DICTIONARY", 1156 "INDEX", 1157 "FUNCTION", 1158 "NAMED COLLECTION", 1159 } 1160 1161 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1162 NON_NULLABLE_TYPES = { 1163 exp.DataType.Type.ARRAY, 1164 exp.DataType.Type.MAP, 1165 exp.DataType.Type.STRUCT, 1166 exp.DataType.Type.POINT, 1167 exp.DataType.Type.RING, 1168 exp.DataType.Type.LINESTRING, 1169 exp.DataType.Type.MULTILINESTRING, 1170 exp.DataType.Type.POLYGON, 1171 exp.DataType.Type.MULTIPOLYGON, 1172 } 1173 1174 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1175 strtodate_sql = self.function_fallback_sql(expression) 1176 1177 if not isinstance(expression.parent, exp.Cast): 1178 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1179 # this branch aims to improve the transpilation to clickhouse 1180 return self.cast_sql(exp.cast(expression, "DATE")) 1181 1182 return strtodate_sql 1183 1184 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1185 this = expression.this 1186 1187 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1188 return self.sql(this) 1189 1190 return super().cast_sql(expression, safe_prefix=safe_prefix) 1191 1192 def trycast_sql(self, expression: exp.TryCast) -> str: 1193 dtype = expression.to 1194 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1195 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1196 dtype.set("nullable", True) 1197 1198 return super().cast_sql(expression) 1199 1200 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1201 this = self.json_path_part(expression.this) 1202 return str(int(this) + 1) if is_int(this) else this 1203 1204 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1205 return f"AS {self.sql(expression, 'this')}" 1206 1207 def _any_to_has( 1208 self, 1209 expression: exp.EQ | exp.NEQ, 1210 default: t.Callable[[t.Any], str], 1211 prefix: str = "", 1212 ) -> str: 1213 if isinstance(expression.left, exp.Any): 1214 arr = expression.left 1215 this = expression.right 1216 elif isinstance(expression.right, exp.Any): 1217 arr = expression.right 1218 this = expression.left 1219 else: 1220 return default(expression) 1221 1222 return prefix + self.func("has", arr.this.unnest(), this) 1223 1224 def eq_sql(self, expression: exp.EQ) -> str: 1225 return self._any_to_has(expression, super().eq_sql) 1226 1227 def neq_sql(self, expression: exp.NEQ) -> str: 1228 return self._any_to_has(expression, super().neq_sql, "NOT ") 1229 1230 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1231 # Manually add a flag to make the search case-insensitive 1232 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1233 return self.func("match", expression.this, regex) 1234 1235 def datatype_sql(self, expression: exp.DataType) -> str: 1236 # String is the standard ClickHouse type, every other variant is just an alias. 1237 # Additionally, any supplied length parameter will be ignored. 1238 # 1239 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1240 if expression.this in self.STRING_TYPE_MAPPING: 1241 dtype = "String" 1242 else: 1243 dtype = super().datatype_sql(expression) 1244 1245 # This section changes the type to `Nullable(...)` if the following conditions hold: 1246 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1247 # and change their semantics 1248 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1249 # constraint: "Type of Map key must be a type, that can be represented by integer or 1250 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1251 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1252 parent = expression.parent 1253 nullable = expression.args.get("nullable") 1254 if nullable is True or ( 1255 nullable is None 1256 and not ( 1257 isinstance(parent, exp.DataType) 1258 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1259 and expression.index in (None, 0) 1260 ) 1261 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1262 ): 1263 dtype = f"Nullable({dtype})" 1264 1265 return dtype 1266 1267 def cte_sql(self, expression: exp.CTE) -> str: 1268 if expression.args.get("scalar"): 1269 this = self.sql(expression, "this") 1270 alias = self.sql(expression, "alias") 1271 return f"{this} AS {alias}" 1272 1273 return super().cte_sql(expression) 1274 1275 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1276 return super().after_limit_modifiers(expression) + [ 1277 ( 1278 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1279 if expression.args.get("settings") 1280 else "" 1281 ), 1282 ( 1283 self.seg("FORMAT ") + self.sql(expression, "format") 1284 if expression.args.get("format") 1285 else "" 1286 ), 1287 ] 1288 1289 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1290 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1291 1292 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1293 return f"ON CLUSTER {self.sql(expression, 'this')}" 1294 1295 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1296 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1297 exp.Properties.Location.POST_NAME 1298 ): 1299 this_name = self.sql( 1300 expression.this if isinstance(expression.this, exp.Schema) else expression, 1301 "this", 1302 ) 1303 this_properties = " ".join( 1304 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1305 ) 1306 this_schema = self.schema_columns_sql(expression.this) 1307 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1308 1309 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1310 1311 return super().createable_sql(expression, locations) 1312 1313 def create_sql(self, expression: exp.Create) -> str: 1314 # The comment property comes last in CTAS statements, i.e. 
after the query 1315 query = expression.expression 1316 if isinstance(query, exp.Query): 1317 comment_prop = expression.find(exp.SchemaCommentProperty) 1318 if comment_prop: 1319 comment_prop.pop() 1320 query.replace(exp.paren(query)) 1321 else: 1322 comment_prop = None 1323 1324 create_sql = super().create_sql(expression) 1325 1326 comment_sql = self.sql(comment_prop) 1327 comment_sql = f" {comment_sql}" if comment_sql else "" 1328 1329 return f"{create_sql}{comment_sql}" 1330 1331 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1332 this = self.indent(self.sql(expression, "this")) 1333 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1334 1335 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1336 this = self.sql(expression, "this") 1337 this = f" {this}" if this else "" 1338 expr = self.sql(expression, "expression") 1339 expr = f" {expr}" if expr else "" 1340 index_type = self.sql(expression, "index_type") 1341 index_type = f" TYPE {index_type}" if index_type else "" 1342 granularity = self.sql(expression, "granularity") 1343 granularity = f" GRANULARITY {granularity}" if granularity else "" 1344 1345 return f"INDEX{this}{expr}{index_type}{granularity}" 1346 1347 def partition_sql(self, expression: exp.Partition) -> str: 1348 return f"PARTITION {self.expressions(expression, flat=True)}" 1349 1350 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1351 return f"ID {self.sql(expression.this)}" 1352 1353 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1354 return ( 1355 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1356 ) 1357 1358 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1359 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1360 1361 def is_sql(self, expression: exp.Is) -> str: 1362 is_sql = super().is_sql(expression) 1363 1364 if isinstance(expression.parent, exp.Not): 1365 # value IS NOT NULL -> NOT (value IS NULL) 1366 is_sql = self.wrap(is_sql) 1367 1368 return is_sql 1369 1370 def in_sql(self, expression: exp.In) -> str: 1371 in_sql = super().in_sql(expression) 1372 1373 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1374 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1375 1376 return in_sql 1377 1378 def not_sql(self, expression: exp.Not) -> str: 1379 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1380 # let `GLOBAL IN` child interpose `NOT` 1381 return self.sql(expression, "this") 1382 1383 return super().not_sql(expression) 1384 1385 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1386 # If the VALUES clause contains tuples of expressions, we need to treat it 1387 # as a table since Clickhouse will automatically alias it as such. 1388 alias = expression.args.get("alias") 1389 1390 if alias and alias.args.get("columns") and expression.expressions: 1391 values = expression.expressions[0].expressions 1392 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1393 else: 1394 values_as_table = True 1395 1396 return super().values_sql(expression, values_as_table=values_as_table)
Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
        SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
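An illustrative sketch (assuming the optimizer's qualify helper): running the example above through qualification with the ClickHouse dialect should resolve the alias as described.

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data
    WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """

    # Under ClickHouse semantics the alias should resolve to "id" in the
    # WHERE/GROUP BY/HAVING clauses, per the rules above.
    print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))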
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
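For instance, a hedged sketch of the documented behavior (exact output formatting may vary):

    import sqlglot

    # CREATE SCHEMA should be rewritten to ClickHouse's CREATE DATABASE.
    print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])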
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
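A quick way to observe the default (a sketch, assuming the parsed set operation exposes its distinct arg):

    import sqlglot

    # With DISTINCT-by-default semantics, a bare UNION parses with
    # distinct=True; dialects whose default is None require an explicit
    # DISTINCT or ALL keyword instead.
    union = sqlglot.parse_one("SELECT 1 UNION SELECT 2")
    print(union.args.get("distinct"))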
213 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 214 # Clickhouse allows VALUES to have an embedded structure e.g: 215 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 216 # In this case, we don't want to qualify the columns 217 values = expression.expressions[0].expressions 218 219 structure = ( 220 values[0] 221 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 222 else None 223 ) 224 if structure: 225 # Split each column definition into the column name e.g: 226 # 'person String, place String' -> ['person', 'place'] 227 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 228 column_aliases = [ 229 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 230 ] 231 else: 232 # Default column aliases in CH are "c1", "c2", etc. 233 column_aliases = [ 234 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 235 ] 236 237 return column_aliases
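A hedged usage sketch: this helper runs during column qualification, so qualify is the easiest way to exercise it (the exact output shape is an assumption).

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "SELECT * FROM VALUES('person String, place String', ('Noah', 'Paris'))"

    # The embedded structure string should drive the generated aliases
    # (person, place) rather than the default c1, c2, ...
    print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))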
239 class Tokenizer(tokens.Tokenizer): 240 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 241 IDENTIFIERS = ['"', "`"] 242 IDENTIFIER_ESCAPES = ["\\"] 243 STRING_ESCAPES = ["'", "\\"] 244 BIT_STRINGS = [("0b", "")] 245 HEX_STRINGS = [("0x", ""), ("0X", "")] 246 HEREDOC_STRINGS = ["$"] 247 248 KEYWORDS = { 249 **tokens.Tokenizer.KEYWORDS, 250 ".:": TokenType.DOTCOLON, 251 "ATTACH": TokenType.COMMAND, 252 "DATE32": TokenType.DATE32, 253 "DATETIME64": TokenType.DATETIME64, 254 "DICTIONARY": TokenType.DICTIONARY, 255 "DYNAMIC": TokenType.DYNAMIC, 256 "ENUM8": TokenType.ENUM8, 257 "ENUM16": TokenType.ENUM16, 258 "EXCHANGE": TokenType.COMMAND, 259 "FINAL": TokenType.FINAL, 260 "FIXEDSTRING": TokenType.FIXEDSTRING, 261 "FLOAT32": TokenType.FLOAT, 262 "FLOAT64": TokenType.DOUBLE, 263 "GLOBAL": TokenType.GLOBAL, 264 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 265 "MAP": TokenType.MAP, 266 "NESTED": TokenType.NESTED, 267 "NOTHING": TokenType.NOTHING, 268 "SAMPLE": TokenType.TABLE_SAMPLE, 269 "TUPLE": TokenType.STRUCT, 270 "UINT16": TokenType.USMALLINT, 271 "UINT32": TokenType.UINT, 272 "UINT64": TokenType.UBIGINT, 273 "UINT8": TokenType.UTINYINT, 274 "IPV4": TokenType.IPV4, 275 "IPV6": TokenType.IPV6, 276 "POINT": TokenType.POINT, 277 "RING": TokenType.RING, 278 "LINESTRING": TokenType.LINESTRING, 279 "MULTILINESTRING": TokenType.MULTILINESTRING, 280 "POLYGON": TokenType.POLYGON, 281 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 282 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 283 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 284 "SYSTEM": TokenType.COMMAND, 285 "PREWHERE": TokenType.PREWHERE, 286 } 287 KEYWORDS.pop("/*+") 288 289 SINGLE_TOKENS = { 290 **tokens.Tokenizer.SINGLE_TOKENS, 291 "$": TokenType.HEREDOC_STRING, 292 }
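A small sketch exercising the overrides above (assuming the Dialect.tokenize helper):

    from sqlglot.dialects.clickhouse import ClickHouse

    # '#' starts a comment, and FINAL/SAMPLE map to dedicated token types
    # in this dialect rather than falling through to identifiers.
    tokens = ClickHouse().tokenize("SELECT x FROM t FINAL SAMPLE 0.1 # trailing comment")
    print([token.token_type for token in tokens])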
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
294 class Parser(parser.Parser): 295 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 296 # * select x from t1 union all select x from t2 limit 1; 297 # * select x from t1 union all (select x from t2 limit 1); 298 MODIFIERS_ATTACHED_TO_SET_OP = False 299 INTERVAL_SPANS = False 300 OPTIONAL_ALIAS_TOKEN_CTE = False 301 JOINS_HAVE_EQUAL_PRECEDENCE = True 302 303 FUNCTIONS = { 304 **parser.Parser.FUNCTIONS, 305 "ANY": exp.AnyValue.from_arg_list, 306 "ARRAYSUM": exp.ArraySum.from_arg_list, 307 "COUNTIF": _build_count_if, 308 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 309 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 310 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 311 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 312 "DATE_FORMAT": _build_date_format, 313 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 314 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 315 "FORMATDATETIME": _build_date_format, 316 "JSONEXTRACTSTRING": build_json_extract_path( 317 exp.JSONExtractScalar, zero_based_indexing=False 318 ), 319 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 320 "MAP": parser.build_var_map, 321 "MATCH": exp.RegexpLike.from_arg_list, 322 "RANDCANONICAL": exp.Rand.from_arg_list, 323 "STR_TO_DATE": _build_str_to_date, 324 "TUPLE": exp.Struct.from_arg_list, 325 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 326 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 327 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 328 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 329 "UNIQ": exp.ApproxDistinct.from_arg_list, 330 "XOR": lambda args: exp.Xor(expressions=args), 331 "MD5": exp.MD5Digest.from_arg_list, 332 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 333 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 334 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 335 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 336 } 337 FUNCTIONS.pop("TRANSFORM") 338 339 AGG_FUNCTIONS = { 340 "count", 341 "min", 342 "max", 343 "sum", 344 "avg", 345 "any", 346 "stddevPop", 347 "stddevSamp", 348 "varPop", 349 "varSamp", 350 "corr", 351 "covarPop", 352 "covarSamp", 353 "entropy", 354 "exponentialMovingAverage", 355 "intervalLengthSum", 356 "kolmogorovSmirnovTest", 357 "mannWhitneyUTest", 358 "median", 359 "rankCorr", 360 "sumKahan", 361 "studentTTest", 362 "welchTTest", 363 "anyHeavy", 364 "anyLast", 365 "boundingRatio", 366 "first_value", 367 "last_value", 368 "argMin", 369 "argMax", 370 "avgWeighted", 371 "topK", 372 "topKWeighted", 373 "deltaSum", 374 "deltaSumTimestamp", 375 "groupArray", 376 "groupArrayLast", 377 "groupUniqArray", 378 "groupArrayInsertAt", 379 "groupArrayMovingAvg", 380 "groupArrayMovingSum", 381 "groupArraySample", 382 "groupBitAnd", 383 "groupBitOr", 384 "groupBitXor", 385 "groupBitmap", 386 "groupBitmapAnd", 387 "groupBitmapOr", 388 "groupBitmapXor", 389 "sumWithOverflow", 390 "sumMap", 391 "minMap", 392 "maxMap", 393 "skewSamp", 394 "skewPop", 395 "kurtSamp", 396 "kurtPop", 397 "uniq", 398 "uniqExact", 399 "uniqCombined", 400 "uniqCombined64", 401 "uniqHLL12", 402 "uniqTheta", 403 "quantile", 404 "quantiles", 405 "quantileExact", 406 "quantilesExact", 407 "quantileExactLow", 408 "quantilesExactLow", 409 
"quantileExactHigh", 410 "quantilesExactHigh", 411 "quantileExactWeighted", 412 "quantilesExactWeighted", 413 "quantileTiming", 414 "quantilesTiming", 415 "quantileTimingWeighted", 416 "quantilesTimingWeighted", 417 "quantileDeterministic", 418 "quantilesDeterministic", 419 "quantileTDigest", 420 "quantilesTDigest", 421 "quantileTDigestWeighted", 422 "quantilesTDigestWeighted", 423 "quantileBFloat16", 424 "quantilesBFloat16", 425 "quantileBFloat16Weighted", 426 "quantilesBFloat16Weighted", 427 "simpleLinearRegression", 428 "stochasticLinearRegression", 429 "stochasticLogisticRegression", 430 "categoricalInformationValue", 431 "contingency", 432 "cramersV", 433 "cramersVBiasCorrected", 434 "theilsU", 435 "maxIntersections", 436 "maxIntersectionsPosition", 437 "meanZTest", 438 "quantileInterpolatedWeighted", 439 "quantilesInterpolatedWeighted", 440 "quantileGK", 441 "quantilesGK", 442 "sparkBar", 443 "sumCount", 444 "largestTriangleThreeBuckets", 445 "histogram", 446 "sequenceMatch", 447 "sequenceCount", 448 "windowFunnel", 449 "retention", 450 "uniqUpTo", 451 "sequenceNextNode", 452 "exponentialTimeDecayedAvg", 453 } 454 455 AGG_FUNCTIONS_SUFFIXES = [ 456 "If", 457 "Array", 458 "ArrayIf", 459 "Map", 460 "SimpleState", 461 "State", 462 "Merge", 463 "MergeState", 464 "ForEach", 465 "Distinct", 466 "OrDefault", 467 "OrNull", 468 "Resample", 469 "ArgMin", 470 "ArgMax", 471 ] 472 473 FUNC_TOKENS = { 474 *parser.Parser.FUNC_TOKENS, 475 TokenType.AND, 476 TokenType.OR, 477 TokenType.SET, 478 } 479 480 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 481 482 ID_VAR_TOKENS = { 483 *parser.Parser.ID_VAR_TOKENS, 484 TokenType.LIKE, 485 } 486 487 AGG_FUNC_MAPPING = ( 488 lambda functions, suffixes: { 489 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 490 } 491 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 492 493 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 494 495 FUNCTION_PARSERS = { 496 **parser.Parser.FUNCTION_PARSERS, 497 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 498 "QUANTILE": lambda self: self._parse_quantile(), 499 "MEDIAN": lambda self: self._parse_quantile(), 500 "COLUMNS": lambda self: self._parse_columns(), 501 } 502 503 FUNCTION_PARSERS.pop("MATCH") 504 505 PROPERTY_PARSERS = { 506 **parser.Parser.PROPERTY_PARSERS, 507 "ENGINE": lambda self: self._parse_engine_property(), 508 } 509 PROPERTY_PARSERS.pop("DYNAMIC") 510 511 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 512 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 513 514 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 515 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 516 517 RANGE_PARSERS = { 518 **parser.Parser.RANGE_PARSERS, 519 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 520 } 521 522 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 523 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
524 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 525 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 526 527 JOIN_KINDS = { 528 *parser.Parser.JOIN_KINDS, 529 TokenType.ANY, 530 TokenType.ASOF, 531 TokenType.ARRAY, 532 } 533 534 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 535 TokenType.ANY, 536 TokenType.ARRAY, 537 TokenType.FINAL, 538 TokenType.FORMAT, 539 TokenType.SETTINGS, 540 } 541 542 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 543 TokenType.FORMAT, 544 } 545 546 LOG_DEFAULTS_TO_LN = True 547 548 QUERY_MODIFIER_PARSERS = { 549 **parser.Parser.QUERY_MODIFIER_PARSERS, 550 TokenType.SETTINGS: lambda self: ( 551 "settings", 552 self._advance() or self._parse_csv(self._parse_assignment), 553 ), 554 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 555 } 556 557 CONSTRAINT_PARSERS = { 558 **parser.Parser.CONSTRAINT_PARSERS, 559 "INDEX": lambda self: self._parse_index_constraint(), 560 "CODEC": lambda self: self._parse_compress(), 561 } 562 563 ALTER_PARSERS = { 564 **parser.Parser.ALTER_PARSERS, 565 "REPLACE": lambda self: self._parse_alter_table_replace(), 566 } 567 568 SCHEMA_UNNAMED_CONSTRAINTS = { 569 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 570 "INDEX", 571 } 572 573 PLACEHOLDER_PARSERS = { 574 **parser.Parser.PLACEHOLDER_PARSERS, 575 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 576 } 577 578 def _parse_engine_property(self) -> exp.EngineProperty: 579 self._match(TokenType.EQ) 580 return self.expression( 581 exp.EngineProperty, 582 this=self._parse_field(any_token=True, anonymous_func=True), 583 ) 584 585 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 586 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 587 return self._parse_lambda() 588 589 def _parse_types( 590 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 591 ) -> t.Optional[exp.Expression]: 592 dtype = super()._parse_types( 593 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 594 ) 595 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 596 # Mark every type as non-nullable which is ClickHouse's default, unless it's 597 # already marked as nullable. This marker helps us transpile types from other 598 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 599 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 600 # fail in ClickHouse without the `Nullable` type constructor. 601 dtype.set("nullable", False) 602 603 return dtype 604 605 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 606 index = self._index 607 this = self._parse_bitwise() 608 if self._match(TokenType.FROM): 609 self._retreat(index) 610 return super()._parse_extract() 611 612 # We return Anonymous here because extract and regexpExtract have different semantics, 613 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 614 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 615 # 616 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
617 self._match(TokenType.COMMA) 618 return self.expression( 619 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 620 ) 621 622 def _parse_assignment(self) -> t.Optional[exp.Expression]: 623 this = super()._parse_assignment() 624 625 if self._match(TokenType.PLACEHOLDER): 626 return self.expression( 627 exp.If, 628 this=this, 629 true=self._parse_assignment(), 630 false=self._match(TokenType.COLON) and self._parse_assignment(), 631 ) 632 633 return this 634 635 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 636 """ 637 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 638 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 639 """ 640 index = self._index 641 642 this = self._parse_id_var() 643 self._match(TokenType.COLON) 644 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 645 self._match_text_seq("IDENTIFIER") and "Identifier" 646 ) 647 648 if not kind: 649 self._retreat(index) 650 return None 651 elif not self._match(TokenType.R_BRACE): 652 self.raise_error("Expecting }") 653 654 if isinstance(this, exp.Identifier) and not this.quoted: 655 this = exp.var(this.name) 656 657 return self.expression(exp.Placeholder, this=this, kind=kind) 658 659 def _parse_bracket( 660 self, this: t.Optional[exp.Expression] = None 661 ) -> t.Optional[exp.Expression]: 662 l_brace = self._match(TokenType.L_BRACE, advance=False) 663 bracket = super()._parse_bracket(this) 664 665 if l_brace and isinstance(bracket, exp.Struct): 666 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 667 for expression in bracket.expressions: 668 if not isinstance(expression, exp.PropertyEQ): 669 break 670 671 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 672 varmap.args["values"].append("expressions", expression.expression) 673 674 return varmap 675 676 return bracket 677 678 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 679 this = super()._parse_in(this) 680 this.set("is_global", is_global) 681 return this 682 683 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 684 is_negated = self._match(TokenType.NOT) 685 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 686 return self.expression(exp.Not, this=this) if is_negated else this 687 688 def _parse_table( 689 self, 690 schema: bool = False, 691 joins: bool = False, 692 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 693 parse_bracket: bool = False, 694 is_db_reference: bool = False, 695 parse_partition: bool = False, 696 consume_pipe: bool = False, 697 ) -> t.Optional[exp.Expression]: 698 this = super()._parse_table( 699 schema=schema, 700 joins=joins, 701 alias_tokens=alias_tokens, 702 parse_bracket=parse_bracket, 703 is_db_reference=is_db_reference, 704 ) 705 706 if isinstance(this, exp.Table): 707 inner = this.this 708 alias = this.args.get("alias") 709 710 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 711 alias.set("columns", [exp.to_identifier("generate_series")]) 712 713 if self._match(TokenType.FINAL): 714 this = self.expression(exp.Final, this=this) 715 716 return this 717 718 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 719 return super()._parse_position(haystack_first=True) 720 721 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 722 def _parse_cte(self) -> t.Optional[exp.CTE]: 723 # WITH <identifier> AS 
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

            # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
            # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
            if join.kind == "ARRAY":
                for table in join.find_all(exp.Table):
                    table.replace(table.to_column())

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
            params = self._parse_func_params(anon_func)

            kwargs = {
                "this": anon_func.this,
                "expressions": anon_func.expressions,
            }
            if parts[1]:
                exp_class: t.Type[exp.Expression] = (
                    exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                )
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_string() or self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
        # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
        if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
            return this

        return super()._parse_alias(this=this, explicit=explicit)

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        this = super()._parse_expression()

        # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
        while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            self._match(TokenType.R_PAREN)

        return this

    def _parse_columns(self) -> exp.Expression:
        this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

        while self._next and self._match_text_seq(")", "APPLY", "("):
            self._match(TokenType.R_PAREN)
            this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        return this

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        value = super()._parse_value(values=values)
        if not value:
            return None

        # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
        # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
        # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
        # but the final result is not altered by the extra parentheses.
        # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
        expressions = value.expressions
        if values and not isinstance(expressions[-1], exp.Tuple):
            value.set(
                "expressions",
                [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
            )

        return value
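Taken together, these overrides cover ClickHouse's main parser-level departures from the base dialect: scalar CTEs, GLOBAL joins, ARRAY JOIN, parameterized aggregates such as quantile(0.5)(x), the APPLY modifier, and the single-column VALUES form. A minimal sketch of how a few of them surface through the public API follows; the table and column names are made up, and the expected values in the comments follow from the code above rather than being version-pinned guarantees.

import sqlglot
from sqlglot import exp

# WITH <expression> AS <identifier> produces a scalar CTE (see _parse_cte)
cte = sqlglot.parse_one("WITH 1 AS x SELECT x", read="clickhouse").find(exp.CTE)
print(cte and cte.args.get("scalar"))  # expected: True

# quantile(0.5)(price) is a parameterized aggregate (see _parse_quantile)
q = sqlglot.parse_one("SELECT quantile(0.5)(price) FROM trades", read="clickhouse")
print(q.find(exp.Quantile) is not None)  # expected: True

# ARRAY JOIN operands become Column references, not Tables (see _parse_join)
j = sqlglot.parse_one("SELECT a FROM t ARRAY JOIN arr", read="clickhouse").find(exp.Join)
print(j and j.kind)  # expected: ARRAY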
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
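These constructor arguments can also be passed through the top-level parsing helpers, which forward unrecognized keyword arguments to the dialect's parser. A minimal sketch, assuming the standard sqlglot entry points:

import sqlglot
from sqlglot.errors import ErrorLevel

# error_level and max_errors are forwarded to ClickHouse.Parser by parse_one
ast = sqlglot.parse_one(
    "SELECT 1",
    read="clickhouse",
    error_level=ErrorLevel.RAISE,
    max_errors=1,
)
print(ast.sql(dialect="clickhouse"))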
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    ARRAY_SIZE_NAME = "LENGTH"
    WRAP_DERIVED_VALUES = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NOTHING: "Nothing",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArrayRemove: remove_from_array_using_filter,
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.EndsWith: rename_func("endsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since Clickhouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
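A few of the TRANSFORMS and TYPE_MAPPING entries above in action. This is an illustrative sketch with made-up table and column names; the outputs in the comments are the shapes the tables imply, not verbatim guarantees across sqlglot versions.

import sqlglot

# exp.ApproxDistinct -> uniq, exp.Stddev -> stddevSamp, exp.Variance -> varSamp
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(a) FROM t", read="spark", write="clickhouse")[0])
# e.g. SELECT uniq(a) FROM t

print(sqlglot.transpile("SELECT STDDEV(x), VARIANCE(x) FROM t", write="clickhouse")[0])
# e.g. SELECT stddevSamp(x), varSamp(x) FROM t

# TYPE_MAPPING: BIGINT -> Int64, wrapped in Nullable per datatype_sql
print(sqlglot.transpile("SELECT CAST(x AS BIGINT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(Int64))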
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
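These options travel through the top-level helpers to ClickHouse.Generator. For example, pretty-printing a query (output shape shown as a comment):

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="clickhouse", pretty=True)[0])
# SELECT
#   a,
#   b
# FROM t
# WHERE
#   a > 1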
    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql
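Illustratively (a sketch; the comment shows the expected shape under the logic above, and cast_sql below avoids double-casting when the parser already wrapped the call in a DATETIME cast):

import sqlglot

print(sqlglot.transpile("SELECT STR_TO_DATE(x, '%Y-%m-%d')", read="mysql", write="clickhouse")[0])
# e.g. SELECT CAST(STR_TO_DATE(x, '%Y-%m-%d') AS DATE)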
    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)
    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)
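A sketch of the effect (expected shape in the comment):

import sqlglot

# TRY_CAST is rendered as a plain CAST to a Nullable target type
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(Int32))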
    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype
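A sketch of both rules (outputs in the comments follow from the code and may vary by version):

import sqlglot

# String-family types collapse to String with the length dropped; scalars are
# wrapped in Nullable(...), while composite types such as Array are not
print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(String))

print(sqlglot.transpile("SELECT CAST(x AS ARRAY<INT>)", read="spark", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Array(Nullable(Int32)))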
    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]
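For instance, assuming the ClickHouse parser accepts both modifiers (it sets the "settings" and "format" args this method reads):

import sqlglot

# SETTINGS and FORMAT are appended after the LIMIT clause
sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])
# expected to round-trip with the modifiers in the same position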
    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)
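A sketch with a made-up cluster name:

import sqlglot

# ON CLUSTER is a POST_NAME property, so it lands between the table name and
# the column schema
sql = "CREATE TABLE t ON CLUSTER main (x Int8)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])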
    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"
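A sketch, assuming the source dialect parses the table comment as a SchemaCommentProperty:

import sqlglot

# The comment is popped out of the property list and re-attached after the
# (parenthesized) CTAS query
sql = "CREATE TABLE t COMMENT 'events' AS SELECT 1 AS x"
print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])
# e.g. CREATE TABLE t AS (SELECT 1 AS x) COMMENT 'events'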
    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"
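For example, round-tripping a data-skipping index definition (made-up names):

import sqlglot

# INDEX <name> <expr> TYPE <type> GRANULARITY <value>, as assembled above
sql = "CREATE TABLE t (x String, INDEX idx x TYPE bloom_filter(0.001) GRANULARITY 4)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])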
    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        # If the VALUES clause contains tuples of expressions, we need to treat it
        # as a table since Clickhouse will automatically alias it as such.
        alias = expression.args.get("alias")

        if alias and alias.args.get("columns") and expression.expressions:
            values = expression.expressions[0].expressions
            values_as_table = any(isinstance(value, exp.Tuple) for value in values)
        else:
            values_as_table = True

        return super().values_sql(expression, values_as_table=values_as_table)
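A sketch of the interplay with the parser's _parse_value canonicalization (expected shape in the comment):

import sqlglot
from sqlglot import exp

# FROM VALUES (1, 2, 3) is canonicalized into a tuple of one-element tuples,
# so values_sql can treat it like a regular inline table
ast = sqlglot.parse_one("SELECT * FROM VALUES (1, 2, 3)", read="clickhouse")
print(ast.find(exp.Values).expressions[0])  # e.g. ((1), (2), (3))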
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_WINDOW_EXCLUDE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql