sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    build_like,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    remove_from_array_using_filter,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
    sha2_digest_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_datetime_format(
    expr_type: t.Type[E],
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        expr = build_formatted_time(expr_type, "clickhouse")(args)

        timezone = seq_get(args, 2)
        if timezone:
            expr.set("zone", timezone)

        return expr

    return _builder


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
            expression.args.get("zone"),
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime) -> str:
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # ClickHouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because ClickHouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"


def _build_timestamp_trunc(unit: str) -> t.Callable[[t.List], exp.TimestampTrunc]:
    return lambda args: exp.TimestampTrunc(
        this=seq_get(args, 0), unit=exp.var(unit), zone=seq_get(args, 1)
    )


def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous:
    sep = seq_get(args, 0)
    if isinstance(sep, exp.Literal):
        sep_value = sep.to_py()
        if isinstance(sep_value, str) and len(sep_value.encode("utf-8")) == 1:
            return _build_split(exp.Split)(args)

    return exp.Anonymous(this="splitByChar", expressions=args)


def _build_split(exp_class: t.Type[E]) -> t.Callable[[t.List], E]:
    return lambda args: exp_class(
        this=seq_get(args, 1), expression=seq_get(args, 0), limit=seq_get(args, 2)
    )
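
# Illustrative sketch, not part of the module: _parse_quantile (below) and
# _quantile_sql (above) handle ClickHouse's parameterized aggregate syntax,
# where the quantile level is a function parameter rather than an argument.
# Assuming sqlglot is importable, a round trip might look like:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse").sql("clickhouse")
#   'SELECT quantile(0.5)(x) FROM t'
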

# Skip the 'week' unit since ClickHouse's toStartOfWeek
# uses an extra mode argument to specify the first day of the week
TIMESTAMP_TRUNC_UNITS = {
    "MICROSECOND",
    "MILLISECOND",
    "SECOND",
    "MINUTE",
    "HOUR",
    "DAY",
    "MONTH",
    "QUARTER",
    "YEAR",
}
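
# Sketch (illustrative, not part of the module): each `toStartOf<unit>` function
# built from the set above is parsed into a generic exp.TimestampTrunc node, so it
# can be transpiled to other dialects' date truncation. Assuming a recent sqlglot:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT toStartOfDay(ts)", read="clickhouse").find(sqlglot.exp.TimestampTrunc) is not None
#   True
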

class ClickHouse(Dialect):
    INDEX_OFFSET = 1
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # ClickHouse allows VALUES to have an embedded structure, e.g.:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name, e.g.:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            **{
                f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
                for unit in TIMESTAMP_TRUNC_UNITS
            },
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
            "ARRAYSLICE": exp.ArraySlice.from_arg_list,
            "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
            "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
            "COUNTIF": _build_count_if,
            "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
            "HAS": exp.ArrayContains.from_arg_list,
            "ILIKE": build_like(exp.ILike),
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LIKE": build_like(exp.Like),
            "L2Distance": exp.EuclideanDistance.from_arg_list,
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "NOTLIKE": build_like(exp.Like, not_like=True),
            "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TOMONDAY": _build_timestamp_trunc("WEEK"),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLITBYCHAR": _build_split_by_char,
            "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
            "SPLITBYSTRING": _build_split(exp.Split),
            "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
            "TOTYPENAME": exp.Typeof.from_arg_list,
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
        FUNCTIONS.pop("APPROX_TOP_SUM")
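
        # Sketch (illustrative, not part of the module): with a single argument,
        # countIf maps to the generic exp.CountIf node via _build_count_if above;
        # with more arguments it stays a ClickHouse-style combined aggregate.
        # Assuming sqlglot is importable:
        #
        #   >>> import sqlglot
        #   >>> type(sqlglot.parse_one("SELECT countIf(x > 1) FROM t", read="clickhouse").selects[0]).__name__
        #   'CountIf'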
"categoricalInformationValue", 499 "contingency", 500 "cramersV", 501 "cramersVBiasCorrected", 502 "theilsU", 503 "maxIntersections", 504 "maxIntersectionsPosition", 505 "meanZTest", 506 "quantileInterpolatedWeighted", 507 "quantilesInterpolatedWeighted", 508 "quantileGK", 509 "quantilesGK", 510 "sparkBar", 511 "sumCount", 512 "largestTriangleThreeBuckets", 513 "histogram", 514 "sequenceMatch", 515 "sequenceCount", 516 "windowFunnel", 517 "retention", 518 "uniqUpTo", 519 "sequenceNextNode", 520 "exponentialTimeDecayedAvg", 521 } 522 523 AGG_FUNCTIONS_SUFFIXES = [ 524 "If", 525 "Array", 526 "ArrayIf", 527 "Map", 528 "SimpleState", 529 "State", 530 "Merge", 531 "MergeState", 532 "ForEach", 533 "Distinct", 534 "OrDefault", 535 "OrNull", 536 "Resample", 537 "ArgMin", 538 "ArgMax", 539 ] 540 541 FUNC_TOKENS = { 542 *parser.Parser.FUNC_TOKENS, 543 TokenType.AND, 544 TokenType.OR, 545 TokenType.SET, 546 } 547 548 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 549 550 ID_VAR_TOKENS = { 551 *parser.Parser.ID_VAR_TOKENS, 552 TokenType.LIKE, 553 } 554 555 AGG_FUNC_MAPPING = ( 556 lambda functions, suffixes: { 557 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 558 } 559 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 560 561 FUNCTION_PARSERS = { 562 **parser.Parser.FUNCTION_PARSERS, 563 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 564 "QUANTILE": lambda self: self._parse_quantile(), 565 "MEDIAN": lambda self: self._parse_quantile(), 566 "COLUMNS": lambda self: self._parse_columns(), 567 "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)), 568 "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)), 569 "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)), 570 } 571 572 FUNCTION_PARSERS.pop("MATCH") 573 574 PROPERTY_PARSERS = { 575 **parser.Parser.PROPERTY_PARSERS, 576 "ENGINE": lambda self: self._parse_engine_property(), 577 } 578 PROPERTY_PARSERS.pop("DYNAMIC") 579 580 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 581 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 582 583 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 584 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 585 586 RANGE_PARSERS = { 587 **parser.Parser.RANGE_PARSERS, 588 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 589 } 590 591 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 592 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
            "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
            "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
            "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this
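
        # Sketch (illustrative, not part of the module): the `?` token
        # (TokenType.PLACEHOLDER) is reused by _parse_assignment above to support
        # ClickHouse's ternary operator, so `cond ? a : b` parses into an exp.If
        # node. Assuming sqlglot is importable:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT x > 1 ? 'a' : 'b'", read="clickhouse").find(sqlglot.exp.If) is not None
        #   True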

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this
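
        # Sketch (illustrative, not part of the module): query parameters
        # round-trip through exp.Placeholder and the generator's placeholder_sql
        # below. A hedged example, assuming sqlglot is importable:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse").sql("clickhouse")
        #   'SELECT {abc: UInt32}'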

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                method = join.args.get("method")
                join.set("method", None)
                join.set("global_", method)

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
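
        # Sketch (illustrative, not part of the module): the FINAL table modifier
        # parsed in _parse_table above wraps the table in exp.Final, which the
        # generator renders back verbatim. A hedged round trip:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT * FROM t FINAL", read="clickhouse").sql("clickhouse")
        #   'SELECT * FROM t FINAL'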

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In ClickHouse "SELECT <expr> APPLY(...)" is a query modifier, so "APPLY"
            # shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # ClickHouse allows "SELECT <expr> [APPLY(func)] [...]" modifiers
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In ClickHouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: ClickHouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
            # ClickHouse allows custom expressions as partition key
            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
            return self.expression(
                exp.PartitionedByProperty,
                this=self._parse_assignment(),
            )
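
        # Sketch (illustrative, not part of the module): per _parse_value above,
        # a single-column VALUES clause is canonicalized so each scalar becomes a
        # one-element exp.Tuple. Roughly, assuming sqlglot is importable:
        #
        #   >>> import sqlglot
        #   >>> v = sqlglot.parse_one("SELECT * FROM VALUES (1, 2, 3)", read="clickhouse").find(sqlglot.exp.Values)
        #   >>> all(isinstance(e, sqlglot.exp.Tuple) for e in v.expressions[0].expressions)
        #   True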

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayContains: rename_func("has"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArrayReverse: rename_func("arrayReverse"),
            exp.ArraySlice: rename_func("arraySlice"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
            exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
            exp.CountIf: rename_func("countIf"),
            exp.CosineDistance: rename_func("cosineDistance"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.FarmFingerprint: rename_func("farmFingerprint64"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.Struct: rename_func("tuple"),
            exp.EndsWith: rename_func("endsWith"),
            exp.EuclideanDistance: rename_func("L2Distance"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.Typeof: rename_func("toTypeName"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
            exp.Split: lambda self, e: self.func(
                "splitByString", e.args.get("expression"), e.this, e.args.get("limit")
            ),
            exp.RegexpSplit: lambda self, e: self.func(
                "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
            exp.ParseDatetime: rename_func("parseDateTime"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in ClickHouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def offset_sql(self, expression: exp.Offset) -> str:
            offset = super().offset_sql(expression)

            # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
            # https://clickhouse.com/docs/sql-reference/statements/select/offset
            parent = expression.parent
            if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
                offset = f"{offset} ROWS"

            return offset

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to ClickHouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
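
        # Sketch (illustrative, not part of the module): _any_to_has above rewrites
        # equality against ANY(array) into ClickHouse's has() function. A hedged
        # example, assuming sqlglot is importable:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT 1 = ANY(arr)", write="clickhouse")[0]
        #   'SELECT has(arr, 1)'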

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
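
        # Sketch (illustrative, not part of the module): together with
        # Parser._parse_types, datatype_sql above makes nullability explicit when
        # transpiling from other dialects, as the parser's own comment suggests:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("CAST(x AS TEXT)", write="clickhouse")[0]
        #   'CAST(x AS Nullable(String))'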

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In):
                if expression.this.args.get("is_global"):
                    # let `GLOBAL IN` child interpose `NOT`
                    return self.sql(expression, "this")

                expression.set("this", exp.paren(expression.this, copy=False))

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since ClickHouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
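
# Sketch (illustrative, not part of the module): not_sql and in_sql above
# cooperate so that a negated global IN renders as ClickHouse's `GLOBAL NOT IN`
# rather than `NOT (... GLOBAL IN ...)`. A hedged round trip:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT * FROM t WHERE a GLOBAL NOT IN (1, 2)", read="clickhouse").sql("clickhouse")
#   'SELECT * FROM t WHERE a GLOBAL NOT IN (1, 2)'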
671 dtype.set("nullable", False) 672 673 return dtype 674 675 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 676 index = self._index 677 this = self._parse_bitwise() 678 if self._match(TokenType.FROM): 679 self._retreat(index) 680 return super()._parse_extract() 681 682 # We return Anonymous here because extract and regexpExtract have different semantics, 683 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 684 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 685 # 686 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 687 self._match(TokenType.COMMA) 688 return self.expression( 689 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 690 ) 691 692 def _parse_assignment(self) -> t.Optional[exp.Expression]: 693 this = super()._parse_assignment() 694 695 if self._match(TokenType.PLACEHOLDER): 696 return self.expression( 697 exp.If, 698 this=this, 699 true=self._parse_assignment(), 700 false=self._match(TokenType.COLON) and self._parse_assignment(), 701 ) 702 703 return this 704 705 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 706 """ 707 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 708 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 709 """ 710 index = self._index 711 712 this = self._parse_id_var() 713 self._match(TokenType.COLON) 714 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 715 self._match_text_seq("IDENTIFIER") and "Identifier" 716 ) 717 718 if not kind: 719 self._retreat(index) 720 return None 721 elif not self._match(TokenType.R_BRACE): 722 self.raise_error("Expecting }") 723 724 if isinstance(this, exp.Identifier) and not this.quoted: 725 this = exp.var(this.name) 726 727 return self.expression(exp.Placeholder, this=this, kind=kind) 728 729 def _parse_bracket( 730 self, this: t.Optional[exp.Expression] = None 731 ) -> t.Optional[exp.Expression]: 732 l_brace = self._match(TokenType.L_BRACE, advance=False) 733 bracket = super()._parse_bracket(this) 734 735 if l_brace and isinstance(bracket, exp.Struct): 736 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 737 for expression in bracket.expressions: 738 if not isinstance(expression, exp.PropertyEQ): 739 break 740 741 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 742 varmap.args["values"].append("expressions", expression.expression) 743 744 return varmap 745 746 return bracket 747 748 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 749 this = super()._parse_in(this) 750 this.set("is_global", is_global) 751 return this 752 753 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 754 is_negated = self._match(TokenType.NOT) 755 this = self._match(TokenType.IN) and self._parse_in(this, is_global=True) 756 return self.expression(exp.Not, this=this) if is_negated else this 757 758 def _parse_table( 759 self, 760 schema: bool = False, 761 joins: bool = False, 762 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 763 parse_bracket: bool = False, 764 is_db_reference: bool = False, 765 parse_partition: bool = False, 766 consume_pipe: bool = False, 767 ) -> t.Optional[exp.Expression]: 768 this = super()._parse_table( 769 schema=schema, 770 joins=joins, 771 alias_tokens=alias_tokens, 772 parse_bracket=parse_bracket, 773 is_db_reference=is_db_reference, 774 ) 775 
776 if isinstance(this, exp.Table): 777 inner = this.this 778 alias = this.args.get("alias") 779 780 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 781 alias.set("columns", [exp.to_identifier("generate_series")]) 782 783 if self._match(TokenType.FINAL): 784 this = self.expression(exp.Final, this=this) 785 786 return this 787 788 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 789 return super()._parse_position(haystack_first=True) 790 791 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 792 def _parse_cte(self) -> t.Optional[exp.CTE]: 793 # WITH <identifier> AS <subquery expression> 794 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 795 796 if not cte: 797 # WITH <expression> AS <identifier> 798 cte = self.expression( 799 exp.CTE, 800 this=self._parse_assignment(), 801 alias=self._parse_table_alias(), 802 scalar=True, 803 ) 804 805 return cte 806 807 def _parse_join_parts( 808 self, 809 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 810 is_global = self._match(TokenType.GLOBAL) and self._prev 811 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 812 813 if kind_pre: 814 kind = self._match_set(self.JOIN_KINDS) and self._prev 815 side = self._match_set(self.JOIN_SIDES) and self._prev 816 return is_global, side, kind 817 818 return ( 819 is_global, 820 self._match_set(self.JOIN_SIDES) and self._prev, 821 self._match_set(self.JOIN_KINDS) and self._prev, 822 ) 823 824 def _parse_join( 825 self, skip_join_token: bool = False, parse_bracket: bool = False 826 ) -> t.Optional[exp.Join]: 827 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 828 if join: 829 method = join.args.get("method") 830 join.set("method", None) 831 join.set("global_", method) 832 833 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 834 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 835 if join.kind == "ARRAY": 836 for table in join.find_all(exp.Table): 837 table.replace(table.to_column()) 838 839 return join 840 841 def _parse_function( 842 self, 843 functions: t.Optional[t.Dict[str, t.Callable]] = None, 844 anonymous: bool = False, 845 optional_parens: bool = True, 846 any_token: bool = False, 847 ) -> t.Optional[exp.Expression]: 848 expr = super()._parse_function( 849 functions=functions, 850 anonymous=anonymous, 851 optional_parens=optional_parens, 852 any_token=any_token, 853 ) 854 855 func = expr.this if isinstance(expr, exp.Window) else expr 856 857 # Aggregate functions can be split in 2 parts: <func_name><suffix> 858 parts = ( 859 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 860 ) 861 862 if parts: 863 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 864 params = self._parse_func_params(anon_func) 865 866 kwargs = { 867 "this": anon_func.this, 868 "expressions": anon_func.expressions, 869 } 870 if parts[1]: 871 exp_class: t.Type[exp.Expression] = ( 872 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 873 ) 874 else: 875 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 876 877 kwargs["exp_class"] = exp_class 878 if params: 879 kwargs["params"] = params 880 881 func = self.expression(**kwargs) 882 883 if isinstance(expr, exp.Window): 884 # The window's func was parsed as Anonymous in base parser, fix its 885 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 886 expr.set("this", func) 887 elif 
params: 888 # Params have blocked super()._parse_function() from parsing the following window 889 # (if that exists) as they're standing between the function call and the window spec 890 expr = self._parse_window(func) 891 else: 892 expr = func 893 894 return expr 895 896 def _parse_func_params( 897 self, this: t.Optional[exp.Func] = None 898 ) -> t.Optional[t.List[exp.Expression]]: 899 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 900 return self._parse_csv(self._parse_lambda) 901 902 if self._match(TokenType.L_PAREN): 903 params = self._parse_csv(self._parse_lambda) 904 self._match_r_paren(this) 905 return params 906 907 return None 908 909 def _parse_quantile(self) -> exp.Quantile: 910 this = self._parse_lambda() 911 params = self._parse_func_params() 912 if params: 913 return self.expression(exp.Quantile, this=params[0], quantile=this) 914 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 915 916 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 917 return super()._parse_wrapped_id_vars(optional=True) 918 919 def _parse_primary_key( 920 self, wrapped_optional: bool = False, in_props: bool = False 921 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 922 return super()._parse_primary_key( 923 wrapped_optional=wrapped_optional or in_props, in_props=in_props 924 ) 925 926 def _parse_on_property(self) -> t.Optional[exp.Expression]: 927 index = self._index 928 if self._match_text_seq("CLUSTER"): 929 this = self._parse_string() or self._parse_id_var() 930 if this: 931 return self.expression(exp.OnCluster, this=this) 932 else: 933 self._retreat(index) 934 return None 935 936 def _parse_index_constraint( 937 self, kind: t.Optional[str] = None 938 ) -> exp.IndexColumnConstraint: 939 # INDEX name1 expr TYPE type1(args) GRANULARITY value 940 this = self._parse_id_var() 941 expression = self._parse_assignment() 942 943 index_type = self._match_text_seq("TYPE") and ( 944 self._parse_function() or self._parse_var() 945 ) 946 947 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 948 949 return self.expression( 950 exp.IndexColumnConstraint, 951 this=this, 952 expression=expression, 953 index_type=index_type, 954 granularity=granularity, 955 ) 956 957 def _parse_partition(self) -> t.Optional[exp.Partition]: 958 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 959 if not self._match(TokenType.PARTITION): 960 return None 961 962 if self._match_text_seq("ID"): 963 # Corresponds to the PARTITION ID <string_value> syntax 964 expressions: t.List[exp.Expression] = [ 965 self.expression(exp.PartitionId, this=self._parse_string()) 966 ] 967 else: 968 expressions = self._parse_expressions() 969 970 return self.expression(exp.Partition, expressions=expressions) 971 972 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 973 partition = self._parse_partition() 974 975 if not partition or not self._match(TokenType.FROM): 976 return None 977 978 return self.expression( 979 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 980 ) 981 982 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 983 if not self._match_text_seq("PROJECTION"): 984 return None 985 986 return self.expression( 987 exp.ProjectionDef, 988 this=self._parse_id_var(), 989 expression=self._parse_wrapped(self._parse_statement), 990 ) 991 992 def _parse_constraint(self) -> t.Optional[exp.Expression]: 993 return 
super()._parse_constraint() or self._parse_projection_def() 994 995 def _parse_alias( 996 self, this: t.Optional[exp.Expression], explicit: bool = False 997 ) -> t.Optional[exp.Expression]: 998 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 999 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 1000 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 1001 return this 1002 1003 return super()._parse_alias(this=this, explicit=explicit) 1004 1005 def _parse_expression(self) -> t.Optional[exp.Expression]: 1006 this = super()._parse_expression() 1007 1008 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 1009 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 1010 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 1011 self._match(TokenType.R_PAREN) 1012 1013 return this 1014 1015 def _parse_columns(self) -> exp.Expression: 1016 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 1017 1018 while self._next and self._match_text_seq(")", "APPLY", "("): 1019 self._match(TokenType.R_PAREN) 1020 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 1021 return this 1022 1023 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 1024 value = super()._parse_value(values=values) 1025 if not value: 1026 return None 1027 1028 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 1029 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 1030 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 1031 # but the final result is not altered by the extra parentheses. 1032 # Note: Clickhouse allows VALUES([structure], value, ...) 
so the branch checks for the last expression 1033 expressions = value.expressions 1034 if values and not isinstance(expressions[-1], exp.Tuple): 1035 value.set( 1036 "expressions", 1037 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 1038 ) 1039 1040 return value 1041 1042 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1043 # ClickHouse allows custom expressions as partition key 1044 # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key 1045 return self.expression( 1046 exp.PartitionedByProperty, 1047 this=self._parse_assignment(), 1048 ) 1049 1050 class Generator(generator.Generator): 1051 QUERY_HINTS = False 1052 STRUCT_DELIMITER = ("(", ")") 1053 NVL2_SUPPORTED = False 1054 TABLESAMPLE_REQUIRES_PARENS = False 1055 TABLESAMPLE_SIZE_IS_ROWS = False 1056 TABLESAMPLE_KEYWORDS = "SAMPLE" 1057 LAST_DAY_SUPPORTS_DATE_PART = False 1058 CAN_IMPLEMENT_ARRAY_ANY = True 1059 SUPPORTS_TO_NUMBER = False 1060 JOIN_HINTS = False 1061 TABLE_HINTS = False 1062 GROUPINGS_SEP = "" 1063 SET_OP_MODIFIERS = False 1064 ARRAY_SIZE_NAME = "LENGTH" 1065 WRAP_DERIVED_VALUES = False 1066 1067 STRING_TYPE_MAPPING = { 1068 exp.DataType.Type.BLOB: "String", 1069 exp.DataType.Type.CHAR: "String", 1070 exp.DataType.Type.LONGBLOB: "String", 1071 exp.DataType.Type.LONGTEXT: "String", 1072 exp.DataType.Type.MEDIUMBLOB: "String", 1073 exp.DataType.Type.MEDIUMTEXT: "String", 1074 exp.DataType.Type.TINYBLOB: "String", 1075 exp.DataType.Type.TINYTEXT: "String", 1076 exp.DataType.Type.TEXT: "String", 1077 exp.DataType.Type.VARBINARY: "String", 1078 exp.DataType.Type.VARCHAR: "String", 1079 } 1080 1081 SUPPORTED_JSON_PATH_PARTS = { 1082 exp.JSONPathKey, 1083 exp.JSONPathRoot, 1084 exp.JSONPathSubscript, 1085 } 1086 1087 TYPE_MAPPING = { 1088 **generator.Generator.TYPE_MAPPING, 1089 **STRING_TYPE_MAPPING, 1090 exp.DataType.Type.ARRAY: "Array", 1091 exp.DataType.Type.BOOLEAN: "Bool", 1092 exp.DataType.Type.BIGINT: "Int64", 1093 exp.DataType.Type.DATE32: "Date32", 1094 exp.DataType.Type.DATETIME: "DateTime", 1095 exp.DataType.Type.DATETIME2: "DateTime", 1096 exp.DataType.Type.SMALLDATETIME: "DateTime", 1097 exp.DataType.Type.DATETIME64: "DateTime64", 1098 exp.DataType.Type.DECIMAL: "Decimal", 1099 exp.DataType.Type.DECIMAL32: "Decimal32", 1100 exp.DataType.Type.DECIMAL64: "Decimal64", 1101 exp.DataType.Type.DECIMAL128: "Decimal128", 1102 exp.DataType.Type.DECIMAL256: "Decimal256", 1103 exp.DataType.Type.TIMESTAMP: "DateTime", 1104 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1105 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1106 exp.DataType.Type.DOUBLE: "Float64", 1107 exp.DataType.Type.ENUM: "Enum", 1108 exp.DataType.Type.ENUM8: "Enum8", 1109 exp.DataType.Type.ENUM16: "Enum16", 1110 exp.DataType.Type.FIXEDSTRING: "FixedString", 1111 exp.DataType.Type.FLOAT: "Float32", 1112 exp.DataType.Type.INT: "Int32", 1113 exp.DataType.Type.MEDIUMINT: "Int32", 1114 exp.DataType.Type.INT128: "Int128", 1115 exp.DataType.Type.INT256: "Int256", 1116 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1117 exp.DataType.Type.MAP: "Map", 1118 exp.DataType.Type.NESTED: "Nested", 1119 exp.DataType.Type.NOTHING: "Nothing", 1120 exp.DataType.Type.SMALLINT: "Int16", 1121 exp.DataType.Type.STRUCT: "Tuple", 1122 exp.DataType.Type.TINYINT: "Int8", 1123 exp.DataType.Type.UBIGINT: "UInt64", 1124 exp.DataType.Type.UINT: "UInt32", 1125 exp.DataType.Type.UINT128: "UInt128", 1126 exp.DataType.Type.UINT256: "UInt256", 1127 exp.DataType.Type.USMALLINT: "UInt16", 1128 
exp.DataType.Type.UTINYINT: "UInt8", 1129 exp.DataType.Type.IPV4: "IPv4", 1130 exp.DataType.Type.IPV6: "IPv6", 1131 exp.DataType.Type.POINT: "Point", 1132 exp.DataType.Type.RING: "Ring", 1133 exp.DataType.Type.LINESTRING: "LineString", 1134 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1135 exp.DataType.Type.POLYGON: "Polygon", 1136 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1137 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1138 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1139 exp.DataType.Type.DYNAMIC: "Dynamic", 1140 } 1141 1142 TRANSFORMS = { 1143 **generator.Generator.TRANSFORMS, 1144 exp.AnyValue: rename_func("any"), 1145 exp.ApproxDistinct: rename_func("uniq"), 1146 exp.ArrayConcat: rename_func("arrayConcat"), 1147 exp.ArrayContains: rename_func("has"), 1148 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1149 exp.ArrayRemove: remove_from_array_using_filter, 1150 exp.ArrayReverse: rename_func("arrayReverse"), 1151 exp.ArraySlice: rename_func("arraySlice"), 1152 exp.ArraySum: rename_func("arraySum"), 1153 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1154 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1155 exp.Array: inline_array_sql, 1156 exp.CastToStrType: rename_func("CAST"), 1157 exp.CurrentDatabase: rename_func("CURRENT_DATABASE"), 1158 exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"), 1159 exp.CountIf: rename_func("countIf"), 1160 exp.CosineDistance: rename_func("cosineDistance"), 1161 exp.CompressColumnConstraint: lambda self, 1162 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1163 exp.ComputedColumnConstraint: lambda self, 1164 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1165 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1166 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1167 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1168 exp.DateStrToDate: rename_func("toDate"), 1169 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1170 exp.Explode: rename_func("arrayJoin"), 1171 exp.FarmFingerprint: rename_func("farmFingerprint64"), 1172 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1173 exp.IsNan: rename_func("isNaN"), 1174 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1175 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1176 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1177 exp.JSONPathKey: json_path_key_only_name, 1178 exp.JSONPathRoot: lambda *_: "", 1179 exp.Length: length_or_char_length_sql, 1180 exp.Map: _map_sql, 1181 exp.Median: rename_func("median"), 1182 exp.Nullif: rename_func("nullIf"), 1183 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1184 exp.Pivot: no_pivot_sql, 1185 exp.Quantile: _quantile_sql, 1186 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1187 exp.Rand: rename_func("randCanonical"), 1188 exp.StartsWith: rename_func("startsWith"), 1189 exp.Struct: rename_func("tuple"), 1190 exp.EndsWith: rename_func("endsWith"), 1191 exp.EuclideanDistance: rename_func("L2Distance"), 1192 exp.StrPosition: lambda self, e: strposition_sql( 1193 self, 1194 e, 1195 func_name="POSITION", 1196 supports_position=True, 1197 use_ansi_position=False, 1198 ), 1199 exp.TimeToStr: lambda self, e: self.func( 1200 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1201 ), 1202 exp.TimeStrToTime: _timestrtotime_sql, 1203 exp.TimestampAdd: 
_datetime_delta_sql("TIMESTAMP_ADD"), 1204 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1205 exp.Typeof: rename_func("toTypeName"), 1206 exp.VarMap: _map_sql, 1207 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1208 exp.MD5Digest: rename_func("MD5"), 1209 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1210 exp.SHA: rename_func("SHA1"), 1211 exp.SHA1Digest: rename_func("SHA1"), 1212 exp.SHA2: sha256_sql, 1213 exp.SHA2Digest: sha2_digest_sql, 1214 exp.Split: lambda self, e: self.func( 1215 "splitByString", e.args.get("expression"), e.this, e.args.get("limit") 1216 ), 1217 exp.RegexpSplit: lambda self, e: self.func( 1218 "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit") 1219 ), 1220 exp.UnixToTime: _unix_to_time_sql, 1221 exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True), 1222 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1223 exp.Variance: rename_func("varSamp"), 1224 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1225 exp.Stddev: rename_func("stddevSamp"), 1226 exp.Chr: rename_func("CHAR"), 1227 exp.Lag: lambda self, e: self.func( 1228 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1229 ), 1230 exp.Lead: lambda self, e: self.func( 1231 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1232 ), 1233 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1234 rename_func("editDistance") 1235 ), 1236 exp.ParseDatetime: rename_func("parseDateTime"), 1237 } 1238 1239 PROPERTIES_LOCATION = { 1240 **generator.Generator.PROPERTIES_LOCATION, 1241 exp.OnCluster: exp.Properties.Location.POST_NAME, 1242 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1243 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1244 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1245 } 1246 1247 # There's no list in docs, but it can be found in Clickhouse code 1248 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1249 ON_CLUSTER_TARGETS = { 1250 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1251 "DATABASE", 1252 "TABLE", 1253 "VIEW", 1254 "DICTIONARY", 1255 "INDEX", 1256 "FUNCTION", 1257 "NAMED COLLECTION", 1258 } 1259 1260 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1261 NON_NULLABLE_TYPES = { 1262 exp.DataType.Type.ARRAY, 1263 exp.DataType.Type.MAP, 1264 exp.DataType.Type.STRUCT, 1265 exp.DataType.Type.POINT, 1266 exp.DataType.Type.RING, 1267 exp.DataType.Type.LINESTRING, 1268 exp.DataType.Type.MULTILINESTRING, 1269 exp.DataType.Type.POLYGON, 1270 exp.DataType.Type.MULTIPOLYGON, 1271 } 1272 1273 def offset_sql(self, expression: exp.Offset) -> str: 1274 offset = super().offset_sql(expression) 1275 1276 # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword 1277 # https://clickhouse.com/docs/sql-reference/statements/select/offset 1278 parent = expression.parent 1279 if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch): 1280 offset = f"{offset} ROWS" 1281 1282 return offset 1283 1284 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1285 strtodate_sql = self.function_fallback_sql(expression) 1286 1287 if not isinstance(expression.parent, exp.Cast): 1288 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1289 # this branch aims to improve the transpilation to clickhouse 1290 return self.cast_sql(exp.cast(expression, "DATE")) 1291 1292 return strtodate_sql 1293 1294 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1295 this = expression.this 1296 1297 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1298 return self.sql(this) 1299 1300 return super().cast_sql(expression, safe_prefix=safe_prefix) 1301 1302 def trycast_sql(self, expression: exp.TryCast) -> str: 1303 dtype = expression.to 1304 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1305 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1306 dtype.set("nullable", True) 1307 1308 return super().cast_sql(expression) 1309 1310 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1311 this = self.json_path_part(expression.this) 1312 return str(int(this) + 1) if is_int(this) else this 1313 1314 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1315 return f"AS {self.sql(expression, 'this')}" 1316 1317 def _any_to_has( 1318 self, 1319 expression: exp.EQ | exp.NEQ, 1320 default: t.Callable[[t.Any], str], 1321 prefix: str = "", 1322 ) -> str: 1323 if isinstance(expression.left, exp.Any): 1324 arr = expression.left 1325 this = expression.right 1326 elif isinstance(expression.right, exp.Any): 1327 arr = expression.right 1328 this = expression.left 1329 else: 1330 return default(expression) 1331 1332 return prefix + self.func("has", arr.this.unnest(), this) 1333 1334 def eq_sql(self, expression: exp.EQ) -> str: 1335 return self._any_to_has(expression, super().eq_sql) 1336 1337 def neq_sql(self, expression: exp.NEQ) -> str: 1338 return self._any_to_has(expression, super().neq_sql, "NOT ") 1339 1340 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1341 # Manually add a flag to make the search case-insensitive 1342 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1343 return self.func("match", expression.this, regex) 1344 1345 def datatype_sql(self, expression: exp.DataType) -> str: 1346 # String is the standard ClickHouse type, every other variant is just an alias. 1347 # Additionally, any supplied length parameter will be ignored. 1348 # 1349 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1350 if expression.this in self.STRING_TYPE_MAPPING: 1351 dtype = "String" 1352 else: 1353 dtype = super().datatype_sql(expression) 1354 1355 # This section changes the type to `Nullable(...)` if the following conditions hold: 1356 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1357 # and change their semantics 1358 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1359 # constraint: "Type of Map key must be a type, that can be represented by integer or 1360 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1361 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1362 parent = expression.parent 1363 nullable = expression.args.get("nullable") 1364 if nullable is True or ( 1365 nullable is None 1366 and not ( 1367 isinstance(parent, exp.DataType) 1368 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1369 and expression.index in (None, 0) 1370 ) 1371 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1372 ): 1373 dtype = f"Nullable({dtype})" 1374 1375 return dtype 1376 1377 def cte_sql(self, expression: exp.CTE) -> str: 1378 if expression.args.get("scalar"): 1379 this = self.sql(expression, "this") 1380 alias = self.sql(expression, "alias") 1381 return f"{this} AS {alias}" 1382 1383 return super().cte_sql(expression) 1384 1385 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1386 return super().after_limit_modifiers(expression) + [ 1387 ( 1388 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1389 if expression.args.get("settings") 1390 else "" 1391 ), 1392 ( 1393 self.seg("FORMAT ") + self.sql(expression, "format") 1394 if expression.args.get("format") 1395 else "" 1396 ), 1397 ] 1398 1399 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1400 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1401 1402 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1403 return f"ON CLUSTER {self.sql(expression, 'this')}" 1404 1405 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1406 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1407 exp.Properties.Location.POST_NAME 1408 ): 1409 this_name = self.sql( 1410 expression.this if isinstance(expression.this, exp.Schema) else expression, 1411 "this", 1412 ) 1413 this_properties = " ".join( 1414 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1415 ) 1416 this_schema = self.schema_columns_sql(expression.this) 1417 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1418 1419 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1420 1421 return super().createable_sql(expression, locations) 1422 1423 def create_sql(self, expression: exp.Create) -> str: 1424 # The comment property comes last in CTAS statements, i.e. 
after the query 1425 query = expression.expression 1426 if isinstance(query, exp.Query): 1427 comment_prop = expression.find(exp.SchemaCommentProperty) 1428 if comment_prop: 1429 comment_prop.pop() 1430 query.replace(exp.paren(query)) 1431 else: 1432 comment_prop = None 1433 1434 create_sql = super().create_sql(expression) 1435 1436 comment_sql = self.sql(comment_prop) 1437 comment_sql = f" {comment_sql}" if comment_sql else "" 1438 1439 return f"{create_sql}{comment_sql}" 1440 1441 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1442 this = self.indent(self.sql(expression, "this")) 1443 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1444 1445 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1446 this = self.sql(expression, "this") 1447 this = f" {this}" if this else "" 1448 expr = self.sql(expression, "expression") 1449 expr = f" {expr}" if expr else "" 1450 index_type = self.sql(expression, "index_type") 1451 index_type = f" TYPE {index_type}" if index_type else "" 1452 granularity = self.sql(expression, "granularity") 1453 granularity = f" GRANULARITY {granularity}" if granularity else "" 1454 1455 return f"INDEX{this}{expr}{index_type}{granularity}" 1456 1457 def partition_sql(self, expression: exp.Partition) -> str: 1458 return f"PARTITION {self.expressions(expression, flat=True)}" 1459 1460 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1461 return f"ID {self.sql(expression.this)}" 1462 1463 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1464 return ( 1465 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1466 ) 1467 1468 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1469 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1470 1471 def is_sql(self, expression: exp.Is) -> str: 1472 is_sql = super().is_sql(expression) 1473 1474 if isinstance(expression.parent, exp.Not): 1475 # value IS NOT NULL -> NOT (value IS NULL) 1476 is_sql = self.wrap(is_sql) 1477 1478 return is_sql 1479 1480 def in_sql(self, expression: exp.In) -> str: 1481 in_sql = super().in_sql(expression) 1482 1483 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1484 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1485 1486 return in_sql 1487 1488 def not_sql(self, expression: exp.Not) -> str: 1489 if isinstance(expression.this, exp.In): 1490 if expression.this.args.get("is_global"): 1491 # let `GLOBAL IN` child interpose `NOT` 1492 return self.sql(expression, "this") 1493 1494 expression.set("this", exp.paren(expression.this, copy=False)) 1495 1496 return super().not_sql(expression) 1497 1498 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1499 # If the VALUES clause contains tuples of expressions, we need to treat it 1500 # as a table since Clickhouse will automatically alias it as such. 1501 alias = expression.args.get("alias") 1502 1503 if alias and alias.args.get("columns") and expression.expressions: 1504 values = expression.expressions[0].expressions 1505 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1506 else: 1507 values_as_table = True 1508 1509 return super().values_sql(expression, values_as_table=values_as_table)
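As a quick orientation before the per-attribute docs below, here is a minimal sketch (not part of the module) of the dialect round-tripping ClickHouse-specific syntax; the table and column names are made up:

import sqlglot

# FINAL, GLOBAL IN, SETTINGS and {param: Type} placeholders are all handled by
# the Parser/Generator defined above, so this should round-trip essentially
# unchanged.
sql = (
    "SELECT {name: String}, COUNT(*) AS c FROM events FINAL "
    "WHERE id GLOBAL IN (1, 2) SETTINGS max_threads = 8"
)
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])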
NORMALIZE_FUNCTIONS: bool | str = False
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
NULL_ORDERING = "nulls_are_last"
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST: t.Optional[bool] = None
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION = True
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
  SELECT 1 AS id, 2 AS my_id
)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the whole query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
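A minimal sketch of the example above run through sqlglot's optimizer, which is where this flag is consumed (exact output formatting may differ):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data "
    "WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)
# With dialect="clickhouse", alias references are expanded before column
# qualification, so WHERE / GROUP BY / HAVING should all resolve to "id".
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))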
PRESERVE_ORIGINAL_NAMES = True
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
Whether number literals can include underscores for better readability
HEX_STRING_IS_INTEGER_TYPE = True
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
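A small sketch exercising the two tokenizer-level settings above (the exact rendering of the literals on output may be normalized by the generator):

import sqlglot

# Underscore-separated numbers and 0x... hex literals both tokenize under the
# ClickHouse dialect; per HEX_STRING_IS_INTEGER_TYPE, the hex literal is
# treated as an integer-typed value rather than a binary string.
expr = sqlglot.parse_one("SELECT 1_000_000, 0xFF", read="clickhouse")
print(expr.sql("clickhouse"))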
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
Specifies the strategy according to which identifiers should be normalized.
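Since the strategy is case-sensitive, identifier normalization leaves names untouched. A sketch using the optimizer's normalizer (the identifiers are made up):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Under a case-insensitive dialect these identifiers would be lowercased;
# ClickHouse is case-sensitive, so the mixed-case names survive as-is.
expr = sqlglot.parse_one('SELECT "MyCol" FROM "MyTable"', read="clickhouse")
print(normalize_identifiers(expr, dialect="clickhouse").sql("clickhouse"))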
UNESCAPED_SEQUENCES = {"\\0": "\0"}
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (a literal newline character).
CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
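A sketch of the mapping in action (the duckdb target is arbitrary):

import sqlglot

# CREATE DATABASE is parsed into the canonical SCHEMA kind, so it should come
# out as CREATE SCHEMA in dialects that use that spelling.
print(sqlglot.transpile("CREATE DATABASE analytics", read="clickhouse", write="duckdb")[0])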
SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {exp.Except: False, exp.Intersect: False, exp.Union: None}
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
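A sketch of how these defaults surface in the parsed AST (the tables are made up, and the exact arg layout is an assumption based on the mapping above):

import sqlglot
from sqlglot import exp

# ClickHouse EXCEPT keeps duplicates unless DISTINCT is spelled out, so the
# bare form should parse with distinct=False; exp.Union maps to None above,
# meaning a bare UNION needs an explicit ALL or DISTINCT.
e = sqlglot.parse_one("SELECT x FROM a EXCEPT SELECT x FROM b", read="clickhouse")
print(isinstance(e, exp.Except), e.args.get("distinct"))
print(sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2", read="clickhouse").sql("clickhouse"))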
super()._parse_constraint() or self._parse_projection_def() 994 995 def _parse_alias( 996 self, this: t.Optional[exp.Expression], explicit: bool = False 997 ) -> t.Optional[exp.Expression]: 998 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 999 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 1000 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 1001 return this 1002 1003 return super()._parse_alias(this=this, explicit=explicit) 1004 1005 def _parse_expression(self) -> t.Optional[exp.Expression]: 1006 this = super()._parse_expression() 1007 1008 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 1009 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 1010 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 1011 self._match(TokenType.R_PAREN) 1012 1013 return this 1014 1015 def _parse_columns(self) -> exp.Expression: 1016 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 1017 1018 while self._next and self._match_text_seq(")", "APPLY", "("): 1019 self._match(TokenType.R_PAREN) 1020 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 1021 return this 1022 1023 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 1024 value = super()._parse_value(values=values) 1025 if not value: 1026 return None 1027 1028 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 1029 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 1030 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 1031 # but the final result is not altered by the extra parentheses. 1032 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 1033 expressions = value.expressions 1034 if values and not isinstance(expressions[-1], exp.Tuple): 1035 value.set( 1036 "expressions", 1037 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 1038 ) 1039 1040 return value 1041 1042 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1043 # ClickHouse allows custom expressions as partition key 1044 # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key 1045 return self.expression( 1046 exp.PartitionedByProperty, 1047 this=self._parse_assignment(), 1048 )
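Taken together, the overrides above give the parser ClickHouse's nonstandard surface syntax: brace-delimited query parameters, the ? : ternary operator, scalar CTEs, FINAL, and GLOBAL IN. A minimal sketch of how this surfaces through sqlglot's public API follows; the printed SQL is illustrative and may differ slightly between sqlglot versions.

import sqlglot
from sqlglot import exp

# {id: UInt32} is parsed into an exp.Placeholder by _parse_query_parameter.
ast = sqlglot.parse_one("SELECT {id: UInt32}", read="clickhouse")
assert isinstance(ast.selects[0], exp.Placeholder)

# The ternary operator is parsed into exp.If by _parse_assignment, so it can
# transpile to a CASE expression in other dialects.
print(sqlglot.transpile("SELECT x > 1 ? 'a' : 'b'", read="clickhouse", write="duckdb")[0])

# "WITH <expression> AS <identifier>" is parsed as a scalar CTE by _parse_cte
# and generated back in the same shape by the Generator below.
print(sqlglot.parse_one("WITH 10 AS threshold SELECT threshold", read="clickhouse").sql(dialect="clickhouse"))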
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
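These constructor arguments are normally forwarded through sqlglot's public helpers rather than by instantiating the parser directly; a brief, hedged example:

import sqlglot
from sqlglot.errors import ErrorLevel

# error_level and max_errors are passed through parse_one to this parser.
ast = sqlglot.parse_one("SELECT 1 FORMAT JSONEachRow", read="clickhouse", error_level=ErrorLevel.RAISE)
print(ast.sql(dialect="clickhouse"))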
1050 class Generator(generator.Generator): 1051 QUERY_HINTS = False 1052 STRUCT_DELIMITER = ("(", ")") 1053 NVL2_SUPPORTED = False 1054 TABLESAMPLE_REQUIRES_PARENS = False 1055 TABLESAMPLE_SIZE_IS_ROWS = False 1056 TABLESAMPLE_KEYWORDS = "SAMPLE" 1057 LAST_DAY_SUPPORTS_DATE_PART = False 1058 CAN_IMPLEMENT_ARRAY_ANY = True 1059 SUPPORTS_TO_NUMBER = False 1060 JOIN_HINTS = False 1061 TABLE_HINTS = False 1062 GROUPINGS_SEP = "" 1063 SET_OP_MODIFIERS = False 1064 ARRAY_SIZE_NAME = "LENGTH" 1065 WRAP_DERIVED_VALUES = False 1066 1067 STRING_TYPE_MAPPING = { 1068 exp.DataType.Type.BLOB: "String", 1069 exp.DataType.Type.CHAR: "String", 1070 exp.DataType.Type.LONGBLOB: "String", 1071 exp.DataType.Type.LONGTEXT: "String", 1072 exp.DataType.Type.MEDIUMBLOB: "String", 1073 exp.DataType.Type.MEDIUMTEXT: "String", 1074 exp.DataType.Type.TINYBLOB: "String", 1075 exp.DataType.Type.TINYTEXT: "String", 1076 exp.DataType.Type.TEXT: "String", 1077 exp.DataType.Type.VARBINARY: "String", 1078 exp.DataType.Type.VARCHAR: "String", 1079 } 1080 1081 SUPPORTED_JSON_PATH_PARTS = { 1082 exp.JSONPathKey, 1083 exp.JSONPathRoot, 1084 exp.JSONPathSubscript, 1085 } 1086 1087 TYPE_MAPPING = { 1088 **generator.Generator.TYPE_MAPPING, 1089 **STRING_TYPE_MAPPING, 1090 exp.DataType.Type.ARRAY: "Array", 1091 exp.DataType.Type.BOOLEAN: "Bool", 1092 exp.DataType.Type.BIGINT: "Int64", 1093 exp.DataType.Type.DATE32: "Date32", 1094 exp.DataType.Type.DATETIME: "DateTime", 1095 exp.DataType.Type.DATETIME2: "DateTime", 1096 exp.DataType.Type.SMALLDATETIME: "DateTime", 1097 exp.DataType.Type.DATETIME64: "DateTime64", 1098 exp.DataType.Type.DECIMAL: "Decimal", 1099 exp.DataType.Type.DECIMAL32: "Decimal32", 1100 exp.DataType.Type.DECIMAL64: "Decimal64", 1101 exp.DataType.Type.DECIMAL128: "Decimal128", 1102 exp.DataType.Type.DECIMAL256: "Decimal256", 1103 exp.DataType.Type.TIMESTAMP: "DateTime", 1104 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1105 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1106 exp.DataType.Type.DOUBLE: "Float64", 1107 exp.DataType.Type.ENUM: "Enum", 1108 exp.DataType.Type.ENUM8: "Enum8", 1109 exp.DataType.Type.ENUM16: "Enum16", 1110 exp.DataType.Type.FIXEDSTRING: "FixedString", 1111 exp.DataType.Type.FLOAT: "Float32", 1112 exp.DataType.Type.INT: "Int32", 1113 exp.DataType.Type.MEDIUMINT: "Int32", 1114 exp.DataType.Type.INT128: "Int128", 1115 exp.DataType.Type.INT256: "Int256", 1116 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1117 exp.DataType.Type.MAP: "Map", 1118 exp.DataType.Type.NESTED: "Nested", 1119 exp.DataType.Type.NOTHING: "Nothing", 1120 exp.DataType.Type.SMALLINT: "Int16", 1121 exp.DataType.Type.STRUCT: "Tuple", 1122 exp.DataType.Type.TINYINT: "Int8", 1123 exp.DataType.Type.UBIGINT: "UInt64", 1124 exp.DataType.Type.UINT: "UInt32", 1125 exp.DataType.Type.UINT128: "UInt128", 1126 exp.DataType.Type.UINT256: "UInt256", 1127 exp.DataType.Type.USMALLINT: "UInt16", 1128 exp.DataType.Type.UTINYINT: "UInt8", 1129 exp.DataType.Type.IPV4: "IPv4", 1130 exp.DataType.Type.IPV6: "IPv6", 1131 exp.DataType.Type.POINT: "Point", 1132 exp.DataType.Type.RING: "Ring", 1133 exp.DataType.Type.LINESTRING: "LineString", 1134 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1135 exp.DataType.Type.POLYGON: "Polygon", 1136 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1137 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1138 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1139 exp.DataType.Type.DYNAMIC: "Dynamic", 1140 } 1141 1142 TRANSFORMS = { 1143 **generator.Generator.TRANSFORMS, 1144 
exp.AnyValue: rename_func("any"), 1145 exp.ApproxDistinct: rename_func("uniq"), 1146 exp.ArrayConcat: rename_func("arrayConcat"), 1147 exp.ArrayContains: rename_func("has"), 1148 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1149 exp.ArrayRemove: remove_from_array_using_filter, 1150 exp.ArrayReverse: rename_func("arrayReverse"), 1151 exp.ArraySlice: rename_func("arraySlice"), 1152 exp.ArraySum: rename_func("arraySum"), 1153 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1154 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1155 exp.Array: inline_array_sql, 1156 exp.CastToStrType: rename_func("CAST"), 1157 exp.CurrentDatabase: rename_func("CURRENT_DATABASE"), 1158 exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"), 1159 exp.CountIf: rename_func("countIf"), 1160 exp.CosineDistance: rename_func("cosineDistance"), 1161 exp.CompressColumnConstraint: lambda self, 1162 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1163 exp.ComputedColumnConstraint: lambda self, 1164 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1165 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1166 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1167 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1168 exp.DateStrToDate: rename_func("toDate"), 1169 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1170 exp.Explode: rename_func("arrayJoin"), 1171 exp.FarmFingerprint: rename_func("farmFingerprint64"), 1172 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1173 exp.IsNan: rename_func("isNaN"), 1174 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1175 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1176 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1177 exp.JSONPathKey: json_path_key_only_name, 1178 exp.JSONPathRoot: lambda *_: "", 1179 exp.Length: length_or_char_length_sql, 1180 exp.Map: _map_sql, 1181 exp.Median: rename_func("median"), 1182 exp.Nullif: rename_func("nullIf"), 1183 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1184 exp.Pivot: no_pivot_sql, 1185 exp.Quantile: _quantile_sql, 1186 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1187 exp.Rand: rename_func("randCanonical"), 1188 exp.StartsWith: rename_func("startsWith"), 1189 exp.Struct: rename_func("tuple"), 1190 exp.EndsWith: rename_func("endsWith"), 1191 exp.EuclideanDistance: rename_func("L2Distance"), 1192 exp.StrPosition: lambda self, e: strposition_sql( 1193 self, 1194 e, 1195 func_name="POSITION", 1196 supports_position=True, 1197 use_ansi_position=False, 1198 ), 1199 exp.TimeToStr: lambda self, e: self.func( 1200 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1201 ), 1202 exp.TimeStrToTime: _timestrtotime_sql, 1203 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1204 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1205 exp.Typeof: rename_func("toTypeName"), 1206 exp.VarMap: _map_sql, 1207 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1208 exp.MD5Digest: rename_func("MD5"), 1209 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1210 exp.SHA: rename_func("SHA1"), 1211 exp.SHA1Digest: rename_func("SHA1"), 1212 exp.SHA2: sha256_sql, 1213 exp.SHA2Digest: sha2_digest_sql, 1214 exp.Split: lambda self, e: self.func( 1215 "splitByString", e.args.get("expression"), e.this, e.args.get("limit") 1216 ), 
1217 exp.RegexpSplit: lambda self, e: self.func( 1218 "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit") 1219 ), 1220 exp.UnixToTime: _unix_to_time_sql, 1221 exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True), 1222 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1223 exp.Variance: rename_func("varSamp"), 1224 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1225 exp.Stddev: rename_func("stddevSamp"), 1226 exp.Chr: rename_func("CHAR"), 1227 exp.Lag: lambda self, e: self.func( 1228 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1229 ), 1230 exp.Lead: lambda self, e: self.func( 1231 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1232 ), 1233 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1234 rename_func("editDistance") 1235 ), 1236 exp.ParseDatetime: rename_func("parseDateTime"), 1237 } 1238 1239 PROPERTIES_LOCATION = { 1240 **generator.Generator.PROPERTIES_LOCATION, 1241 exp.OnCluster: exp.Properties.Location.POST_NAME, 1242 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1243 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1244 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1245 } 1246 1247 # There's no list in docs, but it can be found in Clickhouse code 1248 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1249 ON_CLUSTER_TARGETS = { 1250 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1251 "DATABASE", 1252 "TABLE", 1253 "VIEW", 1254 "DICTIONARY", 1255 "INDEX", 1256 "FUNCTION", 1257 "NAMED COLLECTION", 1258 } 1259 1260 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1261 NON_NULLABLE_TYPES = { 1262 exp.DataType.Type.ARRAY, 1263 exp.DataType.Type.MAP, 1264 exp.DataType.Type.STRUCT, 1265 exp.DataType.Type.POINT, 1266 exp.DataType.Type.RING, 1267 exp.DataType.Type.LINESTRING, 1268 exp.DataType.Type.MULTILINESTRING, 1269 exp.DataType.Type.POLYGON, 1270 exp.DataType.Type.MULTIPOLYGON, 1271 } 1272 1273 def offset_sql(self, expression: exp.Offset) -> str: 1274 offset = super().offset_sql(expression) 1275 1276 # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword 1277 # https://clickhouse.com/docs/sql-reference/statements/select/offset 1278 parent = expression.parent 1279 if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch): 1280 offset = f"{offset} ROWS" 1281 1282 return offset 1283 1284 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1285 strtodate_sql = self.function_fallback_sql(expression) 1286 1287 if not isinstance(expression.parent, exp.Cast): 1288 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1289 # this branch aims to improve the transpilation to clickhouse 1290 return self.cast_sql(exp.cast(expression, "DATE")) 1291 1292 return strtodate_sql 1293 1294 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1295 this = expression.this 1296 1297 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1298 return self.sql(this) 1299 1300 return super().cast_sql(expression, safe_prefix=safe_prefix) 1301 1302 def trycast_sql(self, expression: exp.TryCast) -> str: 1303 dtype = expression.to 1304 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1305 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1306 dtype.set("nullable", True) 1307 1308 return super().cast_sql(expression) 1309 1310 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1311 this = self.json_path_part(expression.this) 1312 return str(int(this) + 1) if is_int(this) else this 1313 1314 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1315 return f"AS {self.sql(expression, 'this')}" 1316 1317 def _any_to_has( 1318 self, 1319 expression: exp.EQ | exp.NEQ, 1320 default: t.Callable[[t.Any], str], 1321 prefix: str = "", 1322 ) -> str: 1323 if isinstance(expression.left, exp.Any): 1324 arr = expression.left 1325 this = expression.right 1326 elif isinstance(expression.right, exp.Any): 1327 arr = expression.right 1328 this = expression.left 1329 else: 1330 return default(expression) 1331 1332 return prefix + self.func("has", arr.this.unnest(), this) 1333 1334 def eq_sql(self, expression: exp.EQ) -> str: 1335 return self._any_to_has(expression, super().eq_sql) 1336 1337 def neq_sql(self, expression: exp.NEQ) -> str: 1338 return self._any_to_has(expression, super().neq_sql, "NOT ") 1339 1340 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1341 # Manually add a flag to make the search case-insensitive 1342 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1343 return self.func("match", expression.this, regex) 1344 1345 def datatype_sql(self, expression: exp.DataType) -> str: 1346 # String is the standard ClickHouse type, every other variant is just an alias. 1347 # Additionally, any supplied length parameter will be ignored. 1348 # 1349 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1350 if expression.this in self.STRING_TYPE_MAPPING: 1351 dtype = "String" 1352 else: 1353 dtype = super().datatype_sql(expression) 1354 1355 # This section changes the type to `Nullable(...)` if the following conditions hold: 1356 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1357 # and change their semantics 1358 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1359 # constraint: "Type of Map key must be a type, that can be represented by integer or 1360 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1361 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1362 parent = expression.parent 1363 nullable = expression.args.get("nullable") 1364 if nullable is True or ( 1365 nullable is None 1366 and not ( 1367 isinstance(parent, exp.DataType) 1368 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1369 and expression.index in (None, 0) 1370 ) 1371 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1372 ): 1373 dtype = f"Nullable({dtype})" 1374 1375 return dtype 1376 1377 def cte_sql(self, expression: exp.CTE) -> str: 1378 if expression.args.get("scalar"): 1379 this = self.sql(expression, "this") 1380 alias = self.sql(expression, "alias") 1381 return f"{this} AS {alias}" 1382 1383 return super().cte_sql(expression) 1384 1385 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1386 return super().after_limit_modifiers(expression) + [ 1387 ( 1388 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1389 if expression.args.get("settings") 1390 else "" 1391 ), 1392 ( 1393 self.seg("FORMAT ") + self.sql(expression, "format") 1394 if expression.args.get("format") 1395 else "" 1396 ), 1397 ] 1398 1399 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1400 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1401 1402 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1403 return f"ON CLUSTER {self.sql(expression, 'this')}" 1404 1405 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1406 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1407 exp.Properties.Location.POST_NAME 1408 ): 1409 this_name = self.sql( 1410 expression.this if isinstance(expression.this, exp.Schema) else expression, 1411 "this", 1412 ) 1413 this_properties = " ".join( 1414 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1415 ) 1416 this_schema = self.schema_columns_sql(expression.this) 1417 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1418 1419 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1420 1421 return super().createable_sql(expression, locations) 1422 1423 def create_sql(self, expression: exp.Create) -> str: 1424 # The comment property comes last in CTAS statements, i.e. 
after the query 1425 query = expression.expression 1426 if isinstance(query, exp.Query): 1427 comment_prop = expression.find(exp.SchemaCommentProperty) 1428 if comment_prop: 1429 comment_prop.pop() 1430 query.replace(exp.paren(query)) 1431 else: 1432 comment_prop = None 1433 1434 create_sql = super().create_sql(expression) 1435 1436 comment_sql = self.sql(comment_prop) 1437 comment_sql = f" {comment_sql}" if comment_sql else "" 1438 1439 return f"{create_sql}{comment_sql}" 1440 1441 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1442 this = self.indent(self.sql(expression, "this")) 1443 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1444 1445 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1446 this = self.sql(expression, "this") 1447 this = f" {this}" if this else "" 1448 expr = self.sql(expression, "expression") 1449 expr = f" {expr}" if expr else "" 1450 index_type = self.sql(expression, "index_type") 1451 index_type = f" TYPE {index_type}" if index_type else "" 1452 granularity = self.sql(expression, "granularity") 1453 granularity = f" GRANULARITY {granularity}" if granularity else "" 1454 1455 return f"INDEX{this}{expr}{index_type}{granularity}" 1456 1457 def partition_sql(self, expression: exp.Partition) -> str: 1458 return f"PARTITION {self.expressions(expression, flat=True)}" 1459 1460 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1461 return f"ID {self.sql(expression.this)}" 1462 1463 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1464 return ( 1465 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1466 ) 1467 1468 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1469 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1470 1471 def is_sql(self, expression: exp.Is) -> str: 1472 is_sql = super().is_sql(expression) 1473 1474 if isinstance(expression.parent, exp.Not): 1475 # value IS NOT NULL -> NOT (value IS NULL) 1476 is_sql = self.wrap(is_sql) 1477 1478 return is_sql 1479 1480 def in_sql(self, expression: exp.In) -> str: 1481 in_sql = super().in_sql(expression) 1482 1483 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1484 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1485 1486 return in_sql 1487 1488 def not_sql(self, expression: exp.Not) -> str: 1489 if isinstance(expression.this, exp.In): 1490 if expression.this.args.get("is_global"): 1491 # let `GLOBAL IN` child interpose `NOT` 1492 return self.sql(expression, "this") 1493 1494 expression.set("this", exp.paren(expression.this, copy=False)) 1495 1496 return super().not_sql(expression) 1497 1498 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1499 # If the VALUES clause contains tuples of expressions, we need to treat it 1500 # as a table since Clickhouse will automatically alias it as such. 1501 alias = expression.args.get("alias") 1502 1503 if alias and alias.args.get("columns") and expression.expressions: 1504 values = expression.expressions[0].expressions 1505 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1506 else: 1507 values_as_table = True 1508 1509 return super().values_sql(expression, values_as_table=values_as_table)
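A short demonstration of the nullability handling in _parse_types and datatype_sql, mirroring the example given in the comments above; assuming a recent sqlglot, the outputs shown in the comments are indicative.

import sqlglot

# TEXT arrives with no nullability marker from another dialect, so the
# generator wraps it, producing CAST(x AS Nullable(String)).
print(sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])

# A type parsed by the ClickHouse dialect itself is marked nullable=False on
# the way in, so it is left unwrapped on the way back out.
print(sqlglot.transpile("CAST(x AS String)", read="clickhouse", write="clickhouse")[0])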
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
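To close, a usage sketch touching a couple of the TRANSFORMS renames and the SETTINGS/FORMAT modifiers emitted by after_limit_modifiers; the exact output may vary between sqlglot versions.

import sqlglot

# COUNT_IF and APPROX_DISTINCT map to ClickHouse's countIf and uniq.
print(sqlglot.transpile("SELECT COUNT_IF(x > 1), APPROX_DISTINCT(y) FROM t", write="clickhouse")[0])

# SETTINGS and FORMAT round-trip via QUERY_MODIFIER_PARSERS and
# after_limit_modifiers.
print(sqlglot.transpile("SELECT * FROM t SETTINGS max_threads = 4 FORMAT TabSeparated", read="clickhouse", write="clickhouse")[0])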