"""ClickHouse dialect for sqlglot (sqlglot.dialects.clickhouse)."""
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot._typing import E 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 arg_max_or_min_no_count, 10 build_date_delta, 11 build_formatted_time, 12 build_like, 13 inline_array_sql, 14 jarowinkler_similarity, 15 json_extract_segments, 16 json_path_key_only_name, 17 length_or_char_length_sql, 18 no_pivot_sql, 19 build_json_extract_path, 20 rename_func, 21 remove_from_array_using_filter, 22 sha256_sql, 23 strposition_sql, 24 var_map_sql, 25 timestamptrunc_sql, 26 unit_to_var, 27 trim_sql, 28 sha2_digest_sql, 29) 30from sqlglot.generator import Generator 31from sqlglot.helper import is_int, seq_get 32from sqlglot.tokens import Token, TokenType 33from sqlglot.generator import unsupported_args 34from sqlglot.typing.clickhouse import EXPRESSION_METADATA 35 36DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 37 38 39def _build_datetime_format( 40 expr_type: t.Type[E], 41) -> t.Callable[[t.List], E]: 42 def _builder(args: t.List) -> E: 43 expr = build_formatted_time(expr_type, "clickhouse")(args) 44 45 timezone = seq_get(args, 2) 46 if timezone: 47 expr.set("zone", timezone) 48 49 return expr 50 51 return _builder 52 53 54def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 55 scale = expression.args.get("scale") 56 timestamp = expression.this 57 58 if scale in (None, exp.UnixToTime.SECONDS): 59 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 60 if scale == exp.UnixToTime.MILLIS: 61 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 62 if scale == exp.UnixToTime.MICROS: 63 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 64 if scale == exp.UnixToTime.NANOS: 65 return self.func("fromUnixTimestamp64Nano", 
exp.cast(timestamp, exp.DataType.Type.BIGINT)) 66 67 return self.func( 68 "fromUnixTimestamp", 69 exp.cast( 70 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 71 ), 72 ) 73 74 75def _lower_func(sql: str) -> str: 76 index = sql.index("(") 77 return sql[:index].lower() + sql[index:] 78 79 80def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 81 quantile = expression.args["quantile"] 82 args = f"({self.sql(expression, 'this')})" 83 84 if isinstance(quantile, exp.Array): 85 func = self.func("quantiles", *quantile) 86 else: 87 func = self.func("quantile", quantile) 88 89 return func + args 90 91 92def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 93 if len(args) == 1: 94 return exp.CountIf(this=seq_get(args, 0)) 95 96 return exp.CombinedAggFunc(this="countIf", expressions=args) 97 98 99def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 100 if len(args) == 3: 101 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 102 103 strtodate = exp.StrToDate.from_arg_list(args) 104 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 105 106 107def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 108 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 109 if not expression.unit: 110 return rename_func(name)(self, expression) 111 112 return self.func( 113 name, 114 unit_to_var(expression), 115 expression.expression, 116 expression.this, 117 expression.args.get("zone"), 118 ) 119 120 return _delta_sql 121 122 123def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 124 ts = expression.this 125 126 tz = expression.args.get("zone") 127 if tz and isinstance(ts, exp.Literal): 128 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 129 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 
130 # 131 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 132 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 133 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 134 ts_string = ts.name.strip() 135 136 # separate [date and time] from [fractional seconds and UTC offset] 137 ts_parts = ts_string.split(".") 138 if len(ts_parts) == 2: 139 # separate fractional seconds and UTC offset 140 offset_sep = "+" if "+" in ts_parts[1] else "-" 141 ts_frac_parts = ts_parts[1].split(offset_sep) 142 num_frac_parts = len(ts_frac_parts) 143 144 # pad to 6 digits if fractional seconds present 145 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 146 ts_string = "".join( 147 [ 148 ts_parts[0], # date and time 149 ".", 150 ts_frac_parts[0], # fractional seconds 151 offset_sep if num_frac_parts > 1 else "", 152 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 153 ] 154 ) 155 156 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 157 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 158 # it's part of the timestamp string 159 ts_without_tz = ( 160 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 161 ) 162 ts = exp.Literal.string(ts_without_tz) 163 164 # Non-nullable DateTime64 with microsecond precision 165 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 166 datatype = exp.DataType.build( 167 exp.DataType.Type.DATETIME64, 168 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 169 nullable=False, 170 ) 171 172 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 173 174 175def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str: 176 if not (expression.parent and expression.parent.arg_key == "settings"): 177 return _lower_func(var_map_sql(self, 
expression)) 178 179 keys = expression.args.get("keys") 180 values = expression.args.get("values") 181 182 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 183 self.unsupported("Cannot convert array columns into map.") 184 return "" 185 186 args = [] 187 for key, value in zip(keys.expressions, values.expressions): 188 args.append(f"{self.sql(key)}: {self.sql(value)}") 189 190 csv_args = ", ".join(args) 191 192 return f"{{{csv_args}}}" 193 194 195def _build_timestamp_trunc(unit: str) -> t.Callable[[t.List], exp.TimestampTrunc]: 196 return lambda args: exp.TimestampTrunc( 197 this=seq_get(args, 0), unit=exp.var(unit), zone=seq_get(args, 1) 198 ) 199 200 201def _build_split_by_char(args: t.List) -> exp.Split | exp.Anonymous: 202 sep = seq_get(args, 0) 203 if isinstance(sep, exp.Literal): 204 sep_value = sep.to_py() 205 if isinstance(sep_value, str) and len(sep_value.encode("utf-8")) == 1: 206 return _build_split(exp.Split)(args) 207 208 return exp.Anonymous(this="splitByChar", expressions=args) 209 210 211def _build_split(exp_class: t.Type[E]) -> t.Callable[[t.List], E]: 212 return lambda args: exp_class( 213 this=seq_get(args, 1), expression=seq_get(args, 0), limit=seq_get(args, 2) 214 ) 215 216 217# Skip the 'week' unit since ClickHouse's toStartOfWeek 218# uses an extra mode argument to specify the first day of the week 219TIMESTAMP_TRUNC_UNITS = { 220 "MICROSECOND", 221 "MILLISECOND", 222 "SECOND", 223 "MINUTE", 224 "HOUR", 225 "DAY", 226 "MONTH", 227 "QUARTER", 228 "YEAR", 229} 230 231 232class ClickHouse(Dialect): 233 INDEX_OFFSET = 1 234 NORMALIZE_FUNCTIONS: bool | str = False 235 NULL_ORDERING = "nulls_are_last" 236 SUPPORTS_USER_DEFINED_TYPES = False 237 SAFE_DIVISION = True 238 LOG_BASE_FIRST: t.Optional[bool] = None 239 FORCE_EARLY_ALIAS_REF_EXPANSION = True 240 PRESERVE_ORIGINAL_NAMES = True 241 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 242 IDENTIFIERS_CAN_START_WITH_DIGIT = True 243 HEX_STRING_IS_INTEGER_TYPE = True 244 245 # 
https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 246 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 247 248 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 249 250 UNESCAPED_SEQUENCES = { 251 "\\0": "\0", 252 } 253 254 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 255 256 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 257 exp.Except: False, 258 exp.Intersect: False, 259 exp.Union: None, 260 } 261 262 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 263 # Clickhouse allows VALUES to have an embedded structure e.g: 264 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 265 # In this case, we don't want to qualify the columns 266 values = expression.expressions[0].expressions 267 268 structure = ( 269 values[0] 270 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 271 else None 272 ) 273 if structure: 274 # Split each column definition into the column name e.g: 275 # 'person String, place String' -> ['person', 'place'] 276 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 277 column_aliases = [ 278 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 279 ] 280 else: 281 # Default column aliases in CH are "c1", "c2", etc. 
282 column_aliases = [ 283 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 284 ] 285 286 return column_aliases 287 288 class Tokenizer(tokens.Tokenizer): 289 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 290 IDENTIFIERS = ['"', "`"] 291 IDENTIFIER_ESCAPES = ["\\"] 292 STRING_ESCAPES = ["'", "\\"] 293 BIT_STRINGS = [("0b", "")] 294 HEX_STRINGS = [("0x", ""), ("0X", "")] 295 HEREDOC_STRINGS = ["$"] 296 297 KEYWORDS = { 298 **tokens.Tokenizer.KEYWORDS, 299 ".:": TokenType.DOTCOLON, 300 "ATTACH": TokenType.COMMAND, 301 "DATE32": TokenType.DATE32, 302 "DATETIME64": TokenType.DATETIME64, 303 "DICTIONARY": TokenType.DICTIONARY, 304 "DYNAMIC": TokenType.DYNAMIC, 305 "ENUM8": TokenType.ENUM8, 306 "ENUM16": TokenType.ENUM16, 307 "EXCHANGE": TokenType.COMMAND, 308 "FINAL": TokenType.FINAL, 309 "FIXEDSTRING": TokenType.FIXEDSTRING, 310 "FLOAT32": TokenType.FLOAT, 311 "FLOAT64": TokenType.DOUBLE, 312 "GLOBAL": TokenType.GLOBAL, 313 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 314 "MAP": TokenType.MAP, 315 "NESTED": TokenType.NESTED, 316 "NOTHING": TokenType.NOTHING, 317 "SAMPLE": TokenType.TABLE_SAMPLE, 318 "TUPLE": TokenType.STRUCT, 319 "UINT16": TokenType.USMALLINT, 320 "UINT32": TokenType.UINT, 321 "UINT64": TokenType.UBIGINT, 322 "UINT8": TokenType.UTINYINT, 323 "IPV4": TokenType.IPV4, 324 "IPV6": TokenType.IPV6, 325 "POINT": TokenType.POINT, 326 "RING": TokenType.RING, 327 "LINESTRING": TokenType.LINESTRING, 328 "MULTILINESTRING": TokenType.MULTILINESTRING, 329 "POLYGON": TokenType.POLYGON, 330 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 331 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 332 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 333 "SYSTEM": TokenType.COMMAND, 334 "PREWHERE": TokenType.PREWHERE, 335 } 336 KEYWORDS.pop("/*+") 337 338 SINGLE_TOKENS = { 339 **tokens.Tokenizer.SINGLE_TOKENS, 340 "$": TokenType.HEREDOC_STRING, 341 } 342 343 class Parser(parser.Parser): 344 # Tested in ClickHouse's playground, it seems that the 
following two queries do the same thing 345 # * select x from t1 union all select x from t2 limit 1; 346 # * select x from t1 union all (select x from t2 limit 1); 347 MODIFIERS_ATTACHED_TO_SET_OP = False 348 INTERVAL_SPANS = False 349 OPTIONAL_ALIAS_TOKEN_CTE = False 350 JOINS_HAVE_EQUAL_PRECEDENCE = True 351 352 FUNCTIONS = { 353 **parser.Parser.FUNCTIONS, 354 **{ 355 f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit) 356 for unit in TIMESTAMP_TRUNC_UNITS 357 }, 358 "ANY": exp.AnyValue.from_arg_list, 359 "ARRAYCOMPACT": exp.ArrayCompact.from_arg_list, 360 "ARRAYCONCAT": exp.ArrayConcat.from_arg_list, 361 "ARRAYDISTINCT": exp.ArrayDistinct.from_arg_list, 362 "ARRAYSUM": exp.ArraySum.from_arg_list, 363 "ARRAYMAX": exp.ArrayMax.from_arg_list, 364 "ARRAYMIN": exp.ArrayMin.from_arg_list, 365 "ARRAYREVERSE": exp.ArrayReverse.from_arg_list, 366 "ARRAYSLICE": exp.ArraySlice.from_arg_list, 367 "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list, 368 "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list, 369 "COUNTIF": _build_count_if, 370 "COSINEDISTANCE": exp.CosineDistance.from_arg_list, 371 "VERSION": exp.CurrentVersion.from_arg_list, 372 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 373 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 374 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 375 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True), 376 "DATE_FORMAT": _build_datetime_format(exp.TimeToStr), 377 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 378 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 379 "FORMATDATETIME": _build_datetime_format(exp.TimeToStr), 380 "HAS": exp.ArrayContains.from_arg_list, 381 "ILIKE": build_like(exp.ILike), 382 "JSONEXTRACTSTRING": build_json_extract_path( 383 exp.JSONExtractScalar, zero_based_indexing=False 384 ), 385 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 386 "LIKE": 
build_like(exp.Like), 387 "L2Distance": exp.EuclideanDistance.from_arg_list, 388 "MAP": parser.build_var_map, 389 "MATCH": exp.RegexpLike.from_arg_list, 390 "NOTLIKE": build_like(exp.Like, not_like=True), 391 "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime), 392 "RANDCANONICAL": exp.Rand.from_arg_list, 393 "STR_TO_DATE": _build_str_to_date, 394 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 395 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 396 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 397 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 398 "TOMONDAY": _build_timestamp_trunc("WEEK"), 399 "UNIQ": exp.ApproxDistinct.from_arg_list, 400 "XOR": lambda args: exp.Xor(expressions=args), 401 "MD5": exp.MD5Digest.from_arg_list, 402 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 403 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 404 "SPLITBYCHAR": _build_split_by_char, 405 "SPLITBYREGEXP": _build_split(exp.RegexpSplit), 406 "SPLITBYSTRING": _build_split(exp.Split), 407 "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list, 408 "TOTYPENAME": exp.Typeof.from_arg_list, 409 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 410 "JAROWINKLERSIMILARITY": exp.JarowinklerSimilarity.from_arg_list, 411 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 412 "UTCTIMESTAMP": exp.UtcTimestamp.from_arg_list, 413 } 414 FUNCTIONS.pop("TRANSFORM") 415 FUNCTIONS.pop("APPROX_TOP_SUM") 416 417 AGG_FUNCTIONS = { 418 "count", 419 "min", 420 "max", 421 "sum", 422 "avg", 423 "any", 424 "stddevPop", 425 "stddevSamp", 426 "varPop", 427 "varSamp", 428 "corr", 429 "covarPop", 430 "covarSamp", 431 "entropy", 432 "exponentialMovingAverage", 433 "intervalLengthSum", 434 "kolmogorovSmirnovTest", 435 "mannWhitneyUTest", 436 "median", 437 "rankCorr", 438 "sumKahan", 439 "studentTTest", 440 "welchTTest", 441 "anyHeavy", 442 
"anyLast", 443 "boundingRatio", 444 "first_value", 445 "last_value", 446 "argMin", 447 "argMax", 448 "avgWeighted", 449 "topK", 450 "approx_top_sum", 451 "topKWeighted", 452 "deltaSum", 453 "deltaSumTimestamp", 454 "groupArray", 455 "groupArrayLast", 456 "groupUniqArray", 457 "groupArrayInsertAt", 458 "groupArrayMovingAvg", 459 "groupArrayMovingSum", 460 "groupArraySample", 461 "groupBitAnd", 462 "groupBitOr", 463 "groupBitXor", 464 "groupBitmap", 465 "groupBitmapAnd", 466 "groupBitmapOr", 467 "groupBitmapXor", 468 "sumWithOverflow", 469 "sumMap", 470 "minMap", 471 "maxMap", 472 "skewSamp", 473 "skewPop", 474 "kurtSamp", 475 "kurtPop", 476 "uniq", 477 "uniqExact", 478 "uniqCombined", 479 "uniqCombined64", 480 "uniqHLL12", 481 "uniqTheta", 482 "quantile", 483 "quantiles", 484 "quantileExact", 485 "quantilesExact", 486 "quantilesExactExclusive", 487 "quantileExactLow", 488 "quantilesExactLow", 489 "quantileExactHigh", 490 "quantilesExactHigh", 491 "quantileExactWeighted", 492 "quantilesExactWeighted", 493 "quantileTiming", 494 "quantilesTiming", 495 "quantileTimingWeighted", 496 "quantilesTimingWeighted", 497 "quantileDeterministic", 498 "quantilesDeterministic", 499 "quantileTDigest", 500 "quantilesTDigest", 501 "quantileTDigestWeighted", 502 "quantilesTDigestWeighted", 503 "quantileBFloat16", 504 "quantilesBFloat16", 505 "quantileBFloat16Weighted", 506 "quantilesBFloat16Weighted", 507 "simpleLinearRegression", 508 "stochasticLinearRegression", 509 "stochasticLogisticRegression", 510 "categoricalInformationValue", 511 "contingency", 512 "cramersV", 513 "cramersVBiasCorrected", 514 "theilsU", 515 "maxIntersections", 516 "maxIntersectionsPosition", 517 "meanZTest", 518 "quantileInterpolatedWeighted", 519 "quantilesInterpolatedWeighted", 520 "quantileGK", 521 "quantilesGK", 522 "sparkBar", 523 "sumCount", 524 "largestTriangleThreeBuckets", 525 "histogram", 526 "sequenceMatch", 527 "sequenceCount", 528 "windowFunnel", 529 "retention", 530 "uniqUpTo", 531 
"sequenceNextNode", 532 "exponentialTimeDecayedAvg", 533 } 534 535 AGG_FUNCTIONS_SUFFIXES = [ 536 "If", 537 "Array", 538 "ArrayIf", 539 "Map", 540 "SimpleState", 541 "State", 542 "Merge", 543 "MergeState", 544 "ForEach", 545 "Distinct", 546 "OrDefault", 547 "OrNull", 548 "Resample", 549 "ArgMin", 550 "ArgMax", 551 ] 552 553 FUNC_TOKENS = { 554 *parser.Parser.FUNC_TOKENS, 555 TokenType.AND, 556 TokenType.OR, 557 TokenType.SET, 558 } 559 560 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 561 562 ID_VAR_TOKENS = { 563 *parser.Parser.ID_VAR_TOKENS, 564 TokenType.LIKE, 565 } 566 567 AGG_FUNC_MAPPING = ( 568 lambda functions, suffixes: { 569 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 570 } 571 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 572 573 FUNCTION_PARSERS = { 574 **parser.Parser.FUNCTION_PARSERS, 575 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 576 "QUANTILE": lambda self: self._parse_quantile(), 577 "MEDIAN": lambda self: self._parse_quantile(), 578 "COLUMNS": lambda self: self._parse_columns(), 579 "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)), 580 "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)), 581 "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)), 582 } 583 584 FUNCTION_PARSERS.pop("MATCH") 585 586 PROPERTY_PARSERS = { 587 **parser.Parser.PROPERTY_PARSERS, 588 "ENGINE": lambda self: self._parse_engine_property(), 589 } 590 PROPERTY_PARSERS.pop("DYNAMIC") 591 592 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 593 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 594 595 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 596 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 597 598 RANGE_PARSERS = { 599 **parser.Parser.RANGE_PARSERS, 600 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 601 } 602 603 # The PLACEHOLDER entry is popped because 1) it doesn't 
affect Clickhouse (it corresponds to 604 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 605 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 606 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 607 608 JOIN_KINDS = { 609 *parser.Parser.JOIN_KINDS, 610 TokenType.ANY, 611 TokenType.ASOF, 612 TokenType.ARRAY, 613 } 614 615 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 616 TokenType.ANY, 617 TokenType.ARRAY, 618 TokenType.FINAL, 619 TokenType.FORMAT, 620 TokenType.SETTINGS, 621 } 622 623 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 624 TokenType.FORMAT, 625 } 626 627 LOG_DEFAULTS_TO_LN = True 628 629 QUERY_MODIFIER_PARSERS = { 630 **parser.Parser.QUERY_MODIFIER_PARSERS, 631 TokenType.SETTINGS: lambda self: ( 632 "settings", 633 self._advance() or self._parse_csv(self._parse_assignment), 634 ), 635 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 636 } 637 638 CONSTRAINT_PARSERS = { 639 **parser.Parser.CONSTRAINT_PARSERS, 640 "INDEX": lambda self: self._parse_index_constraint(), 641 "CODEC": lambda self: self._parse_compress(), 642 } 643 644 ALTER_PARSERS = { 645 **parser.Parser.ALTER_PARSERS, 646 "REPLACE": lambda self: self._parse_alter_table_replace(), 647 } 648 649 SCHEMA_UNNAMED_CONSTRAINTS = { 650 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 651 "INDEX", 652 } 653 654 PLACEHOLDER_PARSERS = { 655 **parser.Parser.PLACEHOLDER_PARSERS, 656 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 657 } 658 659 def _parse_engine_property(self) -> exp.EngineProperty: 660 self._match(TokenType.EQ) 661 return self.expression( 662 exp.EngineProperty, 663 this=self._parse_field(any_token=True, anonymous_func=True), 664 ) 665 666 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 667 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 668 return self._parse_lambda() 669 670 def _parse_types( 671 self, check_func: bool = 
False, schema: bool = False, allow_identifiers: bool = True 672 ) -> t.Optional[exp.Expression]: 673 dtype = super()._parse_types( 674 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 675 ) 676 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 677 # Mark every type as non-nullable which is ClickHouse's default, unless it's 678 # already marked as nullable. This marker helps us transpile types from other 679 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 680 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 681 # fail in ClickHouse without the `Nullable` type constructor. 682 dtype.set("nullable", False) 683 684 return dtype 685 686 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 687 index = self._index 688 this = self._parse_bitwise() 689 if self._match(TokenType.FROM): 690 self._retreat(index) 691 return super()._parse_extract() 692 693 # We return Anonymous here because extract and regexpExtract have different semantics, 694 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 695 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 696 # 697 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
698 self._match(TokenType.COMMA) 699 return self.expression( 700 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 701 ) 702 703 def _parse_assignment(self) -> t.Optional[exp.Expression]: 704 this = super()._parse_assignment() 705 706 if self._match(TokenType.PLACEHOLDER): 707 return self.expression( 708 exp.If, 709 this=this, 710 true=self._parse_assignment(), 711 false=self._match(TokenType.COLON) and self._parse_assignment(), 712 ) 713 714 return this 715 716 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 717 """ 718 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 719 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 720 """ 721 index = self._index 722 723 this = self._parse_id_var() 724 self._match(TokenType.COLON) 725 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 726 self._match_text_seq("IDENTIFIER") and "Identifier" 727 ) 728 729 if not kind: 730 self._retreat(index) 731 return None 732 elif not self._match(TokenType.R_BRACE): 733 self.raise_error("Expecting }") 734 735 if isinstance(this, exp.Identifier) and not this.quoted: 736 this = exp.var(this.name) 737 738 return self.expression(exp.Placeholder, this=this, kind=kind) 739 740 def _parse_bracket( 741 self, this: t.Optional[exp.Expression] = None 742 ) -> t.Optional[exp.Expression]: 743 l_brace = self._match(TokenType.L_BRACE, advance=False) 744 bracket = super()._parse_bracket(this) 745 746 if l_brace and isinstance(bracket, exp.Struct): 747 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 748 for expression in bracket.expressions: 749 if not isinstance(expression, exp.PropertyEQ): 750 break 751 752 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 753 varmap.args["values"].append("expressions", expression.expression) 754 755 return varmap 756 757 return bracket 758 759 def _parse_in(self, this: t.Optional[exp.Expression], 
is_global: bool = False) -> exp.In: 760 this = super()._parse_in(this) 761 this.set("is_global", is_global) 762 return this 763 764 def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In: 765 is_negated = self._match(TokenType.NOT) 766 in_expr = self._parse_in(this, is_global=True) if self._match(TokenType.IN) else None 767 return self.expression(exp.Not, this=in_expr) if is_negated else t.cast(exp.In, in_expr) 768 769 def _parse_table( 770 self, 771 schema: bool = False, 772 joins: bool = False, 773 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 774 parse_bracket: bool = False, 775 is_db_reference: bool = False, 776 parse_partition: bool = False, 777 consume_pipe: bool = False, 778 ) -> t.Optional[exp.Expression]: 779 this = super()._parse_table( 780 schema=schema, 781 joins=joins, 782 alias_tokens=alias_tokens, 783 parse_bracket=parse_bracket, 784 is_db_reference=is_db_reference, 785 ) 786 787 if isinstance(this, exp.Table): 788 inner = this.this 789 alias = this.args.get("alias") 790 791 if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns: 792 alias.set("columns", [exp.to_identifier("generate_series")]) 793 794 if self._match(TokenType.FINAL): 795 this = self.expression(exp.Final, this=this) 796 797 return this 798 799 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 800 return super()._parse_position(haystack_first=True) 801 802 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 803 def _parse_cte(self) -> t.Optional[exp.CTE]: 804 # WITH <identifier> AS <subquery expression> 805 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 806 807 if not cte: 808 # WITH <expression> AS <identifier> 809 cte = self.expression( 810 exp.CTE, 811 this=self._parse_assignment(), 812 alias=self._parse_table_alias(), 813 scalar=True, 814 ) 815 816 return cte 817 818 def _parse_join_parts( 819 self, 820 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], 
t.Optional[Token]]: 821 is_global = self._prev if self._match(TokenType.GLOBAL) else None 822 kind_pre = self._prev if self._match_set(self.JOIN_KINDS, advance=False) else None 823 824 if kind_pre: 825 kind = self._prev if self._match_set(self.JOIN_KINDS) else None 826 side = self._prev if self._match_set(self.JOIN_SIDES) else None 827 return is_global, side, kind 828 829 return ( 830 is_global, 831 self._prev if self._match_set(self.JOIN_SIDES) else None, 832 self._prev if self._match_set(self.JOIN_KINDS) else None, 833 ) 834 835 def _parse_join( 836 self, skip_join_token: bool = False, parse_bracket: bool = False 837 ) -> t.Optional[exp.Join]: 838 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 839 if join: 840 method = join.args.get("method") 841 join.set("method", None) 842 join.set("global_", method) 843 844 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 845 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 846 if join.kind == "ARRAY": 847 for table in join.find_all(exp.Table): 848 table.replace(table.to_column()) 849 850 return join 851 852 def _parse_function( 853 self, 854 functions: t.Optional[t.Dict[str, t.Callable]] = None, 855 anonymous: bool = False, 856 optional_parens: bool = True, 857 any_token: bool = False, 858 ) -> t.Optional[exp.Expression]: 859 expr = super()._parse_function( 860 functions=functions, 861 anonymous=anonymous, 862 optional_parens=optional_parens, 863 any_token=any_token, 864 ) 865 866 func = expr.this if isinstance(expr, exp.Window) else expr 867 868 # Aggregate functions can be split in 2 parts: <func_name><suffix> 869 parts = ( 870 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 871 ) 872 873 if parts: 874 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 875 params = self._parse_func_params(anon_func) 876 877 kwargs = { 878 "this": anon_func.this, 879 "expressions": anon_func.expressions, 880 } 881 if 
parts[1]: 882 exp_class: t.Type[exp.Expression] = ( 883 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 884 ) 885 else: 886 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 887 888 kwargs["exp_class"] = exp_class 889 if params: 890 kwargs["params"] = params 891 892 func = self.expression(**kwargs) 893 894 if isinstance(expr, exp.Window): 895 # The window's func was parsed as Anonymous in base parser, fix its 896 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 897 expr.set("this", func) 898 elif params: 899 # Params have blocked super()._parse_function() from parsing the following window 900 # (if that exists) as they're standing between the function call and the window spec 901 expr = self._parse_window(func) 902 else: 903 expr = func 904 905 return expr 906 907 def _parse_func_params( 908 self, this: t.Optional[exp.Func] = None 909 ) -> t.Optional[t.List[exp.Expression]]: 910 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 911 return self._parse_csv(self._parse_lambda) 912 913 if self._match(TokenType.L_PAREN): 914 params = self._parse_csv(self._parse_lambda) 915 self._match_r_paren(this) 916 return params 917 918 return None 919 920 def _parse_quantile(self) -> exp.Quantile: 921 this = self._parse_lambda() 922 params = self._parse_func_params() 923 if params: 924 return self.expression(exp.Quantile, this=params[0], quantile=this) 925 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 926 927 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 928 return super()._parse_wrapped_id_vars(optional=True) 929 930 def _parse_primary_key( 931 self, 932 wrapped_optional: bool = False, 933 in_props: bool = False, 934 named_primary_key: bool = False, 935 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 936 return super()._parse_primary_key( 937 wrapped_optional=wrapped_optional or in_props, 938 in_props=in_props, 939 
named_primary_key=named_primary_key, 940 ) 941 942 def _parse_on_property(self) -> t.Optional[exp.Expression]: 943 index = self._index 944 if self._match_text_seq("CLUSTER"): 945 this = self._parse_string() or self._parse_id_var() 946 if this: 947 return self.expression(exp.OnCluster, this=this) 948 else: 949 self._retreat(index) 950 return None 951 952 def _parse_index_constraint( 953 self, kind: t.Optional[str] = None 954 ) -> exp.IndexColumnConstraint: 955 # INDEX name1 expr TYPE type1(args) GRANULARITY value 956 this = self._parse_id_var() 957 expression = self._parse_assignment() 958 959 index_type = self._match_text_seq("TYPE") and ( 960 self._parse_function() or self._parse_var() 961 ) 962 963 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 964 965 return self.expression( 966 exp.IndexColumnConstraint, 967 this=this, 968 expression=expression, 969 index_type=index_type, 970 granularity=granularity, 971 ) 972 973 def _parse_partition(self) -> t.Optional[exp.Partition]: 974 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 975 if not self._match(TokenType.PARTITION): 976 return None 977 978 if self._match_text_seq("ID"): 979 # Corresponds to the PARTITION ID <string_value> syntax 980 expressions: t.List[exp.Expression] = [ 981 self.expression(exp.PartitionId, this=self._parse_string()) 982 ] 983 else: 984 expressions = self._parse_expressions() 985 986 return self.expression(exp.Partition, expressions=expressions) 987 988 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 989 partition = self._parse_partition() 990 991 if not partition or not self._match(TokenType.FROM): 992 return None 993 994 return self.expression( 995 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 996 ) 997 998 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 999 if not self._match_text_seq("PROJECTION"): 1000 return None 1001 1002 return 
def _parse_constraint(self) -> t.Optional[exp.Expression]:
    # PROJECTION definitions may appear wherever table constraints can.
    return super()._parse_constraint() or self._parse_projection_def()

def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
    # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
    if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
        return this

    return super()._parse_alias(this=this, explicit=explicit)

def _parse_expression(self) -> t.Optional[exp.Expression]:
    this = super()._parse_expression()

    # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
    while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
        this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
        self._match(TokenType.R_PAREN)

    return this

def _parse_columns(self) -> exp.Expression:
    # COLUMNS('<re>') optionally followed by one or more APPLY(func) chains.
    this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

    while self._next and self._match_text_seq(")", "APPLY", "("):
        self._match(TokenType.R_PAREN)
        this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
    return this

def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
    value = super()._parse_value(values=values)
    if not value:
        return None

    # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
    # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
    # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
    # but the final result is not altered by the extra parentheses.
    # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
    expressions = value.expressions
    if values and not isinstance(expressions[-1], exp.Tuple):
        value.set(
            "expressions",
            [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
        )

    return value

def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
    # ClickHouse allows custom expressions as partition key
    # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
    return self.expression(
        exp.PartitionedByProperty,
        this=self._parse_assignment(),
    )
# Maps sqlglot's generic types onto ClickHouse's native type names.
TYPE_MAPPING = {
    **generator.Generator.TYPE_MAPPING,
    **STRING_TYPE_MAPPING,
    exp.DataType.Type.ARRAY: "Array",
    exp.DataType.Type.BOOLEAN: "Bool",
    exp.DataType.Type.BIGINT: "Int64",
    exp.DataType.Type.DATE32: "Date32",
    exp.DataType.Type.DATETIME: "DateTime",
    exp.DataType.Type.DATETIME2: "DateTime",
    exp.DataType.Type.SMALLDATETIME: "DateTime",
    exp.DataType.Type.DATETIME64: "DateTime64",
    exp.DataType.Type.DECIMAL: "Decimal",
    exp.DataType.Type.DECIMAL32: "Decimal32",
    exp.DataType.Type.DECIMAL64: "Decimal64",
    exp.DataType.Type.DECIMAL128: "Decimal128",
    exp.DataType.Type.DECIMAL256: "Decimal256",
    exp.DataType.Type.TIMESTAMP: "DateTime",
    exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
    exp.DataType.Type.TIMESTAMPTZ: "DateTime",
    exp.DataType.Type.DOUBLE: "Float64",
    exp.DataType.Type.ENUM: "Enum",
    exp.DataType.Type.ENUM8: "Enum8",
    exp.DataType.Type.ENUM16: "Enum16",
    exp.DataType.Type.FIXEDSTRING: "FixedString",
    exp.DataType.Type.FLOAT: "Float32",
    exp.DataType.Type.INT: "Int32",
    exp.DataType.Type.MEDIUMINT: "Int32",
    exp.DataType.Type.INT128: "Int128",
    exp.DataType.Type.INT256: "Int256",
    exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
    exp.DataType.Type.MAP: "Map",
    exp.DataType.Type.NESTED: "Nested",
    exp.DataType.Type.NOTHING: "Nothing",
    exp.DataType.Type.SMALLINT: "Int16",
    exp.DataType.Type.STRUCT: "Tuple",
    exp.DataType.Type.TINYINT: "Int8",
    exp.DataType.Type.UBIGINT: "UInt64",
    exp.DataType.Type.UINT: "UInt32",
    exp.DataType.Type.UINT128: "UInt128",
    exp.DataType.Type.UINT256: "UInt256",
    exp.DataType.Type.USMALLINT: "UInt16",
    exp.DataType.Type.UTINYINT: "UInt8",
    exp.DataType.Type.IPV4: "IPv4",
    exp.DataType.Type.IPV6: "IPv6",
    exp.DataType.Type.POINT: "Point",
    exp.DataType.Type.RING: "Ring",
    exp.DataType.Type.LINESTRING: "LineString",
    exp.DataType.Type.MULTILINESTRING: "MultiLineString",
    exp.DataType.Type.POLYGON: "Polygon",
    exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
    exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
    exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    exp.DataType.Type.DYNAMIC: "Dynamic",
}

# Per-expression SQL renderers; mostly simple renames into ClickHouse's
# camelCase function names, plus a few structural rewrites.
TRANSFORMS = {
    **generator.Generator.TRANSFORMS,
    exp.AnyValue: rename_func("any"),
    exp.ApproxDistinct: rename_func("uniq"),
    exp.ArrayConcat: rename_func("arrayConcat"),
    exp.ArrayContains: rename_func("has"),
    # arrayFilter takes the lambda first, the array second.
    exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
    exp.ArrayRemove: remove_from_array_using_filter,
    exp.ArrayReverse: rename_func("arrayReverse"),
    exp.ArraySlice: rename_func("arraySlice"),
    exp.ArraySum: rename_func("arraySum"),
    exp.ArrayMax: rename_func("arrayMax"),
    exp.ArrayMin: rename_func("arrayMin"),
    exp.ArgMax: arg_max_or_min_no_count("argMax"),
    exp.ArgMin: arg_max_or_min_no_count("argMin"),
    exp.Array: inline_array_sql,
    exp.CastToStrType: rename_func("CAST"),
    exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
    exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
    exp.CountIf: rename_func("countIf"),
    exp.CosineDistance: rename_func("cosineDistance"),
    exp.CompressColumnConstraint: lambda self,
    e: f"CODEC({self.expressions(e, key='this', flat=True)})",
    exp.ComputedColumnConstraint: lambda self,
    e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
    exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
    exp.CurrentVersion: rename_func("VERSION"),
    exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
    exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
    exp.DateStrToDate: rename_func("toDate"),
    exp.DateSub: _datetime_delta_sql("DATE_SUB"),
    exp.Explode: rename_func("arrayJoin"),
    exp.FarmFingerprint: rename_func("farmFingerprint64"),
    exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
    exp.IsNan: rename_func("isNaN"),
    exp.JarowinklerSimilarity: jarowinkler_similarity("jaroWinklerSimilarity"),
    exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
    exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
    exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
    exp.JSONPathKey: json_path_key_only_name,
    exp.JSONPathRoot: lambda *_: "",
    exp.Length: length_or_char_length_sql,
    exp.Map: _map_sql,
    exp.Median: rename_func("median"),
    exp.Nullif: rename_func("nullIf"),
    exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
    exp.Pivot: no_pivot_sql,
    exp.Quantile: _quantile_sql,
    exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
    exp.Rand: rename_func("randCanonical"),
    exp.StartsWith: rename_func("startsWith"),
    exp.Struct: rename_func("tuple"),
    exp.Trunc: rename_func("trunc"),
    exp.EndsWith: rename_func("endsWith"),
    exp.EuclideanDistance: rename_func("L2Distance"),
    exp.StrPosition: lambda self, e: strposition_sql(
        self,
        e,
        func_name="POSITION",
        supports_position=True,
        use_ansi_position=False,
    ),
    exp.TimeToStr: lambda self, e: self.func(
        "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
    ),
    exp.TimeStrToTime: _timestrtotime_sql,
    exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
    exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
    exp.Typeof: rename_func("toTypeName"),
    exp.VarMap: _map_sql,
    exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
    exp.MD5Digest: rename_func("MD5"),
    exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
    exp.SHA: rename_func("SHA1"),
    exp.SHA1Digest: rename_func("SHA1"),
    exp.SHA2: sha256_sql,
    exp.SHA2Digest: sha2_digest_sql,
    # splitByString/splitByRegexp take the separator first, the string second.
    exp.Split: lambda self, e: self.func(
        "splitByString", e.args.get("expression"), e.this, e.args.get("limit")
    ),
    exp.RegexpSplit: lambda self, e: self.func(
        "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
    ),
    exp.UnixToTime: _unix_to_time_sql,
    exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
    exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
    exp.Variance: rename_func("varSamp"),
    exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
    exp.Stddev: rename_func("stddevSamp"),
    exp.Chr: rename_func("CHAR"),
    exp.Lag: lambda self, e: self.func(
        "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
    ),
    exp.Lead: lambda self, e: self.func(
        "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
    ),
    exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
        rename_func("editDistance")
    ),
    exp.ParseDatetime: rename_func("parseDateTime"),
}

PROPERTIES_LOCATION = {
    **generator.Generator.PROPERTIES_LOCATION,
    exp.OnCluster: exp.Properties.Location.POST_NAME,
    exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
    exp.ToTableProperty: exp.Properties.Location.POST_NAME,
    exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
}

# There's no list in docs, but it can be found in Clickhouse code
# see `ClickHouse/src/Parsers/ParserCreate*.cpp`
ON_CLUSTER_TARGETS = {
    "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
    "DATABASE",
    "TABLE",
    "VIEW",
    "DICTIONARY",
    "INDEX",
    "FUNCTION",
    "NAMED COLLECTION",
}
# https://clickhouse.com/docs/en/sql-reference/data-types/nullable
# Types that ClickHouse refuses to wrap in Nullable(...).
NON_NULLABLE_TYPES = {
    exp.DataType.Type.ARRAY,
    exp.DataType.Type.MAP,
    exp.DataType.Type.STRUCT,
    exp.DataType.Type.POINT,
    exp.DataType.Type.RING,
    exp.DataType.Type.LINESTRING,
    exp.DataType.Type.MULTILINESTRING,
    exp.DataType.Type.POLYGON,
    exp.DataType.Type.MULTIPOLYGON,
}

def offset_sql(self, expression: exp.Offset) -> str:
    offset = super().offset_sql(expression)

    # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
    # https://clickhouse.com/docs/sql-reference/statements/select/offset
    parent = expression.parent
    if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
        offset = f"{offset} ROWS"

    return offset

def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return self.cast_sql(exp.cast(expression, "DATE"))

    return strtodate_sql

def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    # NOTE(review): CAST(STR_TO_DATE(...) AS DATETIME) is rendered as the bare
    # StrToDate SQL — presumably that call already yields a DateTime; confirm.
    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)

def trycast_sql(self, expression: exp.TryCast) -> str:
    # Renders TRY_CAST as a plain CAST to a Nullable target type.
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)

def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
    # Shift integer JSON-path subscripts by one (ClickHouse indexes from 1).
    this = self.json_path_part(expression.this)
    return str(int(this) + 1) if is_int(this) else this

def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
    # CREATE TABLE t LIKE other -> CREATE TABLE t AS other
    return f"AS {self.sql(expression, 'this')}"

def _any_to_has(
    self,
    expression: exp.EQ | exp.NEQ,
    default: t.Callable[[t.Any], str],
    prefix: str = "",
) -> str:
    # Rewrites `x = ANY(arr)` (on either side) into has(arr, x); `prefix`
    # lets NEQ produce "NOT has(...)". Falls back to `default` otherwise.
    if isinstance(expression.left, exp.Any):
        arr = expression.left
        this = expression.right
    elif isinstance(expression.right, exp.Any):
        arr = expression.right
        this = expression.left
    else:
        return default(expression)

    return prefix + self.func("has", arr.this.unnest(), this)

def eq_sql(self, expression: exp.EQ) -> str:
    return self._any_to_has(expression, super().eq_sql)

def neq_sql(self, expression: exp.NEQ) -> str:
    return self._any_to_has(expression, super().neq_sql, "NOT ")

def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
    # Manually add a flag to make the search case-insensitive
    regex = self.func("CONCAT", "'(?i)'", expression.expression)
    return self.func("match", expression.this, regex)
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype

def cte_sql(self, expression: exp.CTE) -> str:
    # Scalar CTEs (WITH <expr> AS <name>) render without parentheses.
    if expression.args.get("scalar"):
        this = self.sql(expression, "this")
        alias = self.sql(expression, "alias")
        return f"{this} AS {alias}"

    return super().cte_sql(expression)

def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    # SETTINGS and FORMAT trail the LIMIT clause in ClickHouse queries.
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]

def placeholder_sql(self, expression: exp.Placeholder) -> str:
    # ClickHouse query parameter syntax: {name: Type}
    return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

def oncluster_sql(self, expression: exp.OnCluster) -> str:
    return f"ON CLUSTER {self.sql(expression, 'this')}"

def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    # POST_NAME properties (e.g. ON CLUSTER) must be rendered between the
    # created object's name and its column schema.
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
def create_sql(self, expression: exp.Create) -> str:
    """Render CREATE; in CTAS the COMMENT property must come last, after the query."""
    inner_query = expression.expression
    comment_prop = None

    if isinstance(inner_query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
        inner_query.replace(exp.paren(inner_query))

    base_sql = super().create_sql(expression)
    trailing_comment = self.sql(comment_prop)

    return f"{base_sql} {trailing_comment}" if trailing_comment else base_sql

def prewhere_sql(self, expression: exp.PreWhere) -> str:
    # PREWHERE is ClickHouse's pre-filter clause; rendered like WHERE.
    body = self.indent(self.sql(expression, "this"))
    return f"{self.seg('PREWHERE')}{self.sep()}{body}"

def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    # INDEX <name> <expr> TYPE <type> GRANULARITY <value>, every part optional.
    parts = ["INDEX"]

    name = self.sql(expression, "this")
    if name:
        parts.append(name)

    expr = self.sql(expression, "expression")
    if expr:
        parts.append(expr)

    index_type = self.sql(expression, "index_type")
    if index_type:
        parts.append(f"TYPE {index_type}")

    granularity = self.sql(expression, "granularity")
    if granularity:
        parts.append(f"GRANULARITY {granularity}")

    return " ".join(parts)

def partition_sql(self, expression: exp.Partition) -> str:
    return "PARTITION " + self.expressions(expression, flat=True)

def partitionid_sql(self, expression: exp.PartitionId) -> str:
    # PARTITION ID '<value>'
    return "ID " + self.sql(expression.this)

def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
    partition = self.sql(expression.expression)
    source = self.sql(expression, "source")
    return f"REPLACE {partition} FROM {source}"

def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
    name = self.sql(expression.this)
    body = self.wrap(expression.expression)
    return f"PROJECTION {name} {body}"

def is_sql(self, expression: exp.Is) -> str:
    rendered = super().is_sql(expression)
    # Under NOT, parenthesize: value IS NOT NULL -> NOT (value IS NULL)
    return self.wrap(rendered) if isinstance(expression.parent, exp.Not) else rendered

def in_sql(self, expression: exp.In) -> str:
    rendered = super().in_sql(expression)

    if expression.args.get("is_global") and isinstance(expression.parent, exp.Not):
        rendered = rendered.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

    return rendered

def not_sql(self, expression: exp.Not) -> str:
    inner = expression.this
    if isinstance(inner, exp.In):
        if inner.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")
        expression.set("this", exp.paren(inner, copy=False))

    return super().not_sql(expression)

def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
    # If the VALUES clause contains tuples of expressions, we need to treat it
    # as a table since Clickhouse will automatically alias it as such.
    alias = expression.args.get("alias")
    has_columns = bool(alias and alias.args.get("columns"))

    if has_columns and expression.expressions:
        first_row = expression.expressions[0].expressions
        values_as_table = any(isinstance(value, exp.Tuple) for value in first_row)
    else:
        values_as_table = True

    return super().values_sql(expression, values_as_table=values_as_table)
class ClickHouse(Dialect):
    """ClickHouse SQL dialect definition (tokenizer/parser/generator settings)."""

    INDEX_OFFSET = 1  # arrays are 1-indexed in ClickHouse
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure e.g:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name e.g:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
# Tested in ClickHouse's playground, it seems that the following two queries do the same thing
# * select x from t1 union all select x from t2 limit 1;
# * select x from t1 union all (select x from t2 limit 1);
MODIFIERS_ATTACHED_TO_SET_OP = False
INTERVAL_SPANS = False
OPTIONAL_ALIAS_TOKEN_CTE = False
JOINS_HAVE_EQUAL_PRECEDENCE = True

# Function-name -> builder mapping for ClickHouse-specific parsing.
FUNCTIONS = {
    **parser.Parser.FUNCTIONS,
    **{
        f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
        for unit in TIMESTAMP_TRUNC_UNITS
    },
    "ANY": exp.AnyValue.from_arg_list,
    "ARRAYCOMPACT": exp.ArrayCompact.from_arg_list,
    "ARRAYCONCAT": exp.ArrayConcat.from_arg_list,
    "ARRAYDISTINCT": exp.ArrayDistinct.from_arg_list,
    "ARRAYSUM": exp.ArraySum.from_arg_list,
    "ARRAYMAX": exp.ArrayMax.from_arg_list,
    "ARRAYMIN": exp.ArrayMin.from_arg_list,
    "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
    "ARRAYSLICE": exp.ArraySlice.from_arg_list,
    "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
    "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
    "COUNTIF": _build_count_if,
    "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
    "VERSION": exp.CurrentVersion.from_arg_list,
    "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
    "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
    "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
    "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
    "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
    "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
    "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
    "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
    "HAS": exp.ArrayContains.from_arg_list,
    "ILIKE": build_like(exp.ILike),
    "JSONEXTRACTSTRING": build_json_extract_path(
        exp.JSONExtractScalar, zero_based_indexing=False
    ),
    "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
    "LIKE": build_like(exp.Like),
    "L2Distance": exp.EuclideanDistance.from_arg_list,
    "MAP": parser.build_var_map,
    "MATCH": exp.RegexpLike.from_arg_list,
    "NOTLIKE": build_like(exp.Like, not_like=True),
    "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
    "RANDCANONICAL": exp.Rand.from_arg_list,
    "STR_TO_DATE": _build_str_to_date,
    "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
    "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
    "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
    "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
    "TOMONDAY": _build_timestamp_trunc("WEEK"),
    "UNIQ": exp.ApproxDistinct.from_arg_list,
    "XOR": lambda args: exp.Xor(expressions=args),
    "MD5": exp.MD5Digest.from_arg_list,
    "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
    "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    "SPLITBYCHAR": _build_split_by_char,
    "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
    "SPLITBYSTRING": _build_split(exp.Split),
    "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
    "TOTYPENAME": exp.Typeof.from_arg_list,
    "EDITDISTANCE": exp.Levenshtein.from_arg_list,
    "JAROWINKLERSIMILARITY": exp.JarowinklerSimilarity.from_arg_list,
    "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
    "UTCTIMESTAMP": exp.UtcTimestamp.from_arg_list,
}
# Base-parser entries that are not used for ClickHouse.
FUNCTIONS.pop("TRANSFORM")
FUNCTIONS.pop("APPROX_TOP_SUM")

# Base aggregate function names; combined with AGG_FUNCTIONS_SUFFIXES below
# to recognize combinator forms such as sumIf, countDistinct, etc.
AGG_FUNCTIONS = {
    # fmt: off
    "count", "min", "max", "sum", "avg", "any", "stddevPop", "stddevSamp",
    "varPop", "varSamp", "corr", "covarPop", "covarSamp", "entropy",
    "exponentialMovingAverage", "intervalLengthSum", "kolmogorovSmirnovTest",
    "mannWhitneyUTest", "median", "rankCorr", "sumKahan", "studentTTest",
    "welchTTest", "anyHeavy", "anyLast", "boundingRatio", "first_value",
    "last_value", "argMin", "argMax", "avgWeighted", "topK", "approx_top_sum",
    "topKWeighted", "deltaSum", "deltaSumTimestamp", "groupArray",
    "groupArrayLast", "groupUniqArray", "groupArrayInsertAt",
    "groupArrayMovingAvg", "groupArrayMovingSum", "groupArraySample",
    "groupBitAnd", "groupBitOr", "groupBitXor", "groupBitmap",
    "groupBitmapAnd", "groupBitmapOr", "groupBitmapXor", "sumWithOverflow",
    "sumMap", "minMap", "maxMap", "skewSamp", "skewPop", "kurtSamp",
    "kurtPop", "uniq", "uniqExact", "uniqCombined", "uniqCombined64",
    "uniqHLL12", "uniqTheta", "quantile", "quantiles", "quantileExact",
    "quantilesExact", "quantilesExactExclusive", "quantileExactLow",
    "quantilesExactLow", "quantileExactHigh", "quantilesExactHigh",
    "quantileExactWeighted", "quantilesExactWeighted", "quantileTiming",
    "quantilesTiming", "quantileTimingWeighted", "quantilesTimingWeighted",
    "quantileDeterministic", "quantilesDeterministic", "quantileTDigest",
    "quantilesTDigest", "quantileTDigestWeighted", "quantilesTDigestWeighted",
    "quantileBFloat16", "quantilesBFloat16", "quantileBFloat16Weighted",
    "quantilesBFloat16Weighted", "simpleLinearRegression",
    "stochasticLinearRegression", "stochasticLogisticRegression",
    "categoricalInformationValue", "contingency", "cramersV",
    "cramersVBiasCorrected", "theilsU", "maxIntersections",
    "maxIntersectionsPosition", "meanZTest", "quantileInterpolatedWeighted",
    "quantilesInterpolatedWeighted", "quantileGK", "quantilesGK", "sparkBar",
    "sumCount", "largestTriangleThreeBuckets", "histogram", "sequenceMatch",
    "sequenceCount", "windowFunnel", "retention", "uniqUpTo",
    "sequenceNextNode", "exponentialTimeDecayedAvg",
    # fmt: on
}

# Aggregate combinator suffixes: https://clickhouse.com/docs/en/sql-reference/aggregate-functions/combinators
AGG_FUNCTIONS_SUFFIXES = [
    "If",
    "Array",
    "ArrayIf",
    "Map",
    "SimpleState",
    "State",
    "Merge",
    "MergeState",
    "ForEach",
    "Distinct",
    "OrDefault",
    "OrNull",
    "Resample",
    "ArgMin",
    "ArgMax",
]

FUNC_TOKENS = {
    *parser.Parser.FUNC_TOKENS,
    TokenType.AND,
    TokenType.OR,
    TokenType.SET,
}

RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

ID_VAR_TOKENS = {
    *parser.Parser.ID_VAR_TOKENS,
    TokenType.LIKE,
}

# Cross product of every aggregate function with every combinator suffix
# (plus the bare name), mapping e.g. "sumIf" -> ("sum", "If").
AGG_FUNC_MAPPING = (
    lambda functions, suffixes: {
        f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
    }
)(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

FUNCTION_PARSERS = {
    **parser.Parser.FUNCTION_PARSERS,
    "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
    "QUANTILE": lambda self: self._parse_quantile(),
    "MEDIAN": lambda self: self._parse_quantile(),
    "COLUMNS": lambda self: self._parse_columns(),
    "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
    "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
    "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
}

FUNCTION_PARSERS.pop("MATCH")

PROPERTY_PARSERS = {
    **parser.Parser.PROPERTY_PARSERS,
    "ENGINE": lambda self: self._parse_engine_property(),
}
PROPERTY_PARSERS.pop("DYNAMIC")

NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
NO_PAREN_FUNCTION_PARSERS.pop("ANY")

NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

RANGE_PARSERS = {
    **parser.Parser.RANGE_PARSERS,
    TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
}
affect Clickhouse (it corresponds to 605 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 606 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 607 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 608 609 JOIN_KINDS = { 610 *parser.Parser.JOIN_KINDS, 611 TokenType.ANY, 612 TokenType.ASOF, 613 TokenType.ARRAY, 614 } 615 616 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 617 TokenType.ANY, 618 TokenType.ARRAY, 619 TokenType.FINAL, 620 TokenType.FORMAT, 621 TokenType.SETTINGS, 622 } 623 624 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 625 TokenType.FORMAT, 626 } 627 628 LOG_DEFAULTS_TO_LN = True 629 630 QUERY_MODIFIER_PARSERS = { 631 **parser.Parser.QUERY_MODIFIER_PARSERS, 632 TokenType.SETTINGS: lambda self: ( 633 "settings", 634 self._advance() or self._parse_csv(self._parse_assignment), 635 ), 636 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 637 } 638 639 CONSTRAINT_PARSERS = { 640 **parser.Parser.CONSTRAINT_PARSERS, 641 "INDEX": lambda self: self._parse_index_constraint(), 642 "CODEC": lambda self: self._parse_compress(), 643 } 644 645 ALTER_PARSERS = { 646 **parser.Parser.ALTER_PARSERS, 647 "REPLACE": lambda self: self._parse_alter_table_replace(), 648 } 649 650 SCHEMA_UNNAMED_CONSTRAINTS = { 651 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 652 "INDEX", 653 } 654 655 PLACEHOLDER_PARSERS = { 656 **parser.Parser.PLACEHOLDER_PARSERS, 657 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 658 } 659 660 def _parse_engine_property(self) -> exp.EngineProperty: 661 self._match(TokenType.EQ) 662 return self.expression( 663 exp.EngineProperty, 664 this=self._parse_field(any_token=True, anonymous_func=True), 665 ) 666 667 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 668 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 669 return self._parse_lambda() 670 671 def _parse_types( 672 self, check_func: bool = 
False, schema: bool = False, allow_identifiers: bool = True 673 ) -> t.Optional[exp.Expression]: 674 dtype = super()._parse_types( 675 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 676 ) 677 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 678 # Mark every type as non-nullable which is ClickHouse's default, unless it's 679 # already marked as nullable. This marker helps us transpile types from other 680 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 681 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 682 # fail in ClickHouse without the `Nullable` type constructor. 683 dtype.set("nullable", False) 684 685 return dtype 686 687 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 688 index = self._index 689 this = self._parse_bitwise() 690 if self._match(TokenType.FROM): 691 self._retreat(index) 692 return super()._parse_extract() 693 694 # We return Anonymous here because extract and regexpExtract have different semantics, 695 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 696 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 697 # 698 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
            # Continuation of _parse_extract: extract(haystack, pattern) form
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            # `cond ? a : b` ternary; `?` lexes as PLACEHOLDER (the base parser's
            # PLACEHOLDER column operator was popped above to keep this unambiguous)
            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                # Not a query parameter after all (e.g. a map literal) - rewind
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                # `{'k': v, ...}` map literal: convert the parsed Struct into a VarMap
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            # is_global marks ClickHouse's distributed `GLOBAL IN`; see _parse_global_in
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            # Handles GLOBAL [NOT] IN; the GLOBAL token was consumed by RANGE_PARSERS
            is_negated = self._match(TokenType.NOT)
            in_expr = self._parse_in(this, is_global=True) if self._match(TokenType.IN) else None
            return self.expression(exp.Not, this=in_expr) if is_negated else t.cast(exp.In, in_expr)

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                # An aliased generate_series() defaults its single output column's name
                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            # Trailing FINAL modifier forces merged reads on ReplacingMergeTree et al.
            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position(haystack, needle) always puts the haystack first
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier> (scalar CTE, ClickHouse-specific)
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Return (global, side, kind); ClickHouse allows the kind before the side."""
            is_global = self._prev if self._match(TokenType.GLOBAL) else None
            kind_pre = self._prev if self._match_set(self.JOIN_KINDS, advance=False) else None

            if kind_pre:
                # e.g. ANY LEFT JOIN - kind precedes side
                kind = self._prev if self._match_set(self.JOIN_KINDS) else None
                side = self._prev if self._match_set(self.JOIN_SIDES) else None
                return is_global, side, kind

            return (
                is_global,
                self._prev if self._match_set(self.JOIN_SIDES) else None,
                self._prev if self._match_set(self.JOIN_KINDS) else None,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                # GLOBAL was parsed into "method" by the base parser; move it to "global_"
                method = join.args.get("method")
                join.set("method", None)
                join.set("global_", method)

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    # Has a combinator suffix, e.g. sumIf
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            """Parse the parenthesized parameter list of a parameterized aggregate, if any."""
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                # func(args)(params) - args already consumed, parse the params list
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            # quantile(level)(expr) or median(expr); median implies level 0.5
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            # Parentheses around id var lists are always optional in ClickHouse
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self,
            wrapped_optional: bool = False,
            in_props: bool = False,
            named_primary_key: bool = False,
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            # PRIMARY KEY in the properties clause doesn't require parentheses
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props,
                in_props=in_props,
                named_primary_key=named_primary_key,
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            """Parse `ON CLUSTER <name>`, rewinding if it isn't one."""
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            # ALTER TABLE ... REPLACE PARTITION <part> FROM <table>
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            # PROJECTION <name> (SELECT ...) inside a table schema
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            # COLUMNS(...) [APPLY(f)]... dynamic column selection
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
            # ClickHouse allows custom expressions as partition key
            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
            return self.expression(
                exp.PartitionedByProperty,
                this=self._parse_assignment(),
            )

    class Generator(generator.Generator):
        # Feature flags describing what ClickHouse SQL supports/omits
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        # Every textual/binary type collapses to ClickHouse's single String type
        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        # sqlglot generic types -> ClickHouse type names
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPNTZ: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NOTHING: "Nothing",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }

        # Expression-node -> SQL-string generators for ClickHouse-specific spellings
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayContains: rename_func("has"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArrayReverse: rename_func("arrayReverse"),
            exp.ArraySlice: rename_func("arraySlice"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArrayMax: rename_func("arrayMax"),
            exp.ArrayMin: rename_func("arrayMin"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CurrentDatabase: rename_func("CURRENT_DATABASE"),
            exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"),
            exp.CountIf: rename_func("countIf"),
            exp.CosineDistance: rename_func("cosineDistance"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.CurrentVersion: rename_func("VERSION"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.FarmFingerprint: rename_func("farmFingerprint64"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JarowinklerSimilarity: jarowinkler_similarity("jaroWinklerSimilarity"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.Struct: rename_func("tuple"),
            exp.Trunc: rename_func("trunc"),
            exp.EndsWith: rename_func("endsWith"),
            exp.EuclideanDistance: rename_func("L2Distance"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.Typeof: rename_func("toTypeName"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
            exp.Split: lambda self, e: self.func(
                "splitByString", e.args.get("expression"), e.this, e.args.get("limit")
            ),
            exp.RegexpSplit: lambda self, e: self.func(
                "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit")
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
            exp.ParseDatetime: rename_func("parseDateTime"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }
        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def offset_sql(self, expression: exp.Offset) -> str:
            offset = super().offset_sql(expression)

            # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword
            # https://clickhouse.com/docs/sql-reference/statements/select/offset
            parent = expression.parent
            if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch):
                offset = f"{offset} ROWS"

            return offset

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # CAST(StrToDate(...) AS DATETIME) is redundant: drop the outer cast
            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            # Deliberately emit a plain CAST (see comment above)
            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            # ClickHouse JSON path subscripts are 1-based
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            # CREATE TABLE t AS other (instead of LIKE other)
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            """Rewrite `x = ANY(arr)` (on either side) into `has(arr, x)`, else fall back."""
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            # Scalar CTEs (WITH <expr> AS <ident>) render inline, without parentheses
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            # SETTINGS and FORMAT trail the query, after LIMIT
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            # Query parameter syntax: {name: Type}
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            # ON CLUSTER must come between the object name and its schema
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            # INDEX <name> <expr> TYPE <type> GRANULARITY <value>; each part is optional
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                # NOT must come between GLOBAL and IN in ClickHouse
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In):
                if expression.this.args.get("is_global"):
                    # let `GLOBAL IN` child interpose `NOT`
                    return self.sql(expression, "this")

                expression.set("this", exp.paren(expression.this, copy=False))

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (e.g. a literal newline character).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
263 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 264 # Clickhouse allows VALUES to have an embedded structure e.g: 265 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 266 # In this case, we don't want to qualify the columns 267 values = expression.expressions[0].expressions 268 269 structure = ( 270 values[0] 271 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 272 else None 273 ) 274 if structure: 275 # Split each column definition into the column name e.g: 276 # 'person String, place String' -> ['person', 'place'] 277 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 278 column_aliases = [ 279 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 280 ] 281 else: 282 # Default column aliases in CH are "c1", "c2", etc. 283 column_aliases = [ 284 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 285 ] 286 287 return column_aliases
    class Tokenizer(tokens.Tokenizer):
        """ClickHouse-specific tokenizer configuration (comment styles,
        identifier/string escapes, and keyword -> token-type overrides)."""

        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        # ClickHouse binary/hex literals have no closing delimiter, e.g. 0b01, 0xAB
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "EXCHANGE": TokenType.COMMAND,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "NOTHING": TokenType.NOTHING,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        # "/*+" hint-comment openers are not treated specially in ClickHouse
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- tokenize
- sql
- size
- tokens
    class Parser(parser.Parser):
        """ClickHouse-specific parser: function/keyword dispatch tables plus
        overrides for ClickHouse-only syntax (SETTINGS/FORMAT modifiers,
        GLOBAL IN, query parameters, scalar CTEs, ARRAY JOIN, etc.)."""

        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            # toStartOf<unit> family, each mapped to a timestamp-trunc builder
            **{
                f"TOSTARTOF{unit}": _build_timestamp_trunc(unit=unit)
                for unit in TIMESTAMP_TRUNC_UNITS
            },
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYCOMPACT": exp.ArrayCompact.from_arg_list,
            "ARRAYCONCAT": exp.ArrayConcat.from_arg_list,
            "ARRAYDISTINCT": exp.ArrayDistinct.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "ARRAYMAX": exp.ArrayMax.from_arg_list,
            "ARRAYMIN": exp.ArrayMin.from_arg_list,
            "ARRAYREVERSE": exp.ArrayReverse.from_arg_list,
            "ARRAYSLICE": exp.ArraySlice.from_arg_list,
            "CURRENTDATABASE": exp.CurrentDatabase.from_arg_list,
            "CURRENTSCHEMAS": exp.CurrentSchemas.from_arg_list,
            "COUNTIF": _build_count_if,
            "COSINEDISTANCE": exp.CosineDistance.from_arg_list,
            "VERSION": exp.CurrentVersion.from_arg_list,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None, supports_timezone=True),
            "DATE_FORMAT": _build_datetime_format(exp.TimeToStr),
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_datetime_format(exp.TimeToStr),
            "HAS": exp.ArrayContains.from_arg_list,
            "ILIKE": build_like(exp.ILike),
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            # ClickHouse's length() counts bytes, hence binary=True
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LIKE": build_like(exp.Like),
            "L2Distance": exp.EuclideanDistance.from_arg_list,
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "NOTLIKE": build_like(exp.Like, not_like=True),
            "PARSEDATETIME": _build_datetime_format(exp.ParseDatetime),
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TOMONDAY": _build_timestamp_trunc("WEEK"),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLITBYCHAR": _build_split_by_char,
            "SPLITBYREGEXP": _build_split(exp.RegexpSplit),
            "SPLITBYSTRING": _build_split(exp.Split),
            "SUBSTRINGINDEX": exp.SubstringIndex.from_arg_list,
            "TOTYPENAME": exp.Typeof.from_arg_list,
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "JAROWINKLERSIMILARITY": exp.JarowinklerSimilarity.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
            "UTCTIMESTAMP": exp.UtcTimestamp.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
        FUNCTIONS.pop("APPROX_TOP_SUM")

        # Base names of ClickHouse aggregate functions; combined with the
        # suffixes below to recognize combinator forms like sumIf, uniqMerge
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "approx_top_sum",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantilesExactExclusive",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        # Aggregate-combinator suffixes (e.g. -If, -Merge, -State)
        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        # Cross product of base aggregate names and suffixes ("" included),
        # mapping each combined name to its (base, suffix) parts
        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
            "TUPLE": lambda self: exp.Struct.from_arg_list(self._parse_function_args(alias=True)),
            "AND": lambda self: exp.and_(*self._parse_function_args(alias=False)),
            "OR": lambda self: exp.or_(*self._parse_function_args(alias=False)),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "ENGINE": lambda self: self._parse_engine_property(),
        }
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_engine_property(self) -> exp.EngineProperty:
            """Parse `ENGINE [=] <expr>` (the '=' is optional)."""
            self._match(TokenType.EQ)
            return self.expression(
                exp.EngineProperty,
                this=self._parse_field(any_token=True, anonymous_func=True),
            )

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            # Handle the ternary operator `cond ? a : b`
            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            # `{'k': v, ...}` literals become VarMap expressions
            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(
            self, this: t.Optional[exp.Expression], is_global: bool = False
        ) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            in_expr = self._parse_in(this, is_global=True) if self._match(TokenType.IN) else None
            return self.expression(exp.Not, this=in_expr) if is_negated else t.cast(exp.In, in_expr)

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            # `FROM tbl FINAL` forces merged reads; wrap the table in a Final node
            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position(haystack, needle) always puts the haystack first
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Return (global, side, kind); ClickHouse allows the join kind to
            precede the side (e.g. `ANY LEFT JOIN`), unlike the base parser."""
            is_global = self._prev if self._match(TokenType.GLOBAL) else None
            kind_pre = self._prev if self._match_set(self.JOIN_KINDS, advance=False) else None

            if kind_pre:
                kind = self._prev if self._match_set(self.JOIN_KINDS) else None
                side = self._prev if self._match_set(self.JOIN_SIDES) else None
                return is_global, side, kind

            return (
                is_global,
                self._prev if self._match_set(self.JOIN_SIDES) else None,
                self._prev if self._match_set(self.JOIN_KINDS) else None,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                # The GLOBAL token was parsed into the "method" slot; move it
                method = join.args.get("method")
                join.set("method", None)
                join.set("global_", method)

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    # A non-empty suffix means a combinator form, e.g. sumIf
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            """Parse the parenthesized parameter list of a parameterized
            aggregate, e.g. the `(0.5)` in `quantile(0.5)(x)`."""
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            # median(x) is quantile(0.5)(x)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self,
            wrapped_optional: bool = False,
            in_props: bool = False,
            named_primary_key: bool = False,
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props,
                in_props=in_props,
                named_primary_key=named_primary_key,
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            """Parse `ON CLUSTER <name>`, backtracking if it doesn't match."""
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            """Parse `REPLACE PARTITION ... FROM <table>` in ALTER TABLE."""
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

        def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
            # ClickHouse allows custom expressions as partition key
            # https://clickhouse.com/docs/engines/table-engines/mergetree-family/custom-partitioning-key
            return self.expression(
                exp.PartitionedByProperty,
                this=self._parse_assignment(),
            )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- raise_error
- validate_expression
- reset
- errors
- error_level
- error_message_context
- max_errors
- dialect
- sql
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
1067 class Generator(generator.Generator): 1068 QUERY_HINTS = False 1069 STRUCT_DELIMITER = ("(", ")") 1070 NVL2_SUPPORTED = False 1071 TABLESAMPLE_REQUIRES_PARENS = False 1072 TABLESAMPLE_SIZE_IS_ROWS = False 1073 TABLESAMPLE_KEYWORDS = "SAMPLE" 1074 LAST_DAY_SUPPORTS_DATE_PART = False 1075 CAN_IMPLEMENT_ARRAY_ANY = True 1076 SUPPORTS_TO_NUMBER = False 1077 JOIN_HINTS = False 1078 TABLE_HINTS = False 1079 GROUPINGS_SEP = "" 1080 SET_OP_MODIFIERS = False 1081 ARRAY_SIZE_NAME = "LENGTH" 1082 WRAP_DERIVED_VALUES = False 1083 1084 STRING_TYPE_MAPPING = { 1085 exp.DataType.Type.BLOB: "String", 1086 exp.DataType.Type.CHAR: "String", 1087 exp.DataType.Type.LONGBLOB: "String", 1088 exp.DataType.Type.LONGTEXT: "String", 1089 exp.DataType.Type.MEDIUMBLOB: "String", 1090 exp.DataType.Type.MEDIUMTEXT: "String", 1091 exp.DataType.Type.TINYBLOB: "String", 1092 exp.DataType.Type.TINYTEXT: "String", 1093 exp.DataType.Type.TEXT: "String", 1094 exp.DataType.Type.VARBINARY: "String", 1095 exp.DataType.Type.VARCHAR: "String", 1096 } 1097 1098 SUPPORTED_JSON_PATH_PARTS = { 1099 exp.JSONPathKey, 1100 exp.JSONPathRoot, 1101 exp.JSONPathSubscript, 1102 } 1103 1104 TYPE_MAPPING = { 1105 **generator.Generator.TYPE_MAPPING, 1106 **STRING_TYPE_MAPPING, 1107 exp.DataType.Type.ARRAY: "Array", 1108 exp.DataType.Type.BOOLEAN: "Bool", 1109 exp.DataType.Type.BIGINT: "Int64", 1110 exp.DataType.Type.DATE32: "Date32", 1111 exp.DataType.Type.DATETIME: "DateTime", 1112 exp.DataType.Type.DATETIME2: "DateTime", 1113 exp.DataType.Type.SMALLDATETIME: "DateTime", 1114 exp.DataType.Type.DATETIME64: "DateTime64", 1115 exp.DataType.Type.DECIMAL: "Decimal", 1116 exp.DataType.Type.DECIMAL32: "Decimal32", 1117 exp.DataType.Type.DECIMAL64: "Decimal64", 1118 exp.DataType.Type.DECIMAL128: "Decimal128", 1119 exp.DataType.Type.DECIMAL256: "Decimal256", 1120 exp.DataType.Type.TIMESTAMP: "DateTime", 1121 exp.DataType.Type.TIMESTAMPNTZ: "DateTime", 1122 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1123 
exp.DataType.Type.DOUBLE: "Float64", 1124 exp.DataType.Type.ENUM: "Enum", 1125 exp.DataType.Type.ENUM8: "Enum8", 1126 exp.DataType.Type.ENUM16: "Enum16", 1127 exp.DataType.Type.FIXEDSTRING: "FixedString", 1128 exp.DataType.Type.FLOAT: "Float32", 1129 exp.DataType.Type.INT: "Int32", 1130 exp.DataType.Type.MEDIUMINT: "Int32", 1131 exp.DataType.Type.INT128: "Int128", 1132 exp.DataType.Type.INT256: "Int256", 1133 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1134 exp.DataType.Type.MAP: "Map", 1135 exp.DataType.Type.NESTED: "Nested", 1136 exp.DataType.Type.NOTHING: "Nothing", 1137 exp.DataType.Type.SMALLINT: "Int16", 1138 exp.DataType.Type.STRUCT: "Tuple", 1139 exp.DataType.Type.TINYINT: "Int8", 1140 exp.DataType.Type.UBIGINT: "UInt64", 1141 exp.DataType.Type.UINT: "UInt32", 1142 exp.DataType.Type.UINT128: "UInt128", 1143 exp.DataType.Type.UINT256: "UInt256", 1144 exp.DataType.Type.USMALLINT: "UInt16", 1145 exp.DataType.Type.UTINYINT: "UInt8", 1146 exp.DataType.Type.IPV4: "IPv4", 1147 exp.DataType.Type.IPV6: "IPv6", 1148 exp.DataType.Type.POINT: "Point", 1149 exp.DataType.Type.RING: "Ring", 1150 exp.DataType.Type.LINESTRING: "LineString", 1151 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1152 exp.DataType.Type.POLYGON: "Polygon", 1153 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1154 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1155 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1156 exp.DataType.Type.DYNAMIC: "Dynamic", 1157 } 1158 1159 TRANSFORMS = { 1160 **generator.Generator.TRANSFORMS, 1161 exp.AnyValue: rename_func("any"), 1162 exp.ApproxDistinct: rename_func("uniq"), 1163 exp.ArrayConcat: rename_func("arrayConcat"), 1164 exp.ArrayContains: rename_func("has"), 1165 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1166 exp.ArrayRemove: remove_from_array_using_filter, 1167 exp.ArrayReverse: rename_func("arrayReverse"), 1168 exp.ArraySlice: rename_func("arraySlice"), 1169 
exp.ArraySum: rename_func("arraySum"), 1170 exp.ArrayMax: rename_func("arrayMax"), 1171 exp.ArrayMin: rename_func("arrayMin"), 1172 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1173 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1174 exp.Array: inline_array_sql, 1175 exp.CastToStrType: rename_func("CAST"), 1176 exp.CurrentDatabase: rename_func("CURRENT_DATABASE"), 1177 exp.CurrentSchemas: rename_func("CURRENT_SCHEMAS"), 1178 exp.CountIf: rename_func("countIf"), 1179 exp.CosineDistance: rename_func("cosineDistance"), 1180 exp.CompressColumnConstraint: lambda self, 1181 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1182 exp.ComputedColumnConstraint: lambda self, 1183 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1184 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1185 exp.CurrentVersion: rename_func("VERSION"), 1186 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1187 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1188 exp.DateStrToDate: rename_func("toDate"), 1189 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1190 exp.Explode: rename_func("arrayJoin"), 1191 exp.FarmFingerprint: rename_func("farmFingerprint64"), 1192 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1193 exp.IsNan: rename_func("isNaN"), 1194 exp.JarowinklerSimilarity: jarowinkler_similarity("jaroWinklerSimilarity"), 1195 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1196 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1197 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1198 exp.JSONPathKey: json_path_key_only_name, 1199 exp.JSONPathRoot: lambda *_: "", 1200 exp.Length: length_or_char_length_sql, 1201 exp.Map: _map_sql, 1202 exp.Median: rename_func("median"), 1203 exp.Nullif: rename_func("nullIf"), 1204 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1205 exp.Pivot: no_pivot_sql, 1206 
exp.Quantile: _quantile_sql, 1207 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1208 exp.Rand: rename_func("randCanonical"), 1209 exp.StartsWith: rename_func("startsWith"), 1210 exp.Struct: rename_func("tuple"), 1211 exp.Trunc: rename_func("trunc"), 1212 exp.EndsWith: rename_func("endsWith"), 1213 exp.EuclideanDistance: rename_func("L2Distance"), 1214 exp.StrPosition: lambda self, e: strposition_sql( 1215 self, 1216 e, 1217 func_name="POSITION", 1218 supports_position=True, 1219 use_ansi_position=False, 1220 ), 1221 exp.TimeToStr: lambda self, e: self.func( 1222 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1223 ), 1224 exp.TimeStrToTime: _timestrtotime_sql, 1225 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1226 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1227 exp.Typeof: rename_func("toTypeName"), 1228 exp.VarMap: _map_sql, 1229 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1230 exp.MD5Digest: rename_func("MD5"), 1231 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1232 exp.SHA: rename_func("SHA1"), 1233 exp.SHA1Digest: rename_func("SHA1"), 1234 exp.SHA2: sha256_sql, 1235 exp.SHA2Digest: sha2_digest_sql, 1236 exp.Split: lambda self, e: self.func( 1237 "splitByString", e.args.get("expression"), e.this, e.args.get("limit") 1238 ), 1239 exp.RegexpSplit: lambda self, e: self.func( 1240 "splitByRegexp", e.args.get("expression"), e.this, e.args.get("limit") 1241 ), 1242 exp.UnixToTime: _unix_to_time_sql, 1243 exp.TimestampTrunc: timestamptrunc_sql(func="dateTrunc", zone=True), 1244 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1245 exp.Variance: rename_func("varSamp"), 1246 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1247 exp.Stddev: rename_func("stddevSamp"), 1248 exp.Chr: rename_func("CHAR"), 1249 exp.Lag: lambda self, e: self.func( 1250 "lagInFrame", e.this, 
e.args.get("offset"), e.args.get("default") 1251 ), 1252 exp.Lead: lambda self, e: self.func( 1253 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1254 ), 1255 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1256 rename_func("editDistance") 1257 ), 1258 exp.ParseDatetime: rename_func("parseDateTime"), 1259 } 1260 1261 PROPERTIES_LOCATION = { 1262 **generator.Generator.PROPERTIES_LOCATION, 1263 exp.OnCluster: exp.Properties.Location.POST_NAME, 1264 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1265 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1266 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1267 } 1268 1269 # There's no list in docs, but it can be found in Clickhouse code 1270 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1271 ON_CLUSTER_TARGETS = { 1272 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1273 "DATABASE", 1274 "TABLE", 1275 "VIEW", 1276 "DICTIONARY", 1277 "INDEX", 1278 "FUNCTION", 1279 "NAMED COLLECTION", 1280 } 1281 1282 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1283 NON_NULLABLE_TYPES = { 1284 exp.DataType.Type.ARRAY, 1285 exp.DataType.Type.MAP, 1286 exp.DataType.Type.STRUCT, 1287 exp.DataType.Type.POINT, 1288 exp.DataType.Type.RING, 1289 exp.DataType.Type.LINESTRING, 1290 exp.DataType.Type.MULTILINESTRING, 1291 exp.DataType.Type.POLYGON, 1292 exp.DataType.Type.MULTIPOLYGON, 1293 } 1294 1295 def offset_sql(self, expression: exp.Offset) -> str: 1296 offset = super().offset_sql(expression) 1297 1298 # OFFSET ... 
FETCH syntax requires a "ROW" or "ROWS" keyword 1299 # https://clickhouse.com/docs/sql-reference/statements/select/offset 1300 parent = expression.parent 1301 if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch): 1302 offset = f"{offset} ROWS" 1303 1304 return offset 1305 1306 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1307 strtodate_sql = self.function_fallback_sql(expression) 1308 1309 if not isinstance(expression.parent, exp.Cast): 1310 # StrToDate returns DATEs in other dialects (eg. postgres), so 1311 # this branch aims to improve the transpilation to clickhouse 1312 return self.cast_sql(exp.cast(expression, "DATE")) 1313 1314 return strtodate_sql 1315 1316 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1317 this = expression.this 1318 1319 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1320 return self.sql(this) 1321 1322 return super().cast_sql(expression, safe_prefix=safe_prefix) 1323 1324 def trycast_sql(self, expression: exp.TryCast) -> str: 1325 dtype = expression.to 1326 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1327 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1328 dtype.set("nullable", True) 1329 1330 return super().cast_sql(expression) 1331 1332 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1333 this = self.json_path_part(expression.this) 1334 return str(int(this) + 1) if is_int(this) else this 1335 1336 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1337 return f"AS {self.sql(expression, 'this')}" 1338 1339 def _any_to_has( 1340 self, 1341 expression: exp.EQ | exp.NEQ, 1342 default: t.Callable[[t.Any], str], 1343 prefix: str = "", 1344 ) -> str: 1345 if isinstance(expression.left, exp.Any): 1346 arr = expression.left 1347 this = expression.right 1348 elif isinstance(expression.right, exp.Any): 1349 arr = 
expression.right 1350 this = expression.left 1351 else: 1352 return default(expression) 1353 1354 return prefix + self.func("has", arr.this.unnest(), this) 1355 1356 def eq_sql(self, expression: exp.EQ) -> str: 1357 return self._any_to_has(expression, super().eq_sql) 1358 1359 def neq_sql(self, expression: exp.NEQ) -> str: 1360 return self._any_to_has(expression, super().neq_sql, "NOT ") 1361 1362 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1363 # Manually add a flag to make the search case-insensitive 1364 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1365 return self.func("match", expression.this, regex) 1366 1367 def datatype_sql(self, expression: exp.DataType) -> str: 1368 # String is the standard ClickHouse type, every other variant is just an alias. 1369 # Additionally, any supplied length parameter will be ignored. 1370 # 1371 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1372 if expression.this in self.STRING_TYPE_MAPPING: 1373 dtype = "String" 1374 else: 1375 dtype = super().datatype_sql(expression) 1376 1377 # This section changes the type to `Nullable(...)` if the following conditions hold: 1378 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1379 # and change their semantics 1380 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1381 # constraint: "Type of Map key must be a type, that can be represented by integer or 1382 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1383 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1384 parent = expression.parent 1385 nullable = expression.args.get("nullable") 1386 if nullable is True or ( 1387 nullable is None 1388 and not ( 1389 isinstance(parent, exp.DataType) 1390 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1391 and expression.index in (None, 0) 1392 ) 1393 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1394 ): 1395 dtype = f"Nullable({dtype})" 1396 1397 return dtype 1398 1399 def cte_sql(self, expression: exp.CTE) -> str: 1400 if expression.args.get("scalar"): 1401 this = self.sql(expression, "this") 1402 alias = self.sql(expression, "alias") 1403 return f"{this} AS {alias}" 1404 1405 return super().cte_sql(expression) 1406 1407 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1408 return super().after_limit_modifiers(expression) + [ 1409 ( 1410 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1411 if expression.args.get("settings") 1412 else "" 1413 ), 1414 ( 1415 self.seg("FORMAT ") + self.sql(expression, "format") 1416 if expression.args.get("format") 1417 else "" 1418 ), 1419 ] 1420 1421 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1422 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1423 1424 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1425 return f"ON CLUSTER {self.sql(expression, 'this')}" 1426 1427 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1428 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1429 exp.Properties.Location.POST_NAME 1430 ): 1431 this_name = self.sql( 1432 expression.this if isinstance(expression.this, exp.Schema) else expression, 1433 "this", 1434 ) 1435 this_properties = " ".join( 1436 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1437 ) 1438 this_schema = self.schema_columns_sql(expression.this) 1439 this_schema = 
f"{self.sep()}{this_schema}" if this_schema else "" 1440 1441 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1442 1443 return super().createable_sql(expression, locations) 1444 1445 def create_sql(self, expression: exp.Create) -> str: 1446 # The comment property comes last in CTAS statements, i.e. after the query 1447 query = expression.expression 1448 if isinstance(query, exp.Query): 1449 comment_prop = expression.find(exp.SchemaCommentProperty) 1450 if comment_prop: 1451 comment_prop.pop() 1452 query.replace(exp.paren(query)) 1453 else: 1454 comment_prop = None 1455 1456 create_sql = super().create_sql(expression) 1457 1458 comment_sql = self.sql(comment_prop) 1459 comment_sql = f" {comment_sql}" if comment_sql else "" 1460 1461 return f"{create_sql}{comment_sql}" 1462 1463 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1464 this = self.indent(self.sql(expression, "this")) 1465 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1466 1467 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1468 this = self.sql(expression, "this") 1469 this = f" {this}" if this else "" 1470 expr = self.sql(expression, "expression") 1471 expr = f" {expr}" if expr else "" 1472 index_type = self.sql(expression, "index_type") 1473 index_type = f" TYPE {index_type}" if index_type else "" 1474 granularity = self.sql(expression, "granularity") 1475 granularity = f" GRANULARITY {granularity}" if granularity else "" 1476 1477 return f"INDEX{this}{expr}{index_type}{granularity}" 1478 1479 def partition_sql(self, expression: exp.Partition) -> str: 1480 return f"PARTITION {self.expressions(expression, flat=True)}" 1481 1482 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1483 return f"ID {self.sql(expression.this)}" 1484 1485 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1486 return ( 1487 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1488 ) 1489 1490 def 
projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1491 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1492 1493 def is_sql(self, expression: exp.Is) -> str: 1494 is_sql = super().is_sql(expression) 1495 1496 if isinstance(expression.parent, exp.Not): 1497 # value IS NOT NULL -> NOT (value IS NULL) 1498 is_sql = self.wrap(is_sql) 1499 1500 return is_sql 1501 1502 def in_sql(self, expression: exp.In) -> str: 1503 in_sql = super().in_sql(expression) 1504 1505 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1506 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1507 1508 return in_sql 1509 1510 def not_sql(self, expression: exp.Not) -> str: 1511 if isinstance(expression.this, exp.In): 1512 if expression.this.args.get("is_global"): 1513 # let `GLOBAL IN` child interpose `NOT` 1514 return self.sql(expression, "this") 1515 1516 expression.set("this", exp.paren(expression.this, copy=False)) 1517 1518 return super().not_sql(expression) 1519 1520 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1521 # If the VALUES clause contains tuples of expressions, we need to treat it 1522 # as a table since Clickhouse will automatically alias it as such. 1523 alias = expression.args.get("alias") 1524 1525 if alias and alias.args.get("columns") and expression.expressions: 1526 values = expression.expressions[0].expressions 1527 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1528 else: 1529 values_as_table = True 1530 1531 return super().values_sql(expression, values_as_table=values_as_table)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a `WHERE` clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1295 def offset_sql(self, expression: exp.Offset) -> str: 1296 offset = super().offset_sql(expression) 1297 1298 # OFFSET ... FETCH syntax requires a "ROW" or "ROWS" keyword 1299 # https://clickhouse.com/docs/sql-reference/statements/select/offset 1300 parent = expression.parent 1301 if isinstance(parent, exp.Select) and isinstance(parent.args.get("limit"), exp.Fetch): 1302 offset = f"{offset} ROWS" 1303 1304 return offset
1306 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1307 strtodate_sql = self.function_fallback_sql(expression) 1308 1309 if not isinstance(expression.parent, exp.Cast): 1310 # StrToDate returns DATEs in other dialects (eg. postgres), so 1311 # this branch aims to improve the transpilation to clickhouse 1312 return self.cast_sql(exp.cast(expression, "DATE")) 1313 1314 return strtodate_sql
1316 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1317 this = expression.this 1318 1319 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1320 return self.sql(this) 1321 1322 return super().cast_sql(expression, safe_prefix=safe_prefix)
1324 def trycast_sql(self, expression: exp.TryCast) -> str: 1325 dtype = expression.to 1326 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1327 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1328 dtype.set("nullable", True) 1329 1330 return super().cast_sql(expression)
1367 def datatype_sql(self, expression: exp.DataType) -> str: 1368 # String is the standard ClickHouse type, every other variant is just an alias. 1369 # Additionally, any supplied length parameter will be ignored. 1370 # 1371 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1372 if expression.this in self.STRING_TYPE_MAPPING: 1373 dtype = "String" 1374 else: 1375 dtype = super().datatype_sql(expression) 1376 1377 # This section changes the type to `Nullable(...)` if the following conditions hold: 1378 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1379 # and change their semantics 1380 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1381 # constraint: "Type of Map key must be a type, that can be represented by integer or 1382 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1383 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1384 parent = expression.parent 1385 nullable = expression.args.get("nullable") 1386 if nullable is True or ( 1387 nullable is None 1388 and not ( 1389 isinstance(parent, exp.DataType) 1390 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1391 and expression.index in (None, 0) 1392 ) 1393 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1394 ): 1395 dtype = f"Nullable({dtype})" 1396 1397 return dtype
1407 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1408 return super().after_limit_modifiers(expression) + [ 1409 ( 1410 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1411 if expression.args.get("settings") 1412 else "" 1413 ), 1414 ( 1415 self.seg("FORMAT ") + self.sql(expression, "format") 1416 if expression.args.get("format") 1417 else "" 1418 ), 1419 ]
1427 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1428 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1429 exp.Properties.Location.POST_NAME 1430 ): 1431 this_name = self.sql( 1432 expression.this if isinstance(expression.this, exp.Schema) else expression, 1433 "this", 1434 ) 1435 this_properties = " ".join( 1436 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1437 ) 1438 this_schema = self.schema_columns_sql(expression.this) 1439 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1440 1441 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1442 1443 return super().createable_sql(expression, locations)
1445 def create_sql(self, expression: exp.Create) -> str: 1446 # The comment property comes last in CTAS statements, i.e. after the query 1447 query = expression.expression 1448 if isinstance(query, exp.Query): 1449 comment_prop = expression.find(exp.SchemaCommentProperty) 1450 if comment_prop: 1451 comment_prop.pop() 1452 query.replace(exp.paren(query)) 1453 else: 1454 comment_prop = None 1455 1456 create_sql = super().create_sql(expression) 1457 1458 comment_sql = self.sql(comment_prop) 1459 comment_sql = f" {comment_sql}" if comment_sql else "" 1460 1461 return f"{create_sql}{comment_sql}"
1467 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1468 this = self.sql(expression, "this") 1469 this = f" {this}" if this else "" 1470 expr = self.sql(expression, "expression") 1471 expr = f" {expr}" if expr else "" 1472 index_type = self.sql(expression, "index_type") 1473 index_type = f" TYPE {index_type}" if index_type else "" 1474 granularity = self.sql(expression, "granularity") 1475 granularity = f" GRANULARITY {granularity}" if granularity else "" 1476 1477 return f"INDEX{this}{expr}{index_type}{granularity}"
1510 def not_sql(self, expression: exp.Not) -> str: 1511 if isinstance(expression.this, exp.In): 1512 if expression.this.args.get("is_global"): 1513 # let `GLOBAL IN` child interpose `NOT` 1514 return self.sql(expression, "this") 1515 1516 expression.set("this", exp.paren(expression.this, copy=False)) 1517 1518 return super().not_sql(expression)
1520 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1521 # If the VALUES clause contains tuples of expressions, we need to treat it 1522 # as a table since Clickhouse will automatically alias it as such. 1523 alias = expression.args.get("alias") 1524 1525 if alias and alias.args.get("columns") and expression.expressions: 1526 values = expression.expressions[0].expressions 1527 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1528 else: 1529 values_as_table = True 1530 1531 return super().values_sql(expression, values_as_table=values_as_table)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_WINDOW_EXCLUDE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- DECLARE_DEFAULT_ASSIGNMENT
- UPDATE_STATEMENT_SUPPORTS_FROM
- STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- sequenceproperties_sql
- triggerproperties_sql
- triggerreferencing_sql
- triggerevent_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql
- block_sql
- storedprocedure_sql
- ifblock_sql
- whileblock_sql
- execute_sql
- executesql_sql