sqlglot.dialects.clickhouse
from __future__ import annotations
import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd
]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
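

# Illustrative sketch added for this write-up (not part of sqlglot): building an
# exp.UnixToTime with a known scale exercises the dispatch in _unix_to_time_sql above.
def _example_unix_to_time() -> None:
    from sqlglot import exp

    # Millisecond-scale epochs should map to the fromUnixTimestamp64Milli(...) variant.
    e = exp.UnixToTime(this=exp.column("ts"), scale=exp.UnixToTime.MILLIS)
    print(e.sql(dialect="clickhouse"))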


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # ClickHouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds are present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return a literal with no timezone, e.g. turn '2020-01-01 12:13:14-08:00' into
        # '2020-01-01 12:13:14'. This is because ClickHouse encodes the timezone as a data type
        # parameter and throws an error if it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"
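

# Illustrative sketch added for this write-up (not part of sqlglot): _timestrtotime_sql
# strips the UTC offset from literal timestamps and casts to DateTime64(6, <tz>). The
# input literal and zone below are assumed values for demonstration.
def _example_timestrtotime() -> None:
    from sqlglot import exp

    e = exp.TimeStrToTime(
        this=exp.Literal.string("2020-01-01 12:13:14.123-08:00"),
        zone=exp.Literal.string("UTC"),
    )
    # Expected shape: CAST('2020-01-01 12:13:14.123000' AS DateTime64(6, 'UTC'))
    print(e.sql(dialect="clickhouse"))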


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
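
    # Illustrative sketch added for this write-up (not part of sqlglot): the tokenizer
    # settings above allow "#" line comments and 0x-prefixed integer-typed hex literals.
    @staticmethod
    def _example_tokenizer_quirks() -> None:
        import sqlglot

        ast = sqlglot.parse_one("SELECT 0x1F # hex literal", read="clickhouse")
        print(ast.sql(dialect="clickhouse"))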

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground: the following two queries seem to do the same thing.
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")

        AGG_FUNCTIONS = {
            "count", "min", "max", "sum", "avg", "any",
            "stddevPop", "stddevSamp", "varPop", "varSamp",
            "corr", "covarPop", "covarSamp", "entropy",
            "exponentialMovingAverage", "intervalLengthSum",
            "kolmogorovSmirnovTest", "mannWhitneyUTest", "median", "rankCorr",
            "sumKahan", "studentTTest", "welchTTest",
            "anyHeavy", "anyLast", "boundingRatio",
            "first_value", "last_value", "argMin", "argMax", "avgWeighted",
            "topK", "topKWeighted", "deltaSum", "deltaSumTimestamp",
            "groupArray", "groupArrayLast", "groupUniqArray", "groupArrayInsertAt",
            "groupArrayMovingAvg", "groupArrayMovingSum", "groupArraySample",
            "groupBitAnd", "groupBitOr", "groupBitXor",
            "groupBitmap", "groupBitmapAnd", "groupBitmapOr", "groupBitmapXor",
            "sumWithOverflow", "sumMap", "minMap", "maxMap",
            "skewSamp", "skewPop", "kurtSamp", "kurtPop",
            "uniq", "uniqExact", "uniqCombined", "uniqCombined64", "uniqHLL12", "uniqTheta",
            "quantile", "quantiles",
            "quantileExact", "quantilesExact",
            "quantileExactLow", "quantilesExactLow",
            "quantileExactHigh", "quantilesExactHigh",
            "quantileExactWeighted", "quantilesExactWeighted",
            "quantileTiming", "quantilesTiming",
            "quantileTimingWeighted", "quantilesTimingWeighted",
            "quantileDeterministic", "quantilesDeterministic",
            "quantileTDigest", "quantilesTDigest",
            "quantileTDigestWeighted", "quantilesTDigestWeighted",
            "quantileBFloat16", "quantilesBFloat16",
            "quantileBFloat16Weighted", "quantilesBFloat16Weighted",
            "simpleLinearRegression", "stochasticLinearRegression", "stochasticLogisticRegression",
            "categoricalInformationValue", "contingency",
            "cramersV", "cramersVBiasCorrected", "theilsU",
            "maxIntersections", "maxIntersectionsPosition", "meanZTest",
            "quantileInterpolatedWeighted", "quantilesInterpolatedWeighted",
            "quantileGK", "quantilesGK",
            "sparkBar", "sumCount", "largestTriangleThreeBuckets", "histogram",
            "sequenceMatch", "sequenceCount", "windowFunnel", "retention",
            "uniqUpTo", "sequenceNextNode", "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If", "Array", "ArrayIf", "Map", "SimpleState", "State", "Merge",
            "MergeState", "ForEach", "Distinct", "OrDefault", "OrNull",
            "Resample", "ArgMin", "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
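
        # Illustrative sketch added for this write-up (not part of sqlglot): AGG_FUNC_MAPPING
        # above pairs every aggregate with every suffix, so a combined name like sumIf
        # resolves to ("sum", "If") and round-trips through the parser.
        @staticmethod
        def _example_agg_suffix() -> None:
            import sqlglot

            ast = sqlglot.parse_one("SELECT sumIf(x, x > 0) FROM t", read="clickhouse")
            print(ast.sql(dialect="clickhouse"))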

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy()
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds
        # to the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op
        # simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable, which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
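
        # Illustrative sketch added for this write-up (not part of sqlglot): per the comment
        # in _parse_types above, nullable-by-default types from other dialects should gain a
        # Nullable(...) wrapper when generated for ClickHouse.
        @staticmethod
        def _example_nullable_transpile() -> None:
            import sqlglot

            # Expected shape: CAST(x AS Nullable(String))
            print(sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])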

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this
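
        # Illustrative sketch added for this write-up (not part of sqlglot): query parameters
        # such as {abc: UInt32}, per the docstring of _parse_query_parameter above, parse into
        # exp.Placeholder and are re-emitted in the same form by the generator.
        @staticmethod
        def _example_query_parameter() -> None:
            import sqlglot

            ast = sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
            print(ast.sql(dialect="clickhouse"))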

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join
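
        # Illustrative sketch added for this write-up (not part of sqlglot): the FINAL table
        # modifier matched in _parse_table above is wrapped in exp.Final and survives a
        # ClickHouse-to-ClickHouse round trip.
        @staticmethod
        def _example_final_modifier() -> None:
            import sqlglot

            ast = sqlglot.parse_one("SELECT * FROM t FINAL", read="clickhouse")
            print(ast.sql(dialect="clickhouse"))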

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split into 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in the base parser; fix its
                    # type to be the ClickHouse-style CombinedAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following
                    # window (if one exists), since they sit between the function call and the
                    # window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
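
        # Illustrative sketch added for this write-up (not part of sqlglot): parameterized
        # aggregates use the f(params)(args) form handled by _parse_func_params and
        # _parse_quantile above, and _quantile_sql regenerates the same shape.
        @staticmethod
        def _example_parametric_agg() -> None:
            import sqlglot

            ast = sqlglot.parse_one("SELECT quantile(0.9)(x) FROM t", read="clickhouse")
            print(ast.sql(dialect="clickhouse"))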

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In ClickHouse, "SELECT <expr> APPLY(...)" is a query modifier, so "APPLY" shouldn't
            # be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # ClickHouse allows a "SELECT <expr> [APPLY(func)] [...]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this
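
        # Illustrative sketch added for this write-up (not part of sqlglot): the APPLY(...)
        # modifier handled by _parse_expression/_parse_columns above should parse into nested
        # exp.Apply nodes.
        @staticmethod
        def _example_apply_modifier() -> None:
            import sqlglot

            ast = sqlglot.parse_one("SELECT COLUMNS('a') APPLY(sum) FROM t", read="clickhouse")
            print(type(ast.selects[0]).__name__)  # expected: Apply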

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        VALUES_AS_TABLE = False
        ARRAY_SIZE_NAME = "LENGTH"

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
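
        # Illustrative sketch added for this write-up (not part of sqlglot): TYPE_MAPPING above
        # renames standard types, and datatype_sql below may add a Nullable(...) wrapper.
        @staticmethod
        def _example_type_mapping() -> None:
            import sqlglot

            # Expected shape: CAST(x AS Nullable(Int64))
            print(sqlglot.transpile("CAST(x AS BIGINT)", write="clickhouse")[0])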

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in the docs, but it can be found in the ClickHouse code,
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }
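
        # Illustrative sketch added for this write-up (not part of sqlglot): TRANSFORMS above
        # renders exp.MD5 (hex-string semantics) as LOWER(HEX(MD5(...))).
        @staticmethod
        def _example_md5_transform() -> None:
            import sqlglot

            # Expected shape: SELECT LOWER(HEX(MD5('a')))
            print(sqlglot.transpile("SELECT MD5('a')", read="duckdb", write="clickhouse")[0])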

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to ClickHouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
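
        # Illustrative sketch added for this write-up (not part of sqlglot): _any_to_has above
        # rewrites equality against ANY(array) into ClickHouse's has() function.
        @staticmethod
        def _example_any_to_has() -> None:
            import sqlglot

            # Expected shape: SELECT has(arr, x)
            print(sqlglot.transpile("SELECT x = ANY(arr)", read="postgres", write="clickhouse")[0])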

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type; every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with
            #   `Nullable` and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"
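
        # Illustrative sketch added for this write-up (not part of sqlglot):
        # after_limit_modifiers above emits SETTINGS and FORMAT after the LIMIT clause.
        @staticmethod
        def _example_query_modifiers() -> None:
            import sqlglot

            sql = "SELECT * FROM t LIMIT 1 SETTINGS max_threads = 4 FORMAT JSON"
            print(sqlglot.parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))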

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)
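

# Illustrative sketch added for this write-up (not part of sqlglot): GLOBAL IN and
# GLOBAL NOT IN round-trip through _parse_global_in, in_sql and not_sql above.
def _example_global_in() -> None:
    import sqlglot

    sql = "SELECT * FROM t WHERE x GLOBAL NOT IN (SELECT y FROM u)"
    print(sqlglot.parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))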
self._parse_lambda() 797 params = self._parse_func_params() 798 if params: 799 return self.expression(exp.Quantile, this=params[0], quantile=this) 800 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 801 802 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 803 return super()._parse_wrapped_id_vars(optional=True) 804 805 def _parse_primary_key( 806 self, wrapped_optional: bool = False, in_props: bool = False 807 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 808 return super()._parse_primary_key( 809 wrapped_optional=wrapped_optional or in_props, in_props=in_props 810 ) 811 812 def _parse_on_property(self) -> t.Optional[exp.Expression]: 813 index = self._index 814 if self._match_text_seq("CLUSTER"): 815 this = self._parse_id_var() 816 if this: 817 return self.expression(exp.OnCluster, this=this) 818 else: 819 self._retreat(index) 820 return None 821 822 def _parse_index_constraint( 823 self, kind: t.Optional[str] = None 824 ) -> exp.IndexColumnConstraint: 825 # INDEX name1 expr TYPE type1(args) GRANULARITY value 826 this = self._parse_id_var() 827 expression = self._parse_assignment() 828 829 index_type = self._match_text_seq("TYPE") and ( 830 self._parse_function() or self._parse_var() 831 ) 832 833 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 834 835 return self.expression( 836 exp.IndexColumnConstraint, 837 this=this, 838 expression=expression, 839 index_type=index_type, 840 granularity=granularity, 841 ) 842 843 def _parse_partition(self) -> t.Optional[exp.Partition]: 844 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 845 if not self._match(TokenType.PARTITION): 846 return None 847 848 if self._match_text_seq("ID"): 849 # Corresponds to the PARTITION ID <string_value> syntax 850 expressions: t.List[exp.Expression] = [ 851 self.expression(exp.PartitionId, this=self._parse_string()) 852 ] 853 else: 854 expressions = self._parse_expressions() 855 856 return self.expression(exp.Partition, expressions=expressions) 857 858 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 859 partition = self._parse_partition() 860 861 if not partition or not self._match(TokenType.FROM): 862 return None 863 864 return self.expression( 865 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 866 ) 867 868 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 869 if not self._match_text_seq("PROJECTION"): 870 return None 871 872 return self.expression( 873 exp.ProjectionDef, 874 this=self._parse_id_var(), 875 expression=self._parse_wrapped(self._parse_statement), 876 ) 877 878 def _parse_constraint(self) -> t.Optional[exp.Expression]: 879 return super()._parse_constraint() or self._parse_projection_def() 880 881 def _parse_alias( 882 self, this: t.Optional[exp.Expression], explicit: bool = False 883 ) -> t.Optional[exp.Expression]: 884 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 885 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 886 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 887 return this 888 889 return super()._parse_alias(this=this, explicit=explicit) 890 891 def _parse_expression(self) -> t.Optional[exp.Expression]: 892 this = super()._parse_expression() 893 894 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 895 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 896 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 897 self._match(TokenType.R_PAREN) 898 899 return this 900 901 def _parse_columns(self) -> exp.Expression: 902 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 903 904 while self._next and self._match_text_seq(")", "APPLY", "("): 905 self._match(TokenType.R_PAREN) 906 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 907 return this 908 909 class Generator(generator.Generator): 910 QUERY_HINTS = False 911 STRUCT_DELIMITER = ("(", ")") 912 NVL2_SUPPORTED = False 913 TABLESAMPLE_REQUIRES_PARENS = False 914 TABLESAMPLE_SIZE_IS_ROWS = False 915 TABLESAMPLE_KEYWORDS = "SAMPLE" 916 LAST_DAY_SUPPORTS_DATE_PART = False 917 CAN_IMPLEMENT_ARRAY_ANY = True 918 SUPPORTS_TO_NUMBER = False 919 JOIN_HINTS = False 920 TABLE_HINTS = False 921 GROUPINGS_SEP = "" 922 SET_OP_MODIFIERS = False 923 VALUES_AS_TABLE = False 924 ARRAY_SIZE_NAME = "LENGTH" 925 926 STRING_TYPE_MAPPING = { 927 exp.DataType.Type.BLOB: "String", 928 exp.DataType.Type.CHAR: "String", 929 exp.DataType.Type.LONGBLOB: "String", 930 exp.DataType.Type.LONGTEXT: "String", 931 exp.DataType.Type.MEDIUMBLOB: "String", 932 exp.DataType.Type.MEDIUMTEXT: "String", 933 exp.DataType.Type.TINYBLOB: "String", 934 exp.DataType.Type.TINYTEXT: "String", 935 exp.DataType.Type.TEXT: "String", 936 exp.DataType.Type.VARBINARY: "String", 937 exp.DataType.Type.VARCHAR: "String", 938 } 939 940 SUPPORTED_JSON_PATH_PARTS = { 941 exp.JSONPathKey, 942 exp.JSONPathRoot, 943 exp.JSONPathSubscript, 944 } 945 946 TYPE_MAPPING = { 947 **generator.Generator.TYPE_MAPPING, 948 **STRING_TYPE_MAPPING, 949 exp.DataType.Type.ARRAY: "Array", 950 exp.DataType.Type.BOOLEAN: "Bool", 951 exp.DataType.Type.BIGINT: "Int64", 952 exp.DataType.Type.DATE32: "Date32", 953 exp.DataType.Type.DATETIME: "DateTime", 954 exp.DataType.Type.DATETIME2: "DateTime", 955 exp.DataType.Type.SMALLDATETIME: "DateTime", 956 exp.DataType.Type.DATETIME64: "DateTime64", 957 exp.DataType.Type.DECIMAL: "Decimal", 958 exp.DataType.Type.DECIMAL32: "Decimal32", 959 exp.DataType.Type.DECIMAL64: "Decimal64", 960 exp.DataType.Type.DECIMAL128: "Decimal128", 961 exp.DataType.Type.DECIMAL256: "Decimal256", 962 exp.DataType.Type.TIMESTAMP: "DateTime", 963 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 964 exp.DataType.Type.DOUBLE: "Float64", 965 exp.DataType.Type.ENUM: "Enum", 966 exp.DataType.Type.ENUM8: "Enum8", 967 exp.DataType.Type.ENUM16: "Enum16", 968 exp.DataType.Type.FIXEDSTRING: "FixedString", 969 exp.DataType.Type.FLOAT: "Float32", 970 exp.DataType.Type.INT: "Int32", 971 exp.DataType.Type.MEDIUMINT: "Int32", 972 exp.DataType.Type.INT128: "Int128", 973 exp.DataType.Type.INT256: "Int256", 974 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 975 exp.DataType.Type.MAP: "Map", 976 exp.DataType.Type.NESTED: "Nested", 977 exp.DataType.Type.SMALLINT: "Int16", 978 exp.DataType.Type.STRUCT: "Tuple", 979 exp.DataType.Type.TINYINT: "Int8", 980 exp.DataType.Type.UBIGINT: "UInt64", 981 exp.DataType.Type.UINT: "UInt32", 982 exp.DataType.Type.UINT128: 
"UInt128", 983 exp.DataType.Type.UINT256: "UInt256", 984 exp.DataType.Type.USMALLINT: "UInt16", 985 exp.DataType.Type.UTINYINT: "UInt8", 986 exp.DataType.Type.IPV4: "IPv4", 987 exp.DataType.Type.IPV6: "IPv6", 988 exp.DataType.Type.POINT: "Point", 989 exp.DataType.Type.RING: "Ring", 990 exp.DataType.Type.LINESTRING: "LineString", 991 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 992 exp.DataType.Type.POLYGON: "Polygon", 993 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 994 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 995 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 996 exp.DataType.Type.DYNAMIC: "Dynamic", 997 } 998 999 TRANSFORMS = { 1000 **generator.Generator.TRANSFORMS, 1001 exp.AnyValue: rename_func("any"), 1002 exp.ApproxDistinct: rename_func("uniq"), 1003 exp.ArrayConcat: rename_func("arrayConcat"), 1004 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1005 exp.ArraySum: rename_func("arraySum"), 1006 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1007 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1008 exp.Array: inline_array_sql, 1009 exp.CastToStrType: rename_func("CAST"), 1010 exp.CountIf: rename_func("countIf"), 1011 exp.CompressColumnConstraint: lambda self, 1012 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1013 exp.ComputedColumnConstraint: lambda self, 1014 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1015 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1016 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1017 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1018 exp.DateStrToDate: rename_func("toDate"), 1019 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1020 exp.Explode: rename_func("arrayJoin"), 1021 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1022 exp.IsNan: rename_func("isNaN"), 1023 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1024 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1025 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1026 exp.JSONPathKey: json_path_key_only_name, 1027 exp.JSONPathRoot: lambda *_: "", 1028 exp.Length: length_or_char_length_sql, 1029 exp.Map: _map_sql, 1030 exp.Median: rename_func("median"), 1031 exp.Nullif: rename_func("nullIf"), 1032 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1033 exp.Pivot: no_pivot_sql, 1034 exp.Quantile: _quantile_sql, 1035 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1036 exp.Rand: rename_func("randCanonical"), 1037 exp.StartsWith: rename_func("startsWith"), 1038 exp.StrPosition: lambda self, e: strposition_sql( 1039 self, 1040 e, 1041 func_name="POSITION", 1042 supports_position=True, 1043 use_ansi_position=False, 1044 ), 1045 exp.TimeToStr: lambda self, e: self.func( 1046 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1047 ), 1048 exp.TimeStrToTime: _timestrtotime_sql, 1049 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1050 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1051 exp.VarMap: _map_sql, 1052 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1053 exp.MD5Digest: rename_func("MD5"), 1054 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1055 exp.SHA: rename_func("SHA1"), 1056 exp.SHA2: sha256_sql, 1057 exp.UnixToTime: _unix_to_time_sql, 1058 exp.TimestampTrunc: 
timestamptrunc_sql(zone=True), 1059 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1060 exp.Variance: rename_func("varSamp"), 1061 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1062 exp.Stddev: rename_func("stddevSamp"), 1063 exp.Chr: rename_func("CHAR"), 1064 exp.Lag: lambda self, e: self.func( 1065 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1066 ), 1067 exp.Lead: lambda self, e: self.func( 1068 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1069 ), 1070 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1071 rename_func("editDistance") 1072 ), 1073 } 1074 1075 PROPERTIES_LOCATION = { 1076 **generator.Generator.PROPERTIES_LOCATION, 1077 exp.OnCluster: exp.Properties.Location.POST_NAME, 1078 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1079 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1080 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1081 } 1082 1083 # There's no list in docs, but it can be found in Clickhouse code 1084 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1085 ON_CLUSTER_TARGETS = { 1086 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1087 "DATABASE", 1088 "TABLE", 1089 "VIEW", 1090 "DICTIONARY", 1091 "INDEX", 1092 "FUNCTION", 1093 "NAMED COLLECTION", 1094 } 1095 1096 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1097 NON_NULLABLE_TYPES = { 1098 exp.DataType.Type.ARRAY, 1099 exp.DataType.Type.MAP, 1100 exp.DataType.Type.STRUCT, 1101 exp.DataType.Type.POINT, 1102 exp.DataType.Type.RING, 1103 exp.DataType.Type.LINESTRING, 1104 exp.DataType.Type.MULTILINESTRING, 1105 exp.DataType.Type.POLYGON, 1106 exp.DataType.Type.MULTIPOLYGON, 1107 } 1108 1109 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1110 strtodate_sql = self.function_fallback_sql(expression) 1111 1112 if not isinstance(expression.parent, exp.Cast): 1113 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1114 # this branch aims to improve the transpilation to clickhouse 1115 return self.cast_sql(exp.cast(expression, "DATE")) 1116 1117 return strtodate_sql 1118 1119 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1120 this = expression.this 1121 1122 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1123 return self.sql(this) 1124 1125 return super().cast_sql(expression, safe_prefix=safe_prefix) 1126 1127 def trycast_sql(self, expression: exp.TryCast) -> str: 1128 dtype = expression.to 1129 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1130 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1131 dtype.set("nullable", True) 1132 1133 return super().cast_sql(expression) 1134 1135 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1136 this = self.json_path_part(expression.this) 1137 return str(int(this) + 1) if is_int(this) else this 1138 1139 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1140 return f"AS {self.sql(expression, 'this')}" 1141 1142 def _any_to_has( 1143 self, 1144 expression: exp.EQ | exp.NEQ, 1145 default: t.Callable[[t.Any], str], 1146 prefix: str = "", 1147 ) -> str: 1148 if isinstance(expression.left, exp.Any): 1149 arr = expression.left 1150 this = expression.right 1151 elif isinstance(expression.right, exp.Any): 1152 arr = expression.right 1153 this = expression.left 1154 else: 1155 return default(expression) 1156 1157 return prefix + self.func("has", arr.this.unnest(), this) 1158 1159 def eq_sql(self, expression: exp.EQ) -> str: 1160 return self._any_to_has(expression, super().eq_sql) 1161 1162 def neq_sql(self, expression: exp.NEQ) -> str: 1163 return self._any_to_has(expression, super().neq_sql, "NOT ") 1164 1165 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1166 # Manually add a flag to make the search case-insensitive 1167 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1168 return self.func("match", expression.this, regex) 1169 1170 def datatype_sql(self, expression: exp.DataType) -> str: 1171 # String is the standard ClickHouse type, every other variant is just an alias. 1172 # Additionally, any supplied length parameter will be ignored. 1173 # 1174 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1175 if expression.this in self.STRING_TYPE_MAPPING: 1176 dtype = "String" 1177 else: 1178 dtype = super().datatype_sql(expression) 1179 1180 # This section changes the type to `Nullable(...)` if the following conditions hold: 1181 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1182 # and change their semantics 1183 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1184 # constraint: "Type of Map key must be a type, that can be represented by integer or 1185 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1186 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1187 parent = expression.parent 1188 nullable = expression.args.get("nullable") 1189 if nullable is True or ( 1190 nullable is None 1191 and not ( 1192 isinstance(parent, exp.DataType) 1193 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1194 and expression.index in (None, 0) 1195 ) 1196 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1197 ): 1198 dtype = f"Nullable({dtype})" 1199 1200 return dtype 1201 1202 def cte_sql(self, expression: exp.CTE) -> str: 1203 if expression.args.get("scalar"): 1204 this = self.sql(expression, "this") 1205 alias = self.sql(expression, "alias") 1206 return f"{this} AS {alias}" 1207 1208 return super().cte_sql(expression) 1209 1210 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1211 return super().after_limit_modifiers(expression) + [ 1212 ( 1213 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1214 if expression.args.get("settings") 1215 else "" 1216 ), 1217 ( 1218 self.seg("FORMAT ") + self.sql(expression, "format") 1219 if expression.args.get("format") 1220 else "" 1221 ), 1222 ] 1223 1224 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1225 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1226 1227 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1228 return f"ON CLUSTER {self.sql(expression, 'this')}" 1229 1230 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1231 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1232 exp.Properties.Location.POST_NAME 1233 ): 1234 this_name = self.sql( 1235 expression.this if isinstance(expression.this, exp.Schema) else expression, 1236 "this", 1237 ) 1238 this_properties = " ".join( 1239 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1240 ) 1241 this_schema = self.schema_columns_sql(expression.this) 1242 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1243 1244 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1245 1246 return super().createable_sql(expression, locations) 1247 1248 def create_sql(self, expression: exp.Create) -> str: 1249 # The comment property comes last in CTAS statements, i.e. 
after the query 1250 query = expression.expression 1251 if isinstance(query, exp.Query): 1252 comment_prop = expression.find(exp.SchemaCommentProperty) 1253 if comment_prop: 1254 comment_prop.pop() 1255 query.replace(exp.paren(query)) 1256 else: 1257 comment_prop = None 1258 1259 create_sql = super().create_sql(expression) 1260 1261 comment_sql = self.sql(comment_prop) 1262 comment_sql = f" {comment_sql}" if comment_sql else "" 1263 1264 return f"{create_sql}{comment_sql}" 1265 1266 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1267 this = self.indent(self.sql(expression, "this")) 1268 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1269 1270 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1271 this = self.sql(expression, "this") 1272 this = f" {this}" if this else "" 1273 expr = self.sql(expression, "expression") 1274 expr = f" {expr}" if expr else "" 1275 index_type = self.sql(expression, "index_type") 1276 index_type = f" TYPE {index_type}" if index_type else "" 1277 granularity = self.sql(expression, "granularity") 1278 granularity = f" GRANULARITY {granularity}" if granularity else "" 1279 1280 return f"INDEX{this}{expr}{index_type}{granularity}" 1281 1282 def partition_sql(self, expression: exp.Partition) -> str: 1283 return f"PARTITION {self.expressions(expression, flat=True)}" 1284 1285 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1286 return f"ID {self.sql(expression.this)}" 1287 1288 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1289 return ( 1290 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1291 ) 1292 1293 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1294 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1295 1296 def is_sql(self, expression: exp.Is) -> str: 1297 is_sql = super().is_sql(expression) 1298 1299 if isinstance(expression.parent, exp.Not): 1300 # value IS NOT NULL -> NOT (value IS NULL) 1301 is_sql = self.wrap(is_sql) 1302 1303 return is_sql 1304 1305 def in_sql(self, expression: exp.In) -> str: 1306 in_sql = super().in_sql(expression) 1307 1308 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1309 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1310 1311 return in_sql 1312 1313 def not_sql(self, expression: exp.Not) -> str: 1314 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1315 # let `GLOBAL IN` child interpose `NOT` 1316 return self.sql(expression, "this") 1317 1318 return super().not_sql(expression)
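Several behaviors implemented in the listing above are easiest to see end to end: the nullability marking from _parse_types/datatype_sql, the ClickHouse ternary handled after the PLACEHOLDER column operator is popped, and GLOBAL NOT IN. A minimal sketch, assuming sqlglot is installed; printed output is indicative, not authoritative:

import sqlglot

# Non-ClickHouse string types are wrapped in Nullable(...) on the way out,
# matching the comment in _parse_types and the logic in datatype_sql.
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
# -> SELECT CAST(x AS Nullable(String))

# The ClickHouse ternary parses into an exp.If node via _parse_assignment.
print(repr(sqlglot.parse_one("SELECT x > 1 ? 'big' : 'small'", read="clickhouse")))

# GLOBAL NOT IN round-trips through _parse_global_in / in_sql / not_sql.
print(sqlglot.transpile("SELECT a GLOBAL NOT IN (1, 2)", read="clickhouse", write="clickhouse")[0])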
Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
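Relatedly, the ClickHouse Parser shown above sets LOG_DEFAULTS_TO_LN = True, so a single-argument LOG is parsed as the natural logarithm. A small sketch, assuming sqlglot is installed:

import sqlglot

# ClickHouse's one-argument LOG means LN, so it transpiles to LN elsewhere.
print(sqlglot.transpile("SELECT LOG(x)", read="clickhouse", write="postgres")[0])
# -> SELECT LN(x)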
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which only forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
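A minimal sketch of observing this through the qualifier, assuming sqlglot is installed; the exact qualified output depends on the sqlglot version:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Qualify the same query under two dialects to compare alias resolution.
for dialect in ("clickhouse", "duckdb"):
    expression = sqlglot.parse_one(sql, read=dialect)
    print(dialect, "->", qualify(expression, dialect=dialect).sql(dialect=dialect))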
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether number literals can include underscores for better readability.
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (for "\n", a literal newline character).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
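For ClickHouse specifically, that means CREATE SCHEMA statements come out as CREATE DATABASE. A hedged one-liner, assuming sqlglot is installed:

import sqlglot

# SCHEMA is mapped to ClickHouse's equivalent creatable kind, DATABASE.
print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])  # CREATE DATABASE foo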
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
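ClickHouse is a dialect where neither keyword is implied, so transpiled set operations spell the modifier out. A small sketch, assuming sqlglot is installed; output indicative:

import sqlglot

# A bare UNION gains an explicit DISTINCT for ClickHouse, which requires one.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="clickhouse")[0])
# expected: SELECT 1 UNION DISTINCT SELECT 2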
211 class Tokenizer(tokens.Tokenizer): 212 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 213 IDENTIFIERS = ['"', "`"] 214 IDENTIFIER_ESCAPES = ["\\"] 215 STRING_ESCAPES = ["'", "\\"] 216 BIT_STRINGS = [("0b", "")] 217 HEX_STRINGS = [("0x", ""), ("0X", "")] 218 HEREDOC_STRINGS = ["$"] 219 220 KEYWORDS = { 221 **tokens.Tokenizer.KEYWORDS, 222 ".:": TokenType.DOTCOLON, 223 "ATTACH": TokenType.COMMAND, 224 "DATE32": TokenType.DATE32, 225 "DATETIME64": TokenType.DATETIME64, 226 "DICTIONARY": TokenType.DICTIONARY, 227 "DYNAMIC": TokenType.DYNAMIC, 228 "ENUM8": TokenType.ENUM8, 229 "ENUM16": TokenType.ENUM16, 230 "FINAL": TokenType.FINAL, 231 "FIXEDSTRING": TokenType.FIXEDSTRING, 232 "FLOAT32": TokenType.FLOAT, 233 "FLOAT64": TokenType.DOUBLE, 234 "GLOBAL": TokenType.GLOBAL, 235 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 236 "MAP": TokenType.MAP, 237 "NESTED": TokenType.NESTED, 238 "SAMPLE": TokenType.TABLE_SAMPLE, 239 "TUPLE": TokenType.STRUCT, 240 "UINT16": TokenType.USMALLINT, 241 "UINT32": TokenType.UINT, 242 "UINT64": TokenType.UBIGINT, 243 "UINT8": TokenType.UTINYINT, 244 "IPV4": TokenType.IPV4, 245 "IPV6": TokenType.IPV6, 246 "POINT": TokenType.POINT, 247 "RING": TokenType.RING, 248 "LINESTRING": TokenType.LINESTRING, 249 "MULTILINESTRING": TokenType.MULTILINESTRING, 250 "POLYGON": TokenType.POLYGON, 251 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 252 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 253 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 254 "SYSTEM": TokenType.COMMAND, 255 "PREWHERE": TokenType.PREWHERE, 256 } 257 KEYWORDS.pop("/*+") 258 259 SINGLE_TOKENS = { 260 **tokens.Tokenizer.SINGLE_TOKENS, 261 "$": TokenType.HEREDOC_STRING, 262 }
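The keywords registered above (e.g. FINAL and SAMPLE), together with the Parser's SETTINGS/FORMAT query-modifier handling, let ClickHouse-only query shapes round-trip. A minimal sketch, assuming sqlglot is installed:

import sqlglot

# ClickHouse-specific modifiers round-trip through the dialect.
sql = "SELECT x FROM t FINAL SETTINGS max_threads = 1 FORMAT JSONEachRow"
print(sqlglot.parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))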
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
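A hedged sketch of these options in use via the top-level helpers, assuming sqlglot is installed:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # With ErrorLevel.RAISE, errors are collected and a ParseError is raised
    # once parsing finishes.
    sqlglot.parse_one("SELECT 1 +", read="clickhouse", error_level=ErrorLevel.RAISE)
except ParseError as e:
    print(e)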
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
1173 # 1174 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1175 if expression.this in self.STRING_TYPE_MAPPING: 1176 dtype = "String" 1177 else: 1178 dtype = super().datatype_sql(expression) 1179 1180 # This section changes the type to `Nullable(...)` if the following conditions hold: 1181 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1182 # and change their semantics 1183 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1184 # constraint: "Type of Map key must be a type, that can be represented by integer or 1185 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1186 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1187 parent = expression.parent 1188 nullable = expression.args.get("nullable") 1189 if nullable is True or ( 1190 nullable is None 1191 and not ( 1192 isinstance(parent, exp.DataType) 1193 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1194 and expression.index in (None, 0) 1195 ) 1196 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1197 ): 1198 dtype = f"Nullable({dtype})" 1199 1200 return dtype 1201 1202 def cte_sql(self, expression: exp.CTE) -> str: 1203 if expression.args.get("scalar"): 1204 this = self.sql(expression, "this") 1205 alias = self.sql(expression, "alias") 1206 return f"{this} AS {alias}" 1207 1208 return super().cte_sql(expression) 1209 1210 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1211 return super().after_limit_modifiers(expression) + [ 1212 ( 1213 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1214 if expression.args.get("settings") 1215 else "" 1216 ), 1217 ( 1218 self.seg("FORMAT ") + self.sql(expression, "format") 1219 if expression.args.get("format") 1220 else "" 1221 ), 1222 ] 1223 1224 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1225 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1226 1227 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1228 return f"ON CLUSTER {self.sql(expression, 'this')}" 1229 1230 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1231 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1232 exp.Properties.Location.POST_NAME 1233 ): 1234 this_name = self.sql( 1235 expression.this if isinstance(expression.this, exp.Schema) else expression, 1236 "this", 1237 ) 1238 this_properties = " ".join( 1239 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1240 ) 1241 this_schema = self.schema_columns_sql(expression.this) 1242 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1243 1244 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1245 1246 return super().createable_sql(expression, locations) 1247 1248 def create_sql(self, expression: exp.Create) -> str: 1249 # The comment property comes last in CTAS statements, i.e. 
after the query 1250 query = expression.expression 1251 if isinstance(query, exp.Query): 1252 comment_prop = expression.find(exp.SchemaCommentProperty) 1253 if comment_prop: 1254 comment_prop.pop() 1255 query.replace(exp.paren(query)) 1256 else: 1257 comment_prop = None 1258 1259 create_sql = super().create_sql(expression) 1260 1261 comment_sql = self.sql(comment_prop) 1262 comment_sql = f" {comment_sql}" if comment_sql else "" 1263 1264 return f"{create_sql}{comment_sql}" 1265 1266 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1267 this = self.indent(self.sql(expression, "this")) 1268 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1269 1270 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1271 this = self.sql(expression, "this") 1272 this = f" {this}" if this else "" 1273 expr = self.sql(expression, "expression") 1274 expr = f" {expr}" if expr else "" 1275 index_type = self.sql(expression, "index_type") 1276 index_type = f" TYPE {index_type}" if index_type else "" 1277 granularity = self.sql(expression, "granularity") 1278 granularity = f" GRANULARITY {granularity}" if granularity else "" 1279 1280 return f"INDEX{this}{expr}{index_type}{granularity}" 1281 1282 def partition_sql(self, expression: exp.Partition) -> str: 1283 return f"PARTITION {self.expressions(expression, flat=True)}" 1284 1285 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1286 return f"ID {self.sql(expression.this)}" 1287 1288 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1289 return ( 1290 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1291 ) 1292 1293 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1294 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1295 1296 def is_sql(self, expression: exp.Is) -> str: 1297 is_sql = super().is_sql(expression) 1298 1299 if isinstance(expression.parent, exp.Not): 1300 # value IS NOT NULL -> NOT (value IS NULL) 1301 is_sql = self.wrap(is_sql) 1302 1303 return is_sql 1304 1305 def in_sql(self, expression: exp.In) -> str: 1306 in_sql = super().in_sql(expression) 1307 1308 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1309 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1310 1311 return in_sql 1312 1313 def not_sql(self, expression: exp.Not) -> str: 1314 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1315 # let `GLOBAL IN` child interpose `NOT` 1316 return self.sql(expression, "this") 1317 1318 return super().not_sql(expression)
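The effect of several of these overrides (STRING_TYPE_MAPPING, the Nullable(...) wrapping in datatype_sql, trycast_sql, placeholder_sql) is easiest to see through sqlglot's public API. A minimal sketch follows; the table and column names are illustrative, and the outputs shown in comments are indicative rather than guaranteed byte-for-byte across sqlglot versions.

import sqlglot

# Text and binary types collapse to String, and non-composite column types
# whose nullability is unspecified are wrapped in Nullable(...)
print(sqlglot.transpile("CREATE TABLE t (x VARCHAR(10))", write="clickhouse")[0])
# e.g. CREATE TABLE t (x Nullable(String))

# trycast_sql marks the target type nullable, since CAST(x AS Nullable(T))
# behaves similarly to TRY_CAST(x AS T)
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(Int32))

# placeholder_sql emits ClickHouse's typed query parameters
print(sqlglot.transpile("SELECT {id: UInt32}", read="clickhouse", write="clickhouse")[0])
# e.g. SELECT {id: UInt32}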
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
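These constructor options are usually supplied through the top-level API (sqlglot.transpile or Expression.sql) rather than by instantiating the generator directly, since extra keyword arguments are forwarded to the generator. A minimal sketch, with an illustrative query:

import sqlglot

sql = "select sum(amount) as total from sales where region = 'EU' group by day"

# pretty formats the output, identify quotes every identifier, and
# normalize_functions controls function-name casing (see above)
print(
    sqlglot.transpile(
        sql,
        write="clickhouse",
        pretty=True,
        identify=True,
        normalize_functions="lower",
    )[0]
)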
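The query-modifier and predicate overrides in the class above (after_limit_modifiers, in_sql, not_sql) are round-trip oriented. A short sketch under the assumption that both sides are the clickhouse dialect; outputs are indicative:

import sqlglot

# SETTINGS and FORMAT clauses are preserved after LIMIT by after_limit_modifiers
print(
    sqlglot.transpile(
        "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow",
        read="clickhouse",
        write="clickhouse",
    )[0]
)

# not_sql lets the GLOBAL IN child absorb NOT, so in_sql can emit GLOBAL NOT IN
print(
    sqlglot.transpile(
        "SELECT * FROM t WHERE x GLOBAL NOT IN (SELECT y FROM s)",
        read="clickhouse",
        write="clickhouse",
    )[0]
)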
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql