sqlglot.dialects.clickhouse
from __future__ import annotations
import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
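
# A minimal illustrative sketch (not part of the module) of the padding step above:
# a 4-digit fractional part is left-justified to 6 digits so that
# `datetime.datetime.fromisoformat` accepts it on Python < 3.11, after which the
# offset is dropped before the literal is handed to ClickHouse:
#
#     >>> "1234".ljust(6, "0")
#     '123400'
#     >>> import datetime
#     >>> datetime.datetime.fromisoformat(
#     ...     "2020-01-01 12:13:14.123400-08:00"
#     ... ).replace(tzinfo=None).isoformat(sep=" ")
#     '2020-01-01 12:13:14.123400'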


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
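
        # A sketch of what the mapping above produces (illustrative only):
        # every aggregate name is paired with each suffix, plus the bare name.
        #
        #     >>> from sqlglot.dialects.clickhouse import ClickHouse
        #     >>> ClickHouse.Parser.AGG_FUNC_MAPPING["sumIf"]
        #     ('sum', 'If')
        #     >>> ClickHouse.Parser.AGG_FUNC_MAPPING["quantilesState"]
        #     ('quantiles', 'State')
        #     >>> ClickHouse.Parser.AGG_FUNC_MAPPING["avg"]
        #     ('avg', '')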

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype
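
        # Illustrative effect of the nullable marking above when transpiling types
        # into ClickHouse (a sketch, assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")
        #     ['CAST(x AS Nullable(String))']
        #     >>> sqlglot.transpile("CAST(x AS String)", read="clickhouse", write="clickhouse")
        #     ['CAST(x AS String)']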

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
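
        # Illustrative round-trip of the scalar CTE form parsed above (a sketch,
        # assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("WITH 1 AS x SELECT x", read="clickhouse", write="clickhouse")
        #     ['WITH 1 AS x SELECT x']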

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
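
        # Illustrative parses of the parameterized quantile syntax handled above
        # (a sketch, assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse").sql("clickhouse")
        #     'SELECT quantile(0.5)(x) FROM t'
        #     >>> sqlglot.parse_one("SELECT median(x) FROM t", read="clickhouse").sql("clickhouse")
        #     'SELECT quantile(0.5)(x) FROM t'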

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)

            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, so "APPLY"
            # shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this
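
    # Illustrative round-trip of the APPLY modifier handled above (a sketch,
    # assuming the public sqlglot API):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.transpile("SELECT * APPLY(sum) FROM t", read="clickhouse", write="clickhouse")
    #     ['SELECT * APPLY(sum) FROM t']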
"UInt128", 943 exp.DataType.Type.UINT256: "UInt256", 944 exp.DataType.Type.USMALLINT: "UInt16", 945 exp.DataType.Type.UTINYINT: "UInt8", 946 exp.DataType.Type.IPV4: "IPv4", 947 exp.DataType.Type.IPV6: "IPv6", 948 exp.DataType.Type.POINT: "Point", 949 exp.DataType.Type.RING: "Ring", 950 exp.DataType.Type.LINESTRING: "LineString", 951 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 952 exp.DataType.Type.POLYGON: "Polygon", 953 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 954 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 955 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 956 } 957 958 TRANSFORMS = { 959 **generator.Generator.TRANSFORMS, 960 exp.AnyValue: rename_func("any"), 961 exp.ApproxDistinct: rename_func("uniq"), 962 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 963 exp.ArraySum: rename_func("arraySum"), 964 exp.ArgMax: arg_max_or_min_no_count("argMax"), 965 exp.ArgMin: arg_max_or_min_no_count("argMin"), 966 exp.Array: inline_array_sql, 967 exp.CastToStrType: rename_func("CAST"), 968 exp.CountIf: rename_func("countIf"), 969 exp.CompressColumnConstraint: lambda self, 970 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 971 exp.ComputedColumnConstraint: lambda self, 972 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 973 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 974 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 975 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 976 exp.DateStrToDate: rename_func("toDate"), 977 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 978 exp.Explode: rename_func("arrayJoin"), 979 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 980 exp.IsNan: rename_func("isNaN"), 981 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 982 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 983 exp.JSONPathKey: json_path_key_only_name, 984 exp.JSONPathRoot: lambda *_: "", 985 exp.Length: length_or_char_length_sql, 986 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 987 exp.Median: rename_func("median"), 988 exp.Nullif: rename_func("nullIf"), 989 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 990 exp.Pivot: no_pivot_sql, 991 exp.Quantile: _quantile_sql, 992 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 993 exp.Rand: rename_func("randCanonical"), 994 exp.StartsWith: rename_func("startsWith"), 995 exp.StrPosition: lambda self, e: self.func( 996 "position", e.this, e.args.get("substr"), e.args.get("position") 997 ), 998 exp.TimeToStr: lambda self, e: self.func( 999 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1000 ), 1001 exp.TimeStrToTime: _timestrtotime_sql, 1002 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1003 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1004 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1005 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1006 exp.MD5Digest: rename_func("MD5"), 1007 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1008 exp.SHA: rename_func("SHA1"), 1009 exp.SHA2: sha256_sql, 1010 exp.UnixToTime: _unix_to_time_sql, 1011 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1012 exp.Trim: trim_sql, 1013 exp.Variance: rename_func("varSamp"), 1014 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1015 exp.Stddev: 

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)
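
        # Illustrative effect of the Nullable-based TRY_CAST above (a sketch,
        # assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("TRY_CAST(x AS TEXT)", write="clickhouse")
        #     ['CAST(x AS Nullable(String))']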

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype
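
        # Illustrative Map handling from the conditions above: the key type stays
        # unwrapped while the value type becomes Nullable (a sketch, assuming the
        # public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("CAST(m AS MAP(TEXT, TEXT))", write="clickhouse")
        #     ['CAST(m AS Map(String, Nullable(String)))']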

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"
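
        # Illustrative round-trip of a query parameter through the placeholder
        # syntax above (a sketch, assuming the public sqlglot API):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT {abc: UInt32}", read="clickhouse", write="clickhouse")
        #     ['SELECT {abc: UInt32}']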

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql
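
# Illustrative effect of the IS NULL wrapping above (a sketch, assuming the
# public sqlglot API):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT x IS NOT NULL", write="clickhouse")
#     ['SELECT NOT (x IS NULL)']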
However, "SELECT <expr> apply" is a valid alias 847 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 848 return this 849 850 return super()._parse_alias(this=this, explicit=explicit) 851 852 def _parse_expression(self) -> t.Optional[exp.Expression]: 853 this = super()._parse_expression() 854 855 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 856 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 857 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 858 self._match(TokenType.R_PAREN) 859 860 return this 861 862 def _parse_columns(self) -> exp.Expression: 863 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 864 865 while self._next and self._match_text_seq(")", "APPLY", "("): 866 self._match(TokenType.R_PAREN) 867 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 868 return this 869 870 class Generator(generator.Generator): 871 QUERY_HINTS = False 872 STRUCT_DELIMITER = ("(", ")") 873 NVL2_SUPPORTED = False 874 TABLESAMPLE_REQUIRES_PARENS = False 875 TABLESAMPLE_SIZE_IS_ROWS = False 876 TABLESAMPLE_KEYWORDS = "SAMPLE" 877 LAST_DAY_SUPPORTS_DATE_PART = False 878 CAN_IMPLEMENT_ARRAY_ANY = True 879 SUPPORTS_TO_NUMBER = False 880 JOIN_HINTS = False 881 TABLE_HINTS = False 882 GROUPINGS_SEP = "" 883 SET_OP_MODIFIERS = False 884 SUPPORTS_TABLE_ALIAS_COLUMNS = False 885 VALUES_AS_TABLE = False 886 ARRAY_SIZE_NAME = "LENGTH" 887 888 STRING_TYPE_MAPPING = { 889 exp.DataType.Type.CHAR: "String", 890 exp.DataType.Type.LONGBLOB: "String", 891 exp.DataType.Type.LONGTEXT: "String", 892 exp.DataType.Type.MEDIUMBLOB: "String", 893 exp.DataType.Type.MEDIUMTEXT: "String", 894 exp.DataType.Type.TINYBLOB: "String", 895 exp.DataType.Type.TINYTEXT: "String", 896 exp.DataType.Type.TEXT: "String", 897 exp.DataType.Type.VARBINARY: "String", 898 exp.DataType.Type.VARCHAR: "String", 899 } 900 901 SUPPORTED_JSON_PATH_PARTS = { 902 exp.JSONPathKey, 903 exp.JSONPathRoot, 904 exp.JSONPathSubscript, 905 } 906 907 TYPE_MAPPING = { 908 **generator.Generator.TYPE_MAPPING, 909 **STRING_TYPE_MAPPING, 910 exp.DataType.Type.ARRAY: "Array", 911 exp.DataType.Type.BOOLEAN: "Bool", 912 exp.DataType.Type.BIGINT: "Int64", 913 exp.DataType.Type.DATE32: "Date32", 914 exp.DataType.Type.DATETIME: "DateTime", 915 exp.DataType.Type.DATETIME2: "DateTime", 916 exp.DataType.Type.SMALLDATETIME: "DateTime", 917 exp.DataType.Type.DATETIME64: "DateTime64", 918 exp.DataType.Type.DECIMAL: "Decimal", 919 exp.DataType.Type.DECIMAL32: "Decimal32", 920 exp.DataType.Type.DECIMAL64: "Decimal64", 921 exp.DataType.Type.DECIMAL128: "Decimal128", 922 exp.DataType.Type.DECIMAL256: "Decimal256", 923 exp.DataType.Type.TIMESTAMP: "DateTime", 924 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 925 exp.DataType.Type.DOUBLE: "Float64", 926 exp.DataType.Type.ENUM: "Enum", 927 exp.DataType.Type.ENUM8: "Enum8", 928 exp.DataType.Type.ENUM16: "Enum16", 929 exp.DataType.Type.FIXEDSTRING: "FixedString", 930 exp.DataType.Type.FLOAT: "Float32", 931 exp.DataType.Type.INT: "Int32", 932 exp.DataType.Type.MEDIUMINT: "Int32", 933 exp.DataType.Type.INT128: "Int128", 934 exp.DataType.Type.INT256: "Int256", 935 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 936 exp.DataType.Type.MAP: "Map", 937 exp.DataType.Type.NESTED: "Nested", 938 exp.DataType.Type.SMALLINT: "Int16", 939 exp.DataType.Type.STRUCT: "Tuple", 940 exp.DataType.Type.TINYINT: "Int8", 941 exp.DataType.Type.UBIGINT: "UInt64", 942 exp.DataType.Type.UINT: "UInt32", 943 exp.DataType.Type.UINT128: 
"UInt128", 944 exp.DataType.Type.UINT256: "UInt256", 945 exp.DataType.Type.USMALLINT: "UInt16", 946 exp.DataType.Type.UTINYINT: "UInt8", 947 exp.DataType.Type.IPV4: "IPv4", 948 exp.DataType.Type.IPV6: "IPv6", 949 exp.DataType.Type.POINT: "Point", 950 exp.DataType.Type.RING: "Ring", 951 exp.DataType.Type.LINESTRING: "LineString", 952 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 953 exp.DataType.Type.POLYGON: "Polygon", 954 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 955 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 956 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 957 } 958 959 TRANSFORMS = { 960 **generator.Generator.TRANSFORMS, 961 exp.AnyValue: rename_func("any"), 962 exp.ApproxDistinct: rename_func("uniq"), 963 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 964 exp.ArraySum: rename_func("arraySum"), 965 exp.ArgMax: arg_max_or_min_no_count("argMax"), 966 exp.ArgMin: arg_max_or_min_no_count("argMin"), 967 exp.Array: inline_array_sql, 968 exp.CastToStrType: rename_func("CAST"), 969 exp.CountIf: rename_func("countIf"), 970 exp.CompressColumnConstraint: lambda self, 971 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 972 exp.ComputedColumnConstraint: lambda self, 973 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 974 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 975 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 976 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 977 exp.DateStrToDate: rename_func("toDate"), 978 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 979 exp.Explode: rename_func("arrayJoin"), 980 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 981 exp.IsNan: rename_func("isNaN"), 982 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 983 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 984 exp.JSONPathKey: json_path_key_only_name, 985 exp.JSONPathRoot: lambda *_: "", 986 exp.Length: length_or_char_length_sql, 987 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 988 exp.Median: rename_func("median"), 989 exp.Nullif: rename_func("nullIf"), 990 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 991 exp.Pivot: no_pivot_sql, 992 exp.Quantile: _quantile_sql, 993 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 994 exp.Rand: rename_func("randCanonical"), 995 exp.StartsWith: rename_func("startsWith"), 996 exp.StrPosition: lambda self, e: self.func( 997 "position", e.this, e.args.get("substr"), e.args.get("position") 998 ), 999 exp.TimeToStr: lambda self, e: self.func( 1000 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1001 ), 1002 exp.TimeStrToTime: _timestrtotime_sql, 1003 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1004 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1005 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1006 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1007 exp.MD5Digest: rename_func("MD5"), 1008 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1009 exp.SHA: rename_func("SHA1"), 1010 exp.SHA2: sha256_sql, 1011 exp.UnixToTime: _unix_to_time_sql, 1012 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1013 exp.Trim: trim_sql, 1014 exp.Variance: rename_func("varSamp"), 1015 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1016 exp.Stddev: 
rename_func("stddevSamp"), 1017 exp.Chr: rename_func("CHAR"), 1018 exp.Lag: lambda self, e: self.func( 1019 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1020 ), 1021 exp.Lead: lambda self, e: self.func( 1022 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1023 ), 1024 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1025 rename_func("editDistance") 1026 ), 1027 } 1028 1029 PROPERTIES_LOCATION = { 1030 **generator.Generator.PROPERTIES_LOCATION, 1031 exp.OnCluster: exp.Properties.Location.POST_NAME, 1032 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1033 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1034 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1035 } 1036 1037 # There's no list in docs, but it can be found in Clickhouse code 1038 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1039 ON_CLUSTER_TARGETS = { 1040 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1041 "DATABASE", 1042 "TABLE", 1043 "VIEW", 1044 "DICTIONARY", 1045 "INDEX", 1046 "FUNCTION", 1047 "NAMED COLLECTION", 1048 } 1049 1050 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1051 NON_NULLABLE_TYPES = { 1052 exp.DataType.Type.ARRAY, 1053 exp.DataType.Type.MAP, 1054 exp.DataType.Type.STRUCT, 1055 exp.DataType.Type.POINT, 1056 exp.DataType.Type.RING, 1057 exp.DataType.Type.LINESTRING, 1058 exp.DataType.Type.MULTILINESTRING, 1059 exp.DataType.Type.POLYGON, 1060 exp.DataType.Type.MULTIPOLYGON, 1061 } 1062 1063 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1064 strtodate_sql = self.function_fallback_sql(expression) 1065 1066 if not isinstance(expression.parent, exp.Cast): 1067 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1068 # this branch aims to improve the transpilation to clickhouse 1069 return f"CAST({strtodate_sql} AS DATE)" 1070 1071 return strtodate_sql 1072 1073 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1074 this = expression.this 1075 1076 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1077 return self.sql(this) 1078 1079 return super().cast_sql(expression, safe_prefix=safe_prefix) 1080 1081 def trycast_sql(self, expression: exp.TryCast) -> str: 1082 dtype = expression.to 1083 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1084 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1085 dtype.set("nullable", True) 1086 1087 return super().cast_sql(expression) 1088 1089 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1090 this = self.json_path_part(expression.this) 1091 return str(int(this) + 1) if is_int(this) else this 1092 1093 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1094 return f"AS {self.sql(expression, 'this')}" 1095 1096 def _any_to_has( 1097 self, 1098 expression: exp.EQ | exp.NEQ, 1099 default: t.Callable[[t.Any], str], 1100 prefix: str = "", 1101 ) -> str: 1102 if isinstance(expression.left, exp.Any): 1103 arr = expression.left 1104 this = expression.right 1105 elif isinstance(expression.right, exp.Any): 1106 arr = expression.right 1107 this = expression.left 1108 else: 1109 return default(expression) 1110 1111 return prefix + self.func("has", arr.this.unnest(), this) 1112 1113 def eq_sql(self, expression: exp.EQ) -> str: 1114 return self._any_to_has(expression, super().eq_sql) 1115 1116 def neq_sql(self, expression: exp.NEQ) -> str: 1117 return self._any_to_has(expression, super().neq_sql, "NOT ") 1118 1119 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1120 # Manually add a flag to make the search case-insensitive 1121 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1122 return self.func("match", expression.this, regex) 1123 1124 def datatype_sql(self, expression: exp.DataType) -> str: 1125 # String is the standard ClickHouse type, every other variant is just an alias. 1126 # Additionally, any supplied length parameter will be ignored. 1127 # 1128 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1129 if expression.this in self.STRING_TYPE_MAPPING: 1130 dtype = "String" 1131 else: 1132 dtype = super().datatype_sql(expression) 1133 1134 # This section changes the type to `Nullable(...)` if the following conditions hold: 1135 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1136 # and change their semantics 1137 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1138 # constraint: "Type of Map key must be a type, that can be represented by integer or 1139 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1140 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1141 parent = expression.parent 1142 nullable = expression.args.get("nullable") 1143 if nullable is True or ( 1144 nullable is None 1145 and not ( 1146 isinstance(parent, exp.DataType) 1147 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1148 and expression.index in (None, 0) 1149 ) 1150 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1151 ): 1152 dtype = f"Nullable({dtype})" 1153 1154 return dtype 1155 1156 def cte_sql(self, expression: exp.CTE) -> str: 1157 if expression.args.get("scalar"): 1158 this = self.sql(expression, "this") 1159 alias = self.sql(expression, "alias") 1160 return f"{this} AS {alias}" 1161 1162 return super().cte_sql(expression) 1163 1164 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1165 return super().after_limit_modifiers(expression) + [ 1166 ( 1167 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1168 if expression.args.get("settings") 1169 else "" 1170 ), 1171 ( 1172 self.seg("FORMAT ") + self.sql(expression, "format") 1173 if expression.args.get("format") 1174 else "" 1175 ), 1176 ] 1177 1178 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1179 params = self.expressions(expression, key="params", flat=True) 1180 return self.func(expression.name, *expression.expressions) + f"({params})" 1181 1182 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1183 return self.func(expression.name, *expression.expressions) 1184 1185 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1186 return self.anonymousaggfunc_sql(expression) 1187 1188 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1189 return self.parameterizedagg_sql(expression) 1190 1191 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1192 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1193 1194 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1195 return f"ON CLUSTER {self.sql(expression, 'this')}" 1196 1197 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1198 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1199 exp.Properties.Location.POST_NAME 1200 ): 1201 this_name = self.sql( 1202 expression.this if isinstance(expression.this, exp.Schema) else expression, 1203 "this", 1204 ) 1205 this_properties = " ".join( 1206 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1207 ) 1208 this_schema = self.schema_columns_sql(expression.this) 1209 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1210 1211 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1212 1213 return super().createable_sql(expression, locations) 1214 1215 def create_sql(self, expression: exp.Create) -> str: 1216 # The comment property comes last in CTAS statements, i.e. 
after the query 1217 query = expression.expression 1218 if isinstance(query, exp.Query): 1219 comment_prop = expression.find(exp.SchemaCommentProperty) 1220 if comment_prop: 1221 comment_prop.pop() 1222 query.replace(exp.paren(query)) 1223 else: 1224 comment_prop = None 1225 1226 create_sql = super().create_sql(expression) 1227 1228 comment_sql = self.sql(comment_prop) 1229 comment_sql = f" {comment_sql}" if comment_sql else "" 1230 1231 return f"{create_sql}{comment_sql}" 1232 1233 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1234 this = self.indent(self.sql(expression, "this")) 1235 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1236 1237 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1238 this = self.sql(expression, "this") 1239 this = f" {this}" if this else "" 1240 expr = self.sql(expression, "expression") 1241 expr = f" {expr}" if expr else "" 1242 index_type = self.sql(expression, "index_type") 1243 index_type = f" TYPE {index_type}" if index_type else "" 1244 granularity = self.sql(expression, "granularity") 1245 granularity = f" GRANULARITY {granularity}" if granularity else "" 1246 1247 return f"INDEX{this}{expr}{index_type}{granularity}" 1248 1249 def partition_sql(self, expression: exp.Partition) -> str: 1250 return f"PARTITION {self.expressions(expression, flat=True)}" 1251 1252 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1253 return f"ID {self.sql(expression.this)}" 1254 1255 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1256 return ( 1257 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1258 ) 1259 1260 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1261 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1262 1263 def is_sql(self, expression: exp.Is) -> str: 1264 is_sql = super().is_sql(expression) 1265 1266 if isinstance(expression.parent, exp.Not): 1267 # value IS NOT NULL -> NOT (value IS NULL) 1268 is_sql = self.wrap(is_sql) 1269 1270 return is_sql
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
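As an illustration (a sketch, assuming single-argument LOG parses as a natural logarithm here; see LOG_DEFAULTS_TO_LN in the Parser below):

import sqlglot

# ClickHouse's one-argument log(x) is a natural logarithm, so a dialect that
# distinguishes LOG from LN (e.g. DuckDB) should render it as LN(x).
print(sqlglot.transpile("SELECT log(x) FROM t", read="clickhouse", write="duckdb")[0])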
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
A minimal sketch of observing this behavior follows (it assumes the optimizer's qualify helper, which is not part of this module):
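import sqlglot
from sqlglot.optimizer.qualify import qualify

# Under ClickHouse semantics, my_id in the WHERE clause is expected to resolve
# through the projection alias to data.id rather than to data.my_id.
sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1"
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))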
PRESERVE_ORIGINAL_NAMES
Whether the name of the function should be preserved inside the node's metadata; this can be useful for round-tripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
Whether number literals can include underscores for better readability.
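For instance (a sketch):

import sqlglot

# Underscore-separated number literals are accepted by the ClickHouse dialect.
print(sqlglot.parse_one("SELECT 1_000_000 AS n", read="clickhouse").sql("clickhouse"))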
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (the two characters "\n") to its unescaped version (the literal newline character).
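A way to probe this (a sketch; which sequences are mapped for this dialect is an assumption here):

import sqlglot
from sqlglot import exp

# The two-character sequence \0 inside the literal is expected to be unescaped
# by the tokenizer into the corresponding single character.
literal = sqlglot.parse_one(r"SELECT 'a\0b'", read="clickhouse").find(exp.Literal)
print(repr(literal.this))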
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the ClickHouse equivalent of CREATE SCHEMA is CREATE DATABASE.
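For example (a sketch; the exact rendering is an assumption based on the mapping described above):

import sqlglot

# A Postgres CREATE SCHEMA is expected to map to ClickHouse's CREATE DATABASE.
print(sqlglot.transpile("CREATE SCHEMA foo", read="postgres", write="clickhouse")[0])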
SET_OP_DISTINCT_BY_DEFAULT
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
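For example (a sketch): dialects where a bare UNION means UNION DISTINCT should transpile to an explicit modifier for ClickHouse.

import sqlglot

# Postgres UNION is DISTINCT by default; ClickHouse needs the modifier spelled
# out, so the output is expected to contain UNION DISTINCT.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])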
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
189 class Tokenizer(tokens.Tokenizer): 190 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 191 IDENTIFIERS = ['"', "`"] 192 IDENTIFIER_ESCAPES = ["\\"] 193 STRING_ESCAPES = ["'", "\\"] 194 BIT_STRINGS = [("0b", "")] 195 HEX_STRINGS = [("0x", ""), ("0X", "")] 196 HEREDOC_STRINGS = ["$"] 197 198 KEYWORDS = { 199 **tokens.Tokenizer.KEYWORDS, 200 "ATTACH": TokenType.COMMAND, 201 "DATE32": TokenType.DATE32, 202 "DATETIME64": TokenType.DATETIME64, 203 "DICTIONARY": TokenType.DICTIONARY, 204 "ENUM8": TokenType.ENUM8, 205 "ENUM16": TokenType.ENUM16, 206 "FINAL": TokenType.FINAL, 207 "FIXEDSTRING": TokenType.FIXEDSTRING, 208 "FLOAT32": TokenType.FLOAT, 209 "FLOAT64": TokenType.DOUBLE, 210 "GLOBAL": TokenType.GLOBAL, 211 "INT256": TokenType.INT256, 212 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 213 "MAP": TokenType.MAP, 214 "NESTED": TokenType.NESTED, 215 "SAMPLE": TokenType.TABLE_SAMPLE, 216 "TUPLE": TokenType.STRUCT, 217 "UINT128": TokenType.UINT128, 218 "UINT16": TokenType.USMALLINT, 219 "UINT256": TokenType.UINT256, 220 "UINT32": TokenType.UINT, 221 "UINT64": TokenType.UBIGINT, 222 "UINT8": TokenType.UTINYINT, 223 "IPV4": TokenType.IPV4, 224 "IPV6": TokenType.IPV6, 225 "POINT": TokenType.POINT, 226 "RING": TokenType.RING, 227 "LINESTRING": TokenType.LINESTRING, 228 "MULTILINESTRING": TokenType.MULTILINESTRING, 229 "POLYGON": TokenType.POLYGON, 230 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 231 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 232 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 233 "SYSTEM": TokenType.COMMAND, 234 "PREWHERE": TokenType.PREWHERE, 235 } 236 KEYWORDS.pop("/*+") 237 238 SINGLE_TOKENS = { 239 **tokens.Tokenizer.SINGLE_TOKENS, 240 "$": TokenType.HEREDOC_STRING, 241 }
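A quick probe of the tokenizer's ClickHouse specifics (a sketch; the query is made up):

import sqlglot

# '#' starts a comment and SAMPLE tokenizes as a TABLE_SAMPLE keyword rather
# than an identifier under the ClickHouse dialect.
for token in sqlglot.tokenize("SELECT x FROM t SAMPLE 1 # a comment", read="clickhouse"):
    print(token.token_type, token.text)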
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
243 class Parser(parser.Parser): 244 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 245 # * select x from t1 union all select x from t2 limit 1; 246 # * select x from t1 union all (select x from t2 limit 1); 247 MODIFIERS_ATTACHED_TO_SET_OP = False 248 INTERVAL_SPANS = False 249 OPTIONAL_ALIAS_TOKEN_CTE = False 250 251 FUNCTIONS = { 252 **parser.Parser.FUNCTIONS, 253 "ANY": exp.AnyValue.from_arg_list, 254 "ARRAYSUM": exp.ArraySum.from_arg_list, 255 "COUNTIF": _build_count_if, 256 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 257 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 258 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 259 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 260 "DATE_FORMAT": _build_date_format, 261 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 262 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 263 "FORMATDATETIME": _build_date_format, 264 "JSONEXTRACTSTRING": build_json_extract_path( 265 exp.JSONExtractScalar, zero_based_indexing=False 266 ), 267 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 268 "MAP": parser.build_var_map, 269 "MATCH": exp.RegexpLike.from_arg_list, 270 "RANDCANONICAL": exp.Rand.from_arg_list, 271 "STR_TO_DATE": _build_str_to_date, 272 "TUPLE": exp.Struct.from_arg_list, 273 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 274 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 275 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 276 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 277 "UNIQ": exp.ApproxDistinct.from_arg_list, 278 "XOR": lambda args: exp.Xor(expressions=args), 279 "MD5": exp.MD5Digest.from_arg_list, 280 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 281 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 282 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 283 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 284 } 285 FUNCTIONS.pop("TRANSFORM") 286 287 AGG_FUNCTIONS = { 288 "count", 289 "min", 290 "max", 291 "sum", 292 "avg", 293 "any", 294 "stddevPop", 295 "stddevSamp", 296 "varPop", 297 "varSamp", 298 "corr", 299 "covarPop", 300 "covarSamp", 301 "entropy", 302 "exponentialMovingAverage", 303 "intervalLengthSum", 304 "kolmogorovSmirnovTest", 305 "mannWhitneyUTest", 306 "median", 307 "rankCorr", 308 "sumKahan", 309 "studentTTest", 310 "welchTTest", 311 "anyHeavy", 312 "anyLast", 313 "boundingRatio", 314 "first_value", 315 "last_value", 316 "argMin", 317 "argMax", 318 "avgWeighted", 319 "topK", 320 "topKWeighted", 321 "deltaSum", 322 "deltaSumTimestamp", 323 "groupArray", 324 "groupArrayLast", 325 "groupUniqArray", 326 "groupArrayInsertAt", 327 "groupArrayMovingAvg", 328 "groupArrayMovingSum", 329 "groupArraySample", 330 "groupBitAnd", 331 "groupBitOr", 332 "groupBitXor", 333 "groupBitmap", 334 "groupBitmapAnd", 335 "groupBitmapOr", 336 "groupBitmapXor", 337 "sumWithOverflow", 338 "sumMap", 339 "minMap", 340 "maxMap", 341 "skewSamp", 342 "skewPop", 343 "kurtSamp", 344 "kurtPop", 345 "uniq", 346 "uniqExact", 347 "uniqCombined", 348 "uniqCombined64", 349 "uniqHLL12", 350 "uniqTheta", 351 "quantile", 352 "quantiles", 353 "quantileExact", 354 "quantilesExact", 355 "quantileExactLow", 356 "quantilesExactLow", 357 "quantileExactHigh", 358 "quantilesExactHigh", 359 "quantileExactWeighted", 360 "quantilesExactWeighted", 
361 "quantileTiming", 362 "quantilesTiming", 363 "quantileTimingWeighted", 364 "quantilesTimingWeighted", 365 "quantileDeterministic", 366 "quantilesDeterministic", 367 "quantileTDigest", 368 "quantilesTDigest", 369 "quantileTDigestWeighted", 370 "quantilesTDigestWeighted", 371 "quantileBFloat16", 372 "quantilesBFloat16", 373 "quantileBFloat16Weighted", 374 "quantilesBFloat16Weighted", 375 "simpleLinearRegression", 376 "stochasticLinearRegression", 377 "stochasticLogisticRegression", 378 "categoricalInformationValue", 379 "contingency", 380 "cramersV", 381 "cramersVBiasCorrected", 382 "theilsU", 383 "maxIntersections", 384 "maxIntersectionsPosition", 385 "meanZTest", 386 "quantileInterpolatedWeighted", 387 "quantilesInterpolatedWeighted", 388 "quantileGK", 389 "quantilesGK", 390 "sparkBar", 391 "sumCount", 392 "largestTriangleThreeBuckets", 393 "histogram", 394 "sequenceMatch", 395 "sequenceCount", 396 "windowFunnel", 397 "retention", 398 "uniqUpTo", 399 "sequenceNextNode", 400 "exponentialTimeDecayedAvg", 401 } 402 403 AGG_FUNCTIONS_SUFFIXES = [ 404 "If", 405 "Array", 406 "ArrayIf", 407 "Map", 408 "SimpleState", 409 "State", 410 "Merge", 411 "MergeState", 412 "ForEach", 413 "Distinct", 414 "OrDefault", 415 "OrNull", 416 "Resample", 417 "ArgMin", 418 "ArgMax", 419 ] 420 421 FUNC_TOKENS = { 422 *parser.Parser.FUNC_TOKENS, 423 TokenType.SET, 424 } 425 426 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 427 428 ID_VAR_TOKENS = { 429 *parser.Parser.ID_VAR_TOKENS, 430 TokenType.LIKE, 431 } 432 433 AGG_FUNC_MAPPING = ( 434 lambda functions, suffixes: { 435 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 436 } 437 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 438 439 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 440 441 FUNCTION_PARSERS = { 442 **parser.Parser.FUNCTION_PARSERS, 443 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 444 "QUANTILE": lambda self: self._parse_quantile(), 445 "MEDIAN": lambda self: self._parse_quantile(), 446 "COLUMNS": lambda self: self._parse_columns(), 447 } 448 449 FUNCTION_PARSERS.pop("MATCH") 450 451 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 452 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 453 454 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 455 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 456 457 RANGE_PARSERS = { 458 **parser.Parser.RANGE_PARSERS, 459 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 460 and self._parse_in(this, is_global=True), 461 } 462 463 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 464 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
465 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 466 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 467 468 JOIN_KINDS = { 469 *parser.Parser.JOIN_KINDS, 470 TokenType.ANY, 471 TokenType.ASOF, 472 TokenType.ARRAY, 473 } 474 475 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 476 TokenType.ANY, 477 TokenType.ARRAY, 478 TokenType.FINAL, 479 TokenType.FORMAT, 480 TokenType.SETTINGS, 481 } 482 483 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 484 TokenType.FORMAT, 485 } 486 487 LOG_DEFAULTS_TO_LN = True 488 489 QUERY_MODIFIER_PARSERS = { 490 **parser.Parser.QUERY_MODIFIER_PARSERS, 491 TokenType.SETTINGS: lambda self: ( 492 "settings", 493 self._advance() or self._parse_csv(self._parse_assignment), 494 ), 495 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 496 } 497 498 CONSTRAINT_PARSERS = { 499 **parser.Parser.CONSTRAINT_PARSERS, 500 "INDEX": lambda self: self._parse_index_constraint(), 501 "CODEC": lambda self: self._parse_compress(), 502 } 503 504 ALTER_PARSERS = { 505 **parser.Parser.ALTER_PARSERS, 506 "REPLACE": lambda self: self._parse_alter_table_replace(), 507 } 508 509 SCHEMA_UNNAMED_CONSTRAINTS = { 510 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 511 "INDEX", 512 } 513 514 PLACEHOLDER_PARSERS = { 515 **parser.Parser.PLACEHOLDER_PARSERS, 516 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 517 } 518 519 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 520 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 521 return self._parse_lambda() 522 523 def _parse_types( 524 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 525 ) -> t.Optional[exp.Expression]: 526 dtype = super()._parse_types( 527 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 528 ) 529 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 530 # Mark every type as non-nullable which is ClickHouse's default, unless it's 531 # already marked as nullable. This marker helps us transpile types from other 532 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 533 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 534 # fail in ClickHouse without the `Nullable` type constructor. 535 dtype.set("nullable", False) 536 537 return dtype 538 539 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 540 index = self._index 541 this = self._parse_bitwise() 542 if self._match(TokenType.FROM): 543 self._retreat(index) 544 return super()._parse_extract() 545 546 # We return Anonymous here because extract and regexpExtract have different semantics, 547 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 548 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 549 # 550 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
551 self._match(TokenType.COMMA) 552 return self.expression( 553 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 554 ) 555 556 def _parse_assignment(self) -> t.Optional[exp.Expression]: 557 this = super()._parse_assignment() 558 559 if self._match(TokenType.PLACEHOLDER): 560 return self.expression( 561 exp.If, 562 this=this, 563 true=self._parse_assignment(), 564 false=self._match(TokenType.COLON) and self._parse_assignment(), 565 ) 566 567 return this 568 569 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 570 """ 571 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 572 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 573 """ 574 index = self._index 575 576 this = self._parse_id_var() 577 self._match(TokenType.COLON) 578 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 579 self._match_text_seq("IDENTIFIER") and "Identifier" 580 ) 581 582 if not kind: 583 self._retreat(index) 584 return None 585 elif not self._match(TokenType.R_BRACE): 586 self.raise_error("Expecting }") 587 588 return self.expression(exp.Placeholder, this=this, kind=kind) 589 590 def _parse_bracket( 591 self, this: t.Optional[exp.Expression] = None 592 ) -> t.Optional[exp.Expression]: 593 l_brace = self._match(TokenType.L_BRACE, advance=False) 594 bracket = super()._parse_bracket(this) 595 596 if l_brace and isinstance(bracket, exp.Struct): 597 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 598 for expression in bracket.expressions: 599 if not isinstance(expression, exp.PropertyEQ): 600 break 601 602 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 603 varmap.args["values"].append("expressions", expression.expression) 604 605 return varmap 606 607 return bracket 608 609 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 610 this = super()._parse_in(this) 611 this.set("is_global", is_global) 612 return this 613 614 def _parse_table( 615 self, 616 schema: bool = False, 617 joins: bool = False, 618 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 619 parse_bracket: bool = False, 620 is_db_reference: bool = False, 621 parse_partition: bool = False, 622 ) -> t.Optional[exp.Expression]: 623 this = super()._parse_table( 624 schema=schema, 625 joins=joins, 626 alias_tokens=alias_tokens, 627 parse_bracket=parse_bracket, 628 is_db_reference=is_db_reference, 629 ) 630 631 if self._match(TokenType.FINAL): 632 this = self.expression(exp.Final, this=this) 633 634 return this 635 636 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 637 return super()._parse_position(haystack_first=True) 638 639 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 640 def _parse_cte(self) -> t.Optional[exp.CTE]: 641 # WITH <identifier> AS <subquery expression> 642 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 643 644 if not cte: 645 # WITH <expression> AS <identifier> 646 cte = self.expression( 647 exp.CTE, 648 this=self._parse_assignment(), 649 alias=self._parse_table_alias(), 650 scalar=True, 651 ) 652 653 return cte 654 655 def _parse_join_parts( 656 self, 657 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 658 is_global = self._match(TokenType.GLOBAL) and self._prev 659 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 660 661 if kind_pre: 662 kind = self._match_set(self.JOIN_KINDS) and self._prev 663 side = 
self._match_set(self.JOIN_SIDES) and self._prev 664 return is_global, side, kind 665 666 return ( 667 is_global, 668 self._match_set(self.JOIN_SIDES) and self._prev, 669 self._match_set(self.JOIN_KINDS) and self._prev, 670 ) 671 672 def _parse_join( 673 self, skip_join_token: bool = False, parse_bracket: bool = False 674 ) -> t.Optional[exp.Join]: 675 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 676 if join: 677 join.set("global", join.args.pop("method", None)) 678 679 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 680 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 681 if join.kind == "ARRAY": 682 for table in join.find_all(exp.Table): 683 table.replace(table.to_column()) 684 685 return join 686 687 def _parse_function( 688 self, 689 functions: t.Optional[t.Dict[str, t.Callable]] = None, 690 anonymous: bool = False, 691 optional_parens: bool = True, 692 any_token: bool = False, 693 ) -> t.Optional[exp.Expression]: 694 expr = super()._parse_function( 695 functions=functions, 696 anonymous=anonymous, 697 optional_parens=optional_parens, 698 any_token=any_token, 699 ) 700 701 func = expr.this if isinstance(expr, exp.Window) else expr 702 703 # Aggregate functions can be split in 2 parts: <func_name><suffix> 704 parts = ( 705 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 706 ) 707 708 if parts: 709 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 710 params = self._parse_func_params(anon_func) 711 712 kwargs = { 713 "this": anon_func.this, 714 "expressions": anon_func.expressions, 715 } 716 if parts[1]: 717 kwargs["parts"] = parts 718 exp_class: t.Type[exp.Expression] = ( 719 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 720 ) 721 else: 722 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 723 724 kwargs["exp_class"] = exp_class 725 if params: 726 kwargs["params"] = params 727 728 func = self.expression(**kwargs) 729 730 if isinstance(expr, exp.Window): 731 # The window's func was parsed as Anonymous in base parser, fix its 732 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 733 expr.set("this", func) 734 elif params: 735 # Params have blocked super()._parse_function() from parsing the following window 736 # (if that exists) as they're standing between the function call and the window spec 737 expr = self._parse_window(func) 738 else: 739 expr = func 740 741 return expr 742 743 def _parse_func_params( 744 self, this: t.Optional[exp.Func] = None 745 ) -> t.Optional[t.List[exp.Expression]]: 746 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 747 return self._parse_csv(self._parse_lambda) 748 749 if self._match(TokenType.L_PAREN): 750 params = self._parse_csv(self._parse_lambda) 751 self._match_r_paren(this) 752 return params 753 754 return None 755 756 def _parse_quantile(self) -> exp.Quantile: 757 this = self._parse_lambda() 758 params = self._parse_func_params() 759 if params: 760 return self.expression(exp.Quantile, this=params[0], quantile=this) 761 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 762 763 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 764 return super()._parse_wrapped_id_vars(optional=True) 765 766 def _parse_primary_key( 767 self, wrapped_optional: bool = False, in_props: bool = False 768 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 769 return super()._parse_primary_key( 770 
wrapped_optional=wrapped_optional or in_props, in_props=in_props 771 ) 772 773 def _parse_on_property(self) -> t.Optional[exp.Expression]: 774 index = self._index 775 if self._match_text_seq("CLUSTER"): 776 this = self._parse_id_var() 777 if this: 778 return self.expression(exp.OnCluster, this=this) 779 else: 780 self._retreat(index) 781 return None 782 783 def _parse_index_constraint( 784 self, kind: t.Optional[str] = None 785 ) -> exp.IndexColumnConstraint: 786 # INDEX name1 expr TYPE type1(args) GRANULARITY value 787 this = self._parse_id_var() 788 expression = self._parse_assignment() 789 790 index_type = self._match_text_seq("TYPE") and ( 791 self._parse_function() or self._parse_var() 792 ) 793 794 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 795 796 return self.expression( 797 exp.IndexColumnConstraint, 798 this=this, 799 expression=expression, 800 index_type=index_type, 801 granularity=granularity, 802 ) 803 804 def _parse_partition(self) -> t.Optional[exp.Partition]: 805 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 806 if not self._match(TokenType.PARTITION): 807 return None 808 809 if self._match_text_seq("ID"): 810 # Corresponds to the PARTITION ID <string_value> syntax 811 expressions: t.List[exp.Expression] = [ 812 self.expression(exp.PartitionId, this=self._parse_string()) 813 ] 814 else: 815 expressions = self._parse_expressions() 816 817 return self.expression(exp.Partition, expressions=expressions) 818 819 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 820 partition = self._parse_partition() 821 822 if not partition or not self._match(TokenType.FROM): 823 return None 824 825 return self.expression( 826 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 827 ) 828 829 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 830 if not self._match_text_seq("PROJECTION"): 831 return None 832 833 return self.expression( 834 exp.ProjectionDef, 835 this=self._parse_id_var(), 836 expression=self._parse_wrapped(self._parse_statement), 837 ) 838 839 def _parse_constraint(self) -> t.Optional[exp.Expression]: 840 return super()._parse_constraint() or self._parse_projection_def() 841 842 def _parse_alias( 843 self, this: t.Optional[exp.Expression], explicit: bool = False 844 ) -> t.Optional[exp.Expression]: 845 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 846 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 847 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 848 return this 849 850 return super()._parse_alias(this=this, explicit=explicit) 851 852 def _parse_expression(self) -> t.Optional[exp.Expression]: 853 this = super()._parse_expression() 854 855 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 856 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 857 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 858 self._match(TokenType.R_PAREN) 859 860 return this 861 862 def _parse_columns(self) -> exp.Expression: 863 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 864 865 while self._next and self._match_text_seq(")", "APPLY", "("): 866 self._match(TokenType.R_PAREN) 867 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 868 return this
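A few of the ClickHouse-specific constructs handled above, exercised together (a sketch; the round-tripped text is not guaranteed to be byte-identical, e.g. the ternary may be rendered as a CASE expression):

import sqlglot

# Ternary operator, parameterized quantile, FINAL, GLOBAL IN and the SETTINGS /
# FORMAT query modifiers are all ClickHouse-dialect features of this parser.
sql = (
    "SELECT a ? b : c, quantile(0.5)(x) "
    "FROM t FINAL "
    "WHERE id GLOBAL IN (SELECT id FROM t2) "
    "SETTINGS max_threads = 4 "
    "FORMAT JSONEachRow"
)
print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))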
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
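For example (a sketch): the error-handling knobs above can be passed through the top-level parse helpers.

import sqlglot
from sqlglot.errors import ErrorLevel

# With ErrorLevel.WARN, parse errors are logged instead of raised immediately;
# compare error_level described above.
expression = sqlglot.parse_one("SELECT 1", read="clickhouse", error_level=ErrorLevel.WARN)
print(expression.sql("clickhouse"))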
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    VALUES_AS_TABLE = False
    ARRAY_SIZE_NAME = "LENGTH"

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: trim_sql,
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in ClickHouse code,
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (e.g. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
            query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql
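A quick way to see a few of these overrides in action is to transpile through the public sqlglot API. The snippet below is a minimal sketch (the column and table names are illustrative, and exact output can vary slightly between sqlglot versions):

import sqlglot

# `= ANY(array)` should be rewritten to ClickHouse's has() via eq_sql/_any_to_has,
# e.g. something like: SELECT has([1, 2], 1)
print(sqlglot.transpile("SELECT 1 = ANY(ARRAY[1, 2])", read="postgres", write="clickhouse")[0])

# Aggregates map through TRANSFORMS, e.g. APPROX_DISTINCT -> uniq
print(sqlglot.transpile("SELECT APPROX_DISTINCT(x) FROM t", write="clickhouse")[0])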
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
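These options do not need to be set on the Generator directly; sqlglot.transpile forwards keyword arguments to it. A small illustrative example (the query is made up):

import sqlglot

sql = "select a, b from events where a > 1"
# `pretty` and `identify` are passed through to this Generator
print(sqlglot.transpile(sql, write="clickhouse", pretty=True, identify=True)[0])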
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (e.g. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return f"CAST({strtodate_sql} AS DATE)"

    return strtodate_sql
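For instance, transpiling MySQL's STR_TO_DATE should produce a rendering wrapped in CAST(... AS DATE), since the call is not already under a cast. A hedged sketch (the literal values are illustrative, and the exact fallback rendering may differ by version):

import sqlglot

print(sqlglot.transpile(
    "SELECT STR_TO_DATE('2020-01-01', '%Y-%m-%d')", read="mysql", write="clickhouse"
)[0])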
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
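Since ClickHouse has no TRY_CAST keyword, casting to a Nullable type is used as the closest equivalent. A minimal sketch:

import sqlglot

# Expected to yield something like: SELECT CAST(x AS Nullable(Int32))
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])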
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
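In practice this means columns coming from dialects that are nullable by default get wrapped in Nullable(...), while string aliases collapse to String. A hedged sketch (table/column names are made up):

import sqlglot

# Expected to yield something like:
#   CREATE TABLE t (a Nullable(String), b Nullable(Int32))
print(sqlglot.transpile("CREATE TABLE t (a TEXT, b INT)", write="clickhouse")[0])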
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
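This is what lets ClickHouse-specific SETTINGS and FORMAT clauses survive a round trip. An illustrative sketch (setting name and format are examples):

import sqlglot

sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])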
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
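This override places ON CLUSTER (a POST_NAME property) right after the object name, before the column schema. A hedged round-trip sketch (cluster and table names are made up):

import sqlglot

sql = "CREATE TABLE t ON CLUSTER my_cluster (x Int32) ENGINE=MergeTree ORDER BY x"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])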
def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
        query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
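The effect is that a COMMENT property written before the query moves after it in the generated CTAS. A hedged sketch, assuming the source dialect accepts a COMMENT property in this position:

import sqlglot

# Expected to yield something like:
#   CREATE TABLE t AS (SELECT 1) COMMENT 'hello'
print(sqlglot.transpile("CREATE TABLE t COMMENT 'hello' AS SELECT 1", write="clickhouse")[0])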
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
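This renders ClickHouse data-skipping index definitions in the form INDEX name expr TYPE kind GRANULARITY n. A hedged round-trip sketch (index name, type, and granularity are illustrative):

import sqlglot

sql = "CREATE TABLE t (x String, INDEX idx x TYPE minmax GRANULARITY 4) ENGINE=MergeTree ORDER BY x"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])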
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql