sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
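For orientation before the per-attribute notes below, a brief usage sketch of this dialect through sqlglot's public entry points (the table and column names are made up for the example):

import sqlglot

# ClickHouse-specific syntax such as FINAL, query parameters and the
# SETTINGS query modifier round-trips through the dialect.
sql = "SELECT a FROM t FINAL WHERE b = {p: UInt32} SETTINGS max_threads = 4"
print(sqlglot.transpile(sql, read="clickhouse")[0])

# Transpiling from another dialect applies the TYPE_MAPPING above,
# e.g. BIGINT should come out as Int64.
print(sqlglot.transpile("CREATE TABLE x (id BIGINT)", read="postgres", write="clickhouse")[0])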
100class ClickHouse(Dialect): 101 NORMALIZE_FUNCTIONS: bool | str = False 102 NULL_ORDERING = "nulls_are_last" 103 SUPPORTS_USER_DEFINED_TYPES = False 104 SAFE_DIVISION = True 105 LOG_BASE_FIRST: t.Optional[bool] = None 106 FORCE_EARLY_ALIAS_REF_EXPANSION = True 107 108 UNESCAPED_SEQUENCES = { 109 "\\0": "\0", 110 } 111 112 class Tokenizer(tokens.Tokenizer): 113 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 114 IDENTIFIERS = ['"', "`"] 115 STRING_ESCAPES = ["'", "\\"] 116 BIT_STRINGS = [("0b", "")] 117 HEX_STRINGS = [("0x", ""), ("0X", "")] 118 HEREDOC_STRINGS = ["$"] 119 120 KEYWORDS = { 121 **tokens.Tokenizer.KEYWORDS, 122 "ATTACH": TokenType.COMMAND, 123 "DATE32": TokenType.DATE32, 124 "DATETIME64": TokenType.DATETIME64, 125 "DICTIONARY": TokenType.DICTIONARY, 126 "ENUM8": TokenType.ENUM8, 127 "ENUM16": TokenType.ENUM16, 128 "FINAL": TokenType.FINAL, 129 "FIXEDSTRING": TokenType.FIXEDSTRING, 130 "FLOAT32": TokenType.FLOAT, 131 "FLOAT64": TokenType.DOUBLE, 132 "GLOBAL": TokenType.GLOBAL, 133 "INT256": TokenType.INT256, 134 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 135 "MAP": TokenType.MAP, 136 "NESTED": TokenType.NESTED, 137 "SAMPLE": TokenType.TABLE_SAMPLE, 138 "TUPLE": TokenType.STRUCT, 139 "UINT128": TokenType.UINT128, 140 "UINT16": TokenType.USMALLINT, 141 "UINT256": TokenType.UINT256, 142 "UINT32": TokenType.UINT, 143 "UINT64": TokenType.UBIGINT, 144 "UINT8": TokenType.UTINYINT, 145 "IPV4": TokenType.IPV4, 146 "IPV6": TokenType.IPV6, 147 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 148 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 149 "SYSTEM": TokenType.COMMAND, 150 "PREWHERE": TokenType.PREWHERE, 151 } 152 KEYWORDS.pop("/*+") 153 154 SINGLE_TOKENS = { 155 **tokens.Tokenizer.SINGLE_TOKENS, 156 "$": TokenType.HEREDOC_STRING, 157 } 158 159 class Parser(parser.Parser): 160 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 161 # * select x from t1 union all select x from t2 limit 1; 162 # * select x from t1 union all (select x from t2 limit 1); 163 MODIFIERS_ATTACHED_TO_SET_OP = False 164 INTERVAL_SPANS = False 165 166 FUNCTIONS = { 167 **parser.Parser.FUNCTIONS, 168 "ANY": exp.AnyValue.from_arg_list, 169 "ARRAYSUM": exp.ArraySum.from_arg_list, 170 "COUNTIF": _build_count_if, 171 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 172 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 173 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 174 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 175 "DATE_FORMAT": _build_date_format, 176 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 177 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 178 "FORMATDATETIME": _build_date_format, 179 "JSONEXTRACTSTRING": build_json_extract_path( 180 exp.JSONExtractScalar, zero_based_indexing=False 181 ), 182 "MAP": parser.build_var_map, 183 "MATCH": exp.RegexpLike.from_arg_list, 184 "RANDCANONICAL": exp.Rand.from_arg_list, 185 "TUPLE": exp.Struct.from_arg_list, 186 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 187 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 188 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 189 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 190 "UNIQ": exp.ApproxDistinct.from_arg_list, 191 "XOR": lambda args: exp.Xor(expressions=args), 192 "MD5": exp.MD5Digest.from_arg_list, 193 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), 
length=exp.Literal.number(256)), 194 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 195 } 196 197 AGG_FUNCTIONS = { 198 "count", 199 "min", 200 "max", 201 "sum", 202 "avg", 203 "any", 204 "stddevPop", 205 "stddevSamp", 206 "varPop", 207 "varSamp", 208 "corr", 209 "covarPop", 210 "covarSamp", 211 "entropy", 212 "exponentialMovingAverage", 213 "intervalLengthSum", 214 "kolmogorovSmirnovTest", 215 "mannWhitneyUTest", 216 "median", 217 "rankCorr", 218 "sumKahan", 219 "studentTTest", 220 "welchTTest", 221 "anyHeavy", 222 "anyLast", 223 "boundingRatio", 224 "first_value", 225 "last_value", 226 "argMin", 227 "argMax", 228 "avgWeighted", 229 "topK", 230 "topKWeighted", 231 "deltaSum", 232 "deltaSumTimestamp", 233 "groupArray", 234 "groupArrayLast", 235 "groupUniqArray", 236 "groupArrayInsertAt", 237 "groupArrayMovingAvg", 238 "groupArrayMovingSum", 239 "groupArraySample", 240 "groupBitAnd", 241 "groupBitOr", 242 "groupBitXor", 243 "groupBitmap", 244 "groupBitmapAnd", 245 "groupBitmapOr", 246 "groupBitmapXor", 247 "sumWithOverflow", 248 "sumMap", 249 "minMap", 250 "maxMap", 251 "skewSamp", 252 "skewPop", 253 "kurtSamp", 254 "kurtPop", 255 "uniq", 256 "uniqExact", 257 "uniqCombined", 258 "uniqCombined64", 259 "uniqHLL12", 260 "uniqTheta", 261 "quantile", 262 "quantiles", 263 "quantileExact", 264 "quantilesExact", 265 "quantileExactLow", 266 "quantilesExactLow", 267 "quantileExactHigh", 268 "quantilesExactHigh", 269 "quantileExactWeighted", 270 "quantilesExactWeighted", 271 "quantileTiming", 272 "quantilesTiming", 273 "quantileTimingWeighted", 274 "quantilesTimingWeighted", 275 "quantileDeterministic", 276 "quantilesDeterministic", 277 "quantileTDigest", 278 "quantilesTDigest", 279 "quantileTDigestWeighted", 280 "quantilesTDigestWeighted", 281 "quantileBFloat16", 282 "quantilesBFloat16", 283 "quantileBFloat16Weighted", 284 "quantilesBFloat16Weighted", 285 "simpleLinearRegression", 286 "stochasticLinearRegression", 287 "stochasticLogisticRegression", 288 "categoricalInformationValue", 289 "contingency", 290 "cramersV", 291 "cramersVBiasCorrected", 292 "theilsU", 293 "maxIntersections", 294 "maxIntersectionsPosition", 295 "meanZTest", 296 "quantileInterpolatedWeighted", 297 "quantilesInterpolatedWeighted", 298 "quantileGK", 299 "quantilesGK", 300 "sparkBar", 301 "sumCount", 302 "largestTriangleThreeBuckets", 303 "histogram", 304 "sequenceMatch", 305 "sequenceCount", 306 "windowFunnel", 307 "retention", 308 "uniqUpTo", 309 "sequenceNextNode", 310 "exponentialTimeDecayedAvg", 311 } 312 313 AGG_FUNCTIONS_SUFFIXES = [ 314 "If", 315 "Array", 316 "ArrayIf", 317 "Map", 318 "SimpleState", 319 "State", 320 "Merge", 321 "MergeState", 322 "ForEach", 323 "Distinct", 324 "OrDefault", 325 "OrNull", 326 "Resample", 327 "ArgMin", 328 "ArgMax", 329 ] 330 331 FUNC_TOKENS = { 332 *parser.Parser.FUNC_TOKENS, 333 TokenType.SET, 334 } 335 336 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 337 338 ID_VAR_TOKENS = { 339 *parser.Parser.ID_VAR_TOKENS, 340 TokenType.LIKE, 341 } 342 343 AGG_FUNC_MAPPING = ( 344 lambda functions, suffixes: { 345 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 346 } 347 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 348 349 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 350 351 FUNCTION_PARSERS = { 352 **parser.Parser.FUNCTION_PARSERS, 353 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 354 "QUANTILE": lambda self: self._parse_quantile(), 355 } 
356 357 FUNCTION_PARSERS.pop("MATCH") 358 359 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 360 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 361 362 RANGE_PARSERS = { 363 **parser.Parser.RANGE_PARSERS, 364 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 365 and self._parse_in(this, is_global=True), 366 } 367 368 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 369 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 370 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 371 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 372 373 JOIN_KINDS = { 374 *parser.Parser.JOIN_KINDS, 375 TokenType.ANY, 376 TokenType.ASOF, 377 TokenType.ARRAY, 378 } 379 380 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 381 TokenType.ANY, 382 TokenType.ARRAY, 383 TokenType.FINAL, 384 TokenType.FORMAT, 385 TokenType.SETTINGS, 386 } 387 388 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 389 TokenType.FORMAT, 390 } 391 392 LOG_DEFAULTS_TO_LN = True 393 394 QUERY_MODIFIER_PARSERS = { 395 **parser.Parser.QUERY_MODIFIER_PARSERS, 396 TokenType.SETTINGS: lambda self: ( 397 "settings", 398 self._advance() or self._parse_csv(self._parse_assignment), 399 ), 400 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 401 } 402 403 CONSTRAINT_PARSERS = { 404 **parser.Parser.CONSTRAINT_PARSERS, 405 "INDEX": lambda self: self._parse_index_constraint(), 406 "CODEC": lambda self: self._parse_compress(), 407 } 408 409 ALTER_PARSERS = { 410 **parser.Parser.ALTER_PARSERS, 411 "REPLACE": lambda self: self._parse_alter_table_replace(), 412 } 413 414 SCHEMA_UNNAMED_CONSTRAINTS = { 415 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 416 "INDEX", 417 } 418 419 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 420 index = self._index 421 this = self._parse_bitwise() 422 if self._match(TokenType.FROM): 423 self._retreat(index) 424 return super()._parse_extract() 425 426 # We return Anonymous here because extract and regexpExtract have different semantics, 427 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 428 # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`. 429 # 430 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
431 self._match(TokenType.COMMA) 432 return self.expression( 433 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 434 ) 435 436 def _parse_assignment(self) -> t.Optional[exp.Expression]: 437 this = super()._parse_assignment() 438 439 if self._match(TokenType.PLACEHOLDER): 440 return self.expression( 441 exp.If, 442 this=this, 443 true=self._parse_assignment(), 444 false=self._match(TokenType.COLON) and self._parse_assignment(), 445 ) 446 447 return this 448 449 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 450 """ 451 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 452 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 453 """ 454 if not self._match(TokenType.L_BRACE): 455 return None 456 457 this = self._parse_id_var() 458 self._match(TokenType.COLON) 459 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 460 self._match_text_seq("IDENTIFIER") and "Identifier" 461 ) 462 463 if not kind: 464 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 465 elif not self._match(TokenType.R_BRACE): 466 self.raise_error("Expecting }") 467 468 return self.expression(exp.Placeholder, this=this, kind=kind) 469 470 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 471 this = super()._parse_in(this) 472 this.set("is_global", is_global) 473 return this 474 475 def _parse_table( 476 self, 477 schema: bool = False, 478 joins: bool = False, 479 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 480 parse_bracket: bool = False, 481 is_db_reference: bool = False, 482 parse_partition: bool = False, 483 ) -> t.Optional[exp.Expression]: 484 this = super()._parse_table( 485 schema=schema, 486 joins=joins, 487 alias_tokens=alias_tokens, 488 parse_bracket=parse_bracket, 489 is_db_reference=is_db_reference, 490 ) 491 492 if self._match(TokenType.FINAL): 493 this = self.expression(exp.Final, this=this) 494 495 return this 496 497 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 498 return super()._parse_position(haystack_first=True) 499 500 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 501 def _parse_cte(self) -> exp.CTE: 502 # WITH <identifier> AS <subquery expression> 503 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 504 505 if not cte: 506 # WITH <expression> AS <identifier> 507 cte = self.expression( 508 exp.CTE, 509 this=self._parse_assignment(), 510 alias=self._parse_table_alias(), 511 scalar=True, 512 ) 513 514 return cte 515 516 def _parse_join_parts( 517 self, 518 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 519 is_global = self._match(TokenType.GLOBAL) and self._prev 520 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 521 522 if kind_pre: 523 kind = self._match_set(self.JOIN_KINDS) and self._prev 524 side = self._match_set(self.JOIN_SIDES) and self._prev 525 return is_global, side, kind 526 527 return ( 528 is_global, 529 self._match_set(self.JOIN_SIDES) and self._prev, 530 self._match_set(self.JOIN_KINDS) and self._prev, 531 ) 532 533 def _parse_join( 534 self, skip_join_token: bool = False, parse_bracket: bool = False 535 ) -> t.Optional[exp.Join]: 536 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 537 if join: 538 join.set("global", join.args.pop("method", None)) 539 540 return join 541 542 def _parse_function( 543 self, 544 functions: 
t.Optional[t.Dict[str, t.Callable]] = None, 545 anonymous: bool = False, 546 optional_parens: bool = True, 547 any_token: bool = False, 548 ) -> t.Optional[exp.Expression]: 549 expr = super()._parse_function( 550 functions=functions, 551 anonymous=anonymous, 552 optional_parens=optional_parens, 553 any_token=any_token, 554 ) 555 556 func = expr.this if isinstance(expr, exp.Window) else expr 557 558 # Aggregate functions can be split in 2 parts: <func_name><suffix> 559 parts = ( 560 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 561 ) 562 563 if parts: 564 params = self._parse_func_params(func) 565 566 kwargs = { 567 "this": func.this, 568 "expressions": func.expressions, 569 } 570 if parts[1]: 571 kwargs["parts"] = parts 572 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 573 else: 574 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 575 576 kwargs["exp_class"] = exp_class 577 if params: 578 kwargs["params"] = params 579 580 func = self.expression(**kwargs) 581 582 if isinstance(expr, exp.Window): 583 # The window's func was parsed as Anonymous in base parser, fix its 584 # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc 585 expr.set("this", func) 586 elif params: 587 # Params have blocked super()._parse_function() from parsing the following window 588 # (if that exists) as they're standing between the function call and the window spec 589 expr = self._parse_window(func) 590 else: 591 expr = func 592 593 return expr 594 595 def _parse_func_params( 596 self, this: t.Optional[exp.Func] = None 597 ) -> t.Optional[t.List[exp.Expression]]: 598 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 599 return self._parse_csv(self._parse_lambda) 600 601 if self._match(TokenType.L_PAREN): 602 params = self._parse_csv(self._parse_lambda) 603 self._match_r_paren(this) 604 return params 605 606 return None 607 608 def _parse_quantile(self) -> exp.Quantile: 609 this = self._parse_lambda() 610 params = self._parse_func_params() 611 if params: 612 return self.expression(exp.Quantile, this=params[0], quantile=this) 613 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 614 615 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 616 return super()._parse_wrapped_id_vars(optional=True) 617 618 def _parse_primary_key( 619 self, wrapped_optional: bool = False, in_props: bool = False 620 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 621 return super()._parse_primary_key( 622 wrapped_optional=wrapped_optional or in_props, in_props=in_props 623 ) 624 625 def _parse_on_property(self) -> t.Optional[exp.Expression]: 626 index = self._index 627 if self._match_text_seq("CLUSTER"): 628 this = self._parse_id_var() 629 if this: 630 return self.expression(exp.OnCluster, this=this) 631 else: 632 self._retreat(index) 633 return None 634 635 def _parse_index_constraint( 636 self, kind: t.Optional[str] = None 637 ) -> exp.IndexColumnConstraint: 638 # INDEX name1 expr TYPE type1(args) GRANULARITY value 639 this = self._parse_id_var() 640 expression = self._parse_assignment() 641 642 index_type = self._match_text_seq("TYPE") and ( 643 self._parse_function() or self._parse_var() 644 ) 645 646 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 647 648 return self.expression( 649 exp.IndexColumnConstraint, 650 this=this, 651 expression=expression, 652 index_type=index_type, 653 granularity=granularity, 654 ) 655 656 def 
_parse_partition(self) -> t.Optional[exp.Partition]: 657 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 658 if not self._match(TokenType.PARTITION): 659 return None 660 661 if self._match_text_seq("ID"): 662 # Corresponds to the PARTITION ID <string_value> syntax 663 expressions: t.List[exp.Expression] = [ 664 self.expression(exp.PartitionId, this=self._parse_string()) 665 ] 666 else: 667 expressions = self._parse_expressions() 668 669 return self.expression(exp.Partition, expressions=expressions) 670 671 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 672 partition = self._parse_partition() 673 674 if not partition or not self._match(TokenType.FROM): 675 return None 676 677 return self.expression( 678 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 679 ) 680 681 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 682 if not self._match_text_seq("PROJECTION"): 683 return None 684 685 return self.expression( 686 exp.ProjectionDef, 687 this=self._parse_id_var(), 688 expression=self._parse_wrapped(self._parse_statement), 689 ) 690 691 def _parse_constraint(self) -> t.Optional[exp.Expression]: 692 return super()._parse_constraint() or self._parse_projection_def() 693 694 class Generator(generator.Generator): 695 QUERY_HINTS = False 696 STRUCT_DELIMITER = ("(", ")") 697 NVL2_SUPPORTED = False 698 TABLESAMPLE_REQUIRES_PARENS = False 699 TABLESAMPLE_SIZE_IS_ROWS = False 700 TABLESAMPLE_KEYWORDS = "SAMPLE" 701 LAST_DAY_SUPPORTS_DATE_PART = False 702 CAN_IMPLEMENT_ARRAY_ANY = True 703 SUPPORTS_TO_NUMBER = False 704 JOIN_HINTS = False 705 TABLE_HINTS = False 706 EXPLICIT_SET_OP = True 707 GROUPINGS_SEP = "" 708 SET_OP_MODIFIERS = False 709 SUPPORTS_TABLE_ALIAS_COLUMNS = False 710 711 STRING_TYPE_MAPPING = { 712 exp.DataType.Type.CHAR: "String", 713 exp.DataType.Type.LONGBLOB: "String", 714 exp.DataType.Type.LONGTEXT: "String", 715 exp.DataType.Type.MEDIUMBLOB: "String", 716 exp.DataType.Type.MEDIUMTEXT: "String", 717 exp.DataType.Type.TINYBLOB: "String", 718 exp.DataType.Type.TINYTEXT: "String", 719 exp.DataType.Type.TEXT: "String", 720 exp.DataType.Type.VARBINARY: "String", 721 exp.DataType.Type.VARCHAR: "String", 722 } 723 724 SUPPORTED_JSON_PATH_PARTS = { 725 exp.JSONPathKey, 726 exp.JSONPathRoot, 727 exp.JSONPathSubscript, 728 } 729 730 TYPE_MAPPING = { 731 **generator.Generator.TYPE_MAPPING, 732 **STRING_TYPE_MAPPING, 733 exp.DataType.Type.ARRAY: "Array", 734 exp.DataType.Type.BIGINT: "Int64", 735 exp.DataType.Type.DATE32: "Date32", 736 exp.DataType.Type.DATETIME64: "DateTime64", 737 exp.DataType.Type.DOUBLE: "Float64", 738 exp.DataType.Type.ENUM: "Enum", 739 exp.DataType.Type.ENUM8: "Enum8", 740 exp.DataType.Type.ENUM16: "Enum16", 741 exp.DataType.Type.FIXEDSTRING: "FixedString", 742 exp.DataType.Type.FLOAT: "Float32", 743 exp.DataType.Type.INT: "Int32", 744 exp.DataType.Type.MEDIUMINT: "Int32", 745 exp.DataType.Type.INT128: "Int128", 746 exp.DataType.Type.INT256: "Int256", 747 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 748 exp.DataType.Type.MAP: "Map", 749 exp.DataType.Type.NESTED: "Nested", 750 exp.DataType.Type.NULLABLE: "Nullable", 751 exp.DataType.Type.SMALLINT: "Int16", 752 exp.DataType.Type.STRUCT: "Tuple", 753 exp.DataType.Type.TINYINT: "Int8", 754 exp.DataType.Type.UBIGINT: "UInt64", 755 exp.DataType.Type.UINT: "UInt32", 756 exp.DataType.Type.UINT128: "UInt128", 757 exp.DataType.Type.UINT256: "UInt256", 758 exp.DataType.Type.USMALLINT: "UInt16", 759 
exp.DataType.Type.UTINYINT: "UInt8", 760 exp.DataType.Type.IPV4: "IPv4", 761 exp.DataType.Type.IPV6: "IPv6", 762 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 763 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 764 } 765 766 TRANSFORMS = { 767 **generator.Generator.TRANSFORMS, 768 exp.AnyValue: rename_func("any"), 769 exp.ApproxDistinct: rename_func("uniq"), 770 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 771 exp.ArraySize: rename_func("LENGTH"), 772 exp.ArraySum: rename_func("arraySum"), 773 exp.ArgMax: arg_max_or_min_no_count("argMax"), 774 exp.ArgMin: arg_max_or_min_no_count("argMin"), 775 exp.Array: inline_array_sql, 776 exp.CastToStrType: rename_func("CAST"), 777 exp.CountIf: rename_func("countIf"), 778 exp.CompressColumnConstraint: lambda self, 779 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 780 exp.ComputedColumnConstraint: lambda self, 781 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 782 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 783 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 784 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 785 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 786 exp.Explode: rename_func("arrayJoin"), 787 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 788 exp.IsNan: rename_func("isNaN"), 789 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 790 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 791 exp.JSONPathKey: json_path_key_only_name, 792 exp.JSONPathRoot: lambda *_: "", 793 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 794 exp.Nullif: rename_func("nullIf"), 795 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 796 exp.Pivot: no_pivot_sql, 797 exp.Quantile: _quantile_sql, 798 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 799 exp.Rand: rename_func("randCanonical"), 800 exp.StartsWith: rename_func("startsWith"), 801 exp.StrPosition: lambda self, e: self.func( 802 "position", e.this, e.args.get("substr"), e.args.get("position") 803 ), 804 exp.TimeToStr: lambda self, e: self.func( 805 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone") 806 ), 807 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 808 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 809 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 810 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 811 exp.MD5Digest: rename_func("MD5"), 812 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 813 exp.SHA: rename_func("SHA1"), 814 exp.SHA2: sha256_sql, 815 exp.UnixToTime: _unix_to_time_sql, 816 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 817 exp.Variance: rename_func("varSamp"), 818 exp.Stddev: rename_func("stddevSamp"), 819 } 820 821 PROPERTIES_LOCATION = { 822 **generator.Generator.PROPERTIES_LOCATION, 823 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 824 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 825 exp.OnCluster: exp.Properties.Location.POST_NAME, 826 } 827 828 # there's no list in docs, but it can be found in Clickhouse code 829 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 830 ON_CLUSTER_TARGETS = { 831 "DATABASE", 832 "TABLE", 833 "VIEW", 834 "DICTIONARY", 835 "INDEX", 836 "FUNCTION", 837 "NAMED COLLECTION", 838 } 839 840 def _jsonpathsubscript_sql(self, 
expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
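The tail of the dialect above wires up several ClickHouse-only clauses (PREWHERE, PARTITION, PROJECTION). A minimal round-trip sketch for PREWHERE; the output comment is indicative and may vary slightly between sqlglot versions:

    import sqlglot

    # PREWHERE is ClickHouse-specific; it parses into exp.PreWhere and is
    # rendered back verbatim when generating ClickHouse SQL.
    sql = "SELECT * FROM t PREWHERE a = 1 WHERE b = 2"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))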
Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the whole query (this resolution is done in _qualify_columns()), except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
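A sketch of how this plays out through the optimizer's qualification pass; qualify and its dialect argument are real sqlglot APIs, but the exact rendered output can differ between versions:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1
    """

    # With read="clickhouse", FORCE_EARLY_ALIAS_REF_EXPANSION makes the alias
    # "my_id" expand to "id" before columns are qualified, mirroring
    # ClickHouse's own name resolution.
    tree = sqlglot.parse_one(sql, read="clickhouse")
    print(qualify(tree, dialect="clickhouse").sql("clickhouse"))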
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (the corresponding literal character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    COMMENTS = ["--", "#", "#!", ("/*", "*/")]
    IDENTIFIERS = ['"', "`"]
    STRING_ESCAPES = ["'", "\\"]
    BIT_STRINGS = [("0b", "")]
    HEX_STRINGS = [("0x", ""), ("0X", "")]
    HEREDOC_STRINGS = ["$"]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ATTACH": TokenType.COMMAND,
        "DATE32": TokenType.DATE32,
        "DATETIME64": TokenType.DATETIME64,
        "DICTIONARY": TokenType.DICTIONARY,
        "ENUM8": TokenType.ENUM8,
        "ENUM16": TokenType.ENUM16,
        "FINAL": TokenType.FINAL,
        "FIXEDSTRING": TokenType.FIXEDSTRING,
        "FLOAT32": TokenType.FLOAT,
        "FLOAT64": TokenType.DOUBLE,
        "GLOBAL": TokenType.GLOBAL,
        "INT256": TokenType.INT256,
        "LOWCARDINALITY": TokenType.LOWCARDINALITY,
        "MAP": TokenType.MAP,
        "NESTED": TokenType.NESTED,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "TUPLE": TokenType.STRUCT,
        "UINT128": TokenType.UINT128,
        "UINT16": TokenType.USMALLINT,
        "UINT256": TokenType.UINT256,
        "UINT32": TokenType.UINT,
        "UINT64": TokenType.UBIGINT,
        "UINT8": TokenType.UTINYINT,
        "IPV4": TokenType.IPV4,
        "IPV6": TokenType.IPV6,
        "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
        "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
        "SYSTEM": TokenType.COMMAND,
        "PREWHERE": TokenType.PREWHERE,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }
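A small sketch of the dialect-specific tokenization; the exact token stream can vary across sqlglot versions:

    import sqlglot

    # "#" starts a comment in ClickHouse, and FLOAT64 tokenizes to the DOUBLE
    # type token via the KEYWORDS mapping above.
    for token in sqlglot.tokenize("SELECT CAST(1 AS FLOAT64) # trailing comment", read="clickhouse"):
        print(token.token_type, repr(token.text))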
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.LIKE,
    }

    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
        )

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()
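Two of the parser's ClickHouse-specific behaviors in action: parameterized aggregates of the form name(params)(args), and {name: Type} query parameters. A minimal sketch; the round-trip outputs shown in comments are what this logic is designed to produce, though exact output may vary by version:

    import sqlglot

    # QUANTILE has a dedicated parser (_parse_quantile); a missing parameter
    # list defaults the quantile to 0.5.
    print(sqlglot.parse_one("SELECT quantile(0.9)(x) FROM t", read="clickhouse").sql("clickhouse"))
    # expected: SELECT quantile(0.9)(x) FROM t

    # Query parameters parse into exp.Placeholder nodes with a "kind".
    expr = sqlglot.parse_one("SELECT {lo: UInt32} + {hi: UInt32}", read="clickhouse")
    print(expr.sql("clickhouse"))  # expected: SELECT {lo: UInt32} + {hi: UInt32}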
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
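A sketch of constructing this parser directly with the arguments above; typically you would go through sqlglot.parse_one(sql, read="clickhouse") instead:

    import sqlglot
    from sqlglot.errors import ErrorLevel
    from sqlglot.dialects.clickhouse import ClickHouse

    sql = "SELECT x FROM t FINAL"
    parser = ClickHouse.Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    (tree,) = parser.parse(sqlglot.tokenize(sql, read="clickhouse"), sql)
    print(tree.sql("clickhouse"))  # FINAL is preserved via exp.Final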
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # there's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
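TYPE_MAPPING and TRANSFORMS drive how expressions from other dialects are rendered as ClickHouse SQL and vice versa. A small sketch; the outputs in comments are indicative of these mappings, and exact spacing may vary by version:

    import sqlglot

    # Standard integer/float/text types map onto ClickHouse's sized types.
    print(sqlglot.transpile("CREATE TABLE t (a BIGINT, b DOUBLE, c TEXT)", write="clickhouse")[0])
    # e.g. CREATE TABLE t (a Int64, b Float64, c String)

    # uniq() parses to exp.ApproxDistinct, which should become
    # APPROX_COUNT_DISTINCT when targeting DuckDB.
    print(sqlglot.transpile("SELECT uniq(x) FROM t", read="clickhouse", write="duckdb")[0])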
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
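These options are normally passed through the top-level helpers rather than by constructing the generator directly. A minimal sketch of the pretty and identify knobs (outputs illustrative):

    import sqlglot

    sql = "SELECT a, b FROM t WHERE a > 1"

    # pretty=True formats the output across multiple lines.
    print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse", pretty=True)[0])

    # identify=True quotes every identifier; ClickHouse's primary quote
    # character is the double quote, e.g. SELECT "a", "b" FROM "t" WHERE "a" > 1.
    print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse", identify=True)[0])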
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        return "String"

    return super().datatype_sql(expression)
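For example, every string flavor collapses to String and the length parameter is dropped:

    import sqlglot

    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10))", write="clickhouse")[0])
    # SELECT CAST(x AS String)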
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
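This emits ClickHouse's trailing SETTINGS and FORMAT clauses after LIMIT. A sketch of the intended round trip (output comment indicative):

    import sqlglot

    sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # expected to round-trip the SETTINGS and FORMAT clauses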
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(expression.this, "this")
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

    return super().createable_sql(expression, locations)
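This places POST_NAME properties such as ON CLUSTER between the object name and its column schema. A sketch (output comment indicative):

    import sqlglot

    sql = "CREATE TABLE db.t ON CLUSTER my_cluster (x Int32)"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # expected: CREATE TABLE db.t ON CLUSTER my_cluster (x Int32)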
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
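This renders the data-skipping index constraint parsed by Parser._parse_index_constraint. A sketch of the intended round trip (output comment indicative and version-dependent):

    import sqlglot

    sql = "CREATE TABLE t (x Int32, INDEX idx x TYPE minmax GRANULARITY 8)"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # expected to preserve: INDEX idx x TYPE minmax GRANULARITY 8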
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql
- pad_sql