sqlglot.dialects.dialect
from __future__ import annotations

import logging
import typing as t
from enum import Enum, auto
from functools import reduce

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.generator import Generator, unsupported_args
from sqlglot.helper import AutoName, flatten, is_int, seq_get, subclasses, to_bool
from sqlglot.jsonpath import JSONPathTokenizer, parse as parse_json_path
from sqlglot.parser import Parser
from sqlglot.time import TIMEZONES, format_time, subsecond_precision
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import new_trie

DATE_ADD_OR_DIFF = t.Union[
    exp.DateAdd,
    exp.DateDiff,
    exp.DateSub,
    exp.TsOrDsAdd,
    exp.TsOrDsDiff,
]
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TsOrDsAdd, exp.DateSub]
JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar]


if t.TYPE_CHECKING:
    from sqlglot._typing import B, E, F

    from sqlglot.optimizer.annotate_types import TypeAnnotator

    AnnotatorsType = t.Dict[t.Type[E], t.Callable[[TypeAnnotator, E], E]]

logger = logging.getLogger("sqlglot")

UNESCAPED_SEQUENCES = {
    "\\a": "\a",
    "\\b": "\b",
    "\\f": "\f",
    "\\n": "\n",
    "\\r": "\r",
    "\\t": "\t",
    "\\v": "\v",
    "\\\\": "\\",
}


def _annotate_with_type_lambda(data_type: exp.DataType.Type) -> t.Callable[[TypeAnnotator, E], E]:
    return lambda self, e: self._annotate_with_type(e, data_type)


class Dialects(str, Enum):
    """Dialects supported by SQLGlot."""

    DIALECT = ""

    ATHENA = "athena"
    BIGQUERY = "bigquery"
    CLICKHOUSE = "clickhouse"
    DATABRICKS = "databricks"
    DORIS = "doris"
    DRILL = "drill"
    DUCKDB = "duckdb"
    HIVE = "hive"
    MATERIALIZE = "materialize"
    MYSQL = "mysql"
    ORACLE = "oracle"
    POSTGRES = "postgres"
    PRESTO = "presto"
    PRQL = "prql"
    REDSHIFT = "redshift"
    RISINGWAVE = "risingwave"
    SNOWFLAKE = "snowflake"
    SPARK = "spark"
    SPARK2 = "spark2"
    SQLITE = "sqlite"
    STARROCKS = "starrocks"
    TABLEAU = "tableau"
    TERADATA = "teradata"
    TRINO = "trino"
    TSQL = "tsql"


class NormalizationStrategy(str, AutoName):
    """Specifies the strategy according to which identifiers should be normalized."""

    LOWERCASE = auto()
    """Unquoted identifiers are lowercased."""

    UPPERCASE = auto()
    """Unquoted identifiers are uppercased."""

    CASE_SENSITIVE = auto()
    """Always case-sensitive, regardless of quotes."""

    CASE_INSENSITIVE = auto()
    """Always case-insensitive, regardless of quotes."""
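
# Illustrative usage (not part of the module): both enums are string-valued, so
# their members can be compared against plain user input directly.
#
#     >>> Dialects.DUCKDB.value
#     'duckdb'
#     >>> NormalizationStrategy.UPPERCASE.value
#     'UPPERCASE'
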
class _Dialect(type):
    classes: t.Dict[str, t.Type[Dialect]] = {}

    def __eq__(cls, other: t.Any) -> bool:
        if cls is other:
            return True
        if isinstance(other, str):
            return cls is cls.get(other)
        if isinstance(other, Dialect):
            return cls is type(other)

        return False

    def __hash__(cls) -> int:
        return hash(cls.__name__.lower())

    @classmethod
    def __getitem__(cls, key: str) -> t.Type[Dialect]:
        return cls.classes[key]

    @classmethod
    def get(
        cls, key: str, default: t.Optional[t.Type[Dialect]] = None
    ) -> t.Optional[t.Type[Dialect]]:
        return cls.classes.get(key, default)

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        enum = Dialects.__members__.get(clsname.upper())
        cls.classes[enum.value if enum is not None else clsname.lower()] = klass

        klass.TIME_TRIE = new_trie(klass.TIME_MAPPING)
        klass.FORMAT_TRIE = (
            new_trie(klass.FORMAT_MAPPING) if klass.FORMAT_MAPPING else klass.TIME_TRIE
        )
        klass.INVERSE_TIME_MAPPING = {v: k for k, v in klass.TIME_MAPPING.items()}
        klass.INVERSE_TIME_TRIE = new_trie(klass.INVERSE_TIME_MAPPING)
        klass.INVERSE_FORMAT_MAPPING = {v: k for k, v in klass.FORMAT_MAPPING.items()}
        klass.INVERSE_FORMAT_TRIE = new_trie(klass.INVERSE_FORMAT_MAPPING)

        klass.INVERSE_CREATABLE_KIND_MAPPING = {
            v: k for k, v in klass.CREATABLE_KIND_MAPPING.items()
        }

        base = seq_get(bases, 0)
        base_tokenizer = (getattr(base, "tokenizer_class", Tokenizer),)
        base_jsonpath_tokenizer = (getattr(base, "jsonpath_tokenizer_class", JSONPathTokenizer),)
        base_parser = (getattr(base, "parser_class", Parser),)
        base_generator = (getattr(base, "generator_class", Generator),)

        klass.tokenizer_class = klass.__dict__.get(
            "Tokenizer", type("Tokenizer", base_tokenizer, {})
        )
        klass.jsonpath_tokenizer_class = klass.__dict__.get(
            "JSONPathTokenizer", type("JSONPathTokenizer", base_jsonpath_tokenizer, {})
        )
        klass.parser_class = klass.__dict__.get("Parser", type("Parser", base_parser, {}))
        klass.generator_class = klass.__dict__.get(
            "Generator", type("Generator", base_generator, {})
        )

        klass.QUOTE_START, klass.QUOTE_END = list(klass.tokenizer_class._QUOTES.items())[0]
        klass.IDENTIFIER_START, klass.IDENTIFIER_END = list(
            klass.tokenizer_class._IDENTIFIERS.items()
        )[0]

        def get_start_end(token_type: TokenType) -> t.Tuple[t.Optional[str], t.Optional[str]]:
            return next(
                (
                    (s, e)
                    for s, (e, t) in klass.tokenizer_class._FORMAT_STRINGS.items()
                    if t == token_type
                ),
                (None, None),
            )

        klass.BIT_START, klass.BIT_END = get_start_end(TokenType.BIT_STRING)
        klass.HEX_START, klass.HEX_END = get_start_end(TokenType.HEX_STRING)
        klass.BYTE_START, klass.BYTE_END = get_start_end(TokenType.BYTE_STRING)
        klass.UNICODE_START, klass.UNICODE_END = get_start_end(TokenType.UNICODE_STRING)

        if "\\" in klass.tokenizer_class.STRING_ESCAPES:
            klass.UNESCAPED_SEQUENCES = {
                **UNESCAPED_SEQUENCES,
                **klass.UNESCAPED_SEQUENCES,
            }

        klass.ESCAPED_SEQUENCES = {v: k for k, v in klass.UNESCAPED_SEQUENCES.items()}

        klass.SUPPORTS_COLUMN_JOIN_MARKS = "(+)" in klass.tokenizer_class.KEYWORDS

        if enum not in ("", "bigquery"):
            klass.generator_class.SELECT_KINDS = ()

        if enum not in ("", "athena", "presto", "trino"):
            klass.generator_class.TRY_SUPPORTED = False
            klass.generator_class.SUPPORTS_UESCAPE = False

        if enum not in ("", "databricks", "hive", "spark", "spark2"):
            modifier_transforms = klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS.copy()
            for modifier in ("cluster", "distribute", "sort"):
                modifier_transforms.pop(modifier, None)

            klass.generator_class.AFTER_HAVING_MODIFIER_TRANSFORMS = modifier_transforms

        if enum not in ("", "doris", "mysql"):
            klass.parser_class.ID_VAR_TOKENS = klass.parser_class.ID_VAR_TOKENS | {
                TokenType.STRAIGHT_JOIN,
            }
            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
                TokenType.STRAIGHT_JOIN,
            }

        if not klass.SUPPORTS_SEMI_ANTI_JOIN:
            klass.parser_class.TABLE_ALIAS_TOKENS = klass.parser_class.TABLE_ALIAS_TOKENS | {
                TokenType.ANTI,
                TokenType.SEMI,
            }

        return klass
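
# Illustrative sketch (not part of the module): the metaclass registers every
# Dialect subclass under its lowercased class name and makes dialect classes
# compare equal to that name.
#
#     >>> class MyDialect(Dialect):
#     ...     pass
#     >>> Dialect.classes["mydialect"] is MyDialect
#     True
#     >>> MyDialect == "mydialect"
#     True
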
arrays.""" 227 228 WEEK_OFFSET = 0 229 """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.""" 230 231 UNNEST_COLUMN_ONLY = False 232 """Whether `UNNEST` table aliases are treated as column aliases.""" 233 234 ALIAS_POST_TABLESAMPLE = False 235 """Whether the table alias comes after tablesample.""" 236 237 TABLESAMPLE_SIZE_IS_PERCENT = False 238 """Whether a size in the table sample clause represents percentage.""" 239 240 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE 241 """Specifies the strategy according to which identifiers should be normalized.""" 242 243 IDENTIFIERS_CAN_START_WITH_DIGIT = False 244 """Whether an unquoted identifier can start with a digit.""" 245 246 DPIPE_IS_STRING_CONCAT = True 247 """Whether the DPIPE token (`||`) is a string concatenation operator.""" 248 249 STRICT_STRING_CONCAT = False 250 """Whether `CONCAT`'s arguments must be strings.""" 251 252 SUPPORTS_USER_DEFINED_TYPES = True 253 """Whether user-defined data types are supported.""" 254 255 SUPPORTS_SEMI_ANTI_JOIN = True 256 """Whether `SEMI` or `ANTI` joins are supported.""" 257 258 SUPPORTS_COLUMN_JOIN_MARKS = False 259 """Whether the old-style outer join (+) syntax is supported.""" 260 261 COPY_PARAMS_ARE_CSV = True 262 """Separator of COPY statement parameters.""" 263 264 NORMALIZE_FUNCTIONS: bool | str = "upper" 265 """ 266 Determines how function names are going to be normalized. 267 Possible values: 268 "upper" or True: Convert names to uppercase. 269 "lower": Convert names to lowercase. 270 False: Disables function name normalization. 271 """ 272 273 PRESERVE_ORIGINAL_NAMES: bool = False 274 """ 275 Whether the name of the function should be preserved inside the node's metadata, 276 can be useful for roundtripping deprecated vs new functions that share an AST node 277 e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery 278 """ 279 280 LOG_BASE_FIRST: t.Optional[bool] = True 281 """ 282 Whether the base comes first in the `LOG` function. 283 Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`) 284 """ 285 286 NULL_ORDERING = "nulls_are_small" 287 """ 288 Default `NULL` ordering method to use if not explicitly set. 289 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` 290 """ 291 292 TYPED_DIVISION = False 293 """ 294 Whether the behavior of `a / b` depends on the types of `a` and `b`. 295 False means `a / b` is always float division. 296 True means `a / b` is integer division if both `a` and `b` are integers. 
297 """ 298 299 SAFE_DIVISION = False 300 """Whether division by zero throws an error (`False`) or returns NULL (`True`).""" 301 302 CONCAT_COALESCE = False 303 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" 304 305 HEX_LOWERCASE = False 306 """Whether the `HEX` function returns a lowercase hexadecimal string.""" 307 308 DATE_FORMAT = "'%Y-%m-%d'" 309 DATEINT_FORMAT = "'%Y%m%d'" 310 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" 311 312 TIME_MAPPING: t.Dict[str, str] = {} 313 """Associates this dialect's time formats with their equivalent Python `strftime` formats.""" 314 315 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time 316 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE 317 FORMAT_MAPPING: t.Dict[str, str] = {} 318 """ 319 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. 320 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. 321 """ 322 323 UNESCAPED_SEQUENCES: t.Dict[str, str] = {} 324 """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).""" 325 326 PSEUDOCOLUMNS: t.Set[str] = set() 327 """ 328 Columns that are auto-generated by the engine corresponding to this dialect. 329 For example, such columns may be excluded from `SELECT *` queries. 330 """ 331 332 PREFER_CTE_ALIAS_COLUMN = False 333 """ 334 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the 335 HAVING clause of the CTE. This flag will cause the CTE alias columns to override 336 any projection aliases in the subquery. 337 338 For example, 339 WITH y(c) AS ( 340 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 341 ) SELECT c FROM y; 342 343 will be rewritten as 344 345 WITH y(c) AS ( 346 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 347 ) SELECT c FROM y; 348 """ 349 350 COPY_PARAMS_ARE_CSV = True 351 """ 352 Whether COPY statement parameters are separated by comma or whitespace 353 """ 354 355 FORCE_EARLY_ALIAS_REF_EXPANSION = False 356 """ 357 Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()). 
358 359 For example: 360 WITH data AS ( 361 SELECT 362 1 AS id, 363 2 AS my_id 364 ) 365 SELECT 366 id AS my_id 367 FROM 368 data 369 WHERE 370 my_id = 1 371 GROUP BY 372 my_id, 373 HAVING 374 my_id = 1 375 376 In most dialects, "my_id" would refer to "data.my_id" across the query, except: 377 - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e 378 it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" 379 - Clickhouse, which will forward the alias across the query i.e it resolves 380 to "WHERE id = 1 GROUP BY id HAVING id = 1" 381 """ 382 383 EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False 384 """Whether alias reference expansion before qualification should only happen for the GROUP BY clause.""" 385 386 SUPPORTS_ORDER_BY_ALL = False 387 """ 388 Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks 389 """ 390 391 HAS_DISTINCT_ARRAY_CONSTRUCTORS = False 392 """ 393 Whether the ARRAY constructor is context-sensitive, i.e in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3) 394 as the former is of type INT[] vs the latter which is SUPER 395 """ 396 397 SUPPORTS_FIXED_SIZE_ARRAYS = False 398 """ 399 Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. 400 in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should 401 be interpreted as a subscript/index operator. 402 """ 403 404 STRICT_JSON_PATH_SYNTAX = True 405 """Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.""" 406 407 ON_CONDITION_EMPTY_BEFORE_ERROR = True 408 """Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle).""" 409 410 ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True 411 """Whether ArrayAgg needs to filter NULL values.""" 412 413 PROMOTE_TO_INFERRED_DATETIME_TYPE = False 414 """ 415 This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted 416 to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal 417 is cast to x's type to match it instead. 418 """ 419 420 SUPPORTS_VALUES_DEFAULT = True 421 """Whether the DEFAULT keyword is supported in the VALUES clause.""" 422 423 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = False 424 """Whether number literals can include underscores for better readability""" 425 426 REGEXP_EXTRACT_DEFAULT_GROUP = 0 427 """The default value for the capturing group.""" 428 429 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 430 exp.Except: True, 431 exp.Intersect: True, 432 exp.Union: True, 433 } 434 """ 435 Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL` 436 must be explicitly specified. 437 """ 438 439 CREATABLE_KIND_MAPPING: dict[str, str] = {} 440 """ 441 Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse 442 equivalent of CREATE SCHEMA is CREATE DATABASE. 
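
    # Illustrative sketch (not part of the module): concrete dialects customize
    # behavior by overriding the class-level flags above, e.g. a hypothetical
    # dialect with Snowflake-like identifier casing:
    #
    #     >>> class UppercaseDialect(Dialect):
    #     ...     NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    #     ...     NULL_ORDERING = "nulls_are_large"
    #     >>> UppercaseDialect().normalization_strategy.value
    #     'UPPERCASE'
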
443 """ 444 445 # --- Autofilled --- 446 447 tokenizer_class = Tokenizer 448 jsonpath_tokenizer_class = JSONPathTokenizer 449 parser_class = Parser 450 generator_class = Generator 451 452 # A trie of the time_mapping keys 453 TIME_TRIE: t.Dict = {} 454 FORMAT_TRIE: t.Dict = {} 455 456 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} 457 INVERSE_TIME_TRIE: t.Dict = {} 458 INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {} 459 INVERSE_FORMAT_TRIE: t.Dict = {} 460 461 INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {} 462 463 ESCAPED_SEQUENCES: t.Dict[str, str] = {} 464 465 # Delimiters for string literals and identifiers 466 QUOTE_START = "'" 467 QUOTE_END = "'" 468 IDENTIFIER_START = '"' 469 IDENTIFIER_END = '"' 470 471 # Delimiters for bit, hex, byte and unicode literals 472 BIT_START: t.Optional[str] = None 473 BIT_END: t.Optional[str] = None 474 HEX_START: t.Optional[str] = None 475 HEX_END: t.Optional[str] = None 476 BYTE_START: t.Optional[str] = None 477 BYTE_END: t.Optional[str] = None 478 UNICODE_START: t.Optional[str] = None 479 UNICODE_END: t.Optional[str] = None 480 481 DATE_PART_MAPPING = { 482 "Y": "YEAR", 483 "YY": "YEAR", 484 "YYY": "YEAR", 485 "YYYY": "YEAR", 486 "YR": "YEAR", 487 "YEARS": "YEAR", 488 "YRS": "YEAR", 489 "MM": "MONTH", 490 "MON": "MONTH", 491 "MONS": "MONTH", 492 "MONTHS": "MONTH", 493 "D": "DAY", 494 "DD": "DAY", 495 "DAYS": "DAY", 496 "DAYOFMONTH": "DAY", 497 "DAY OF WEEK": "DAYOFWEEK", 498 "WEEKDAY": "DAYOFWEEK", 499 "DOW": "DAYOFWEEK", 500 "DW": "DAYOFWEEK", 501 "WEEKDAY_ISO": "DAYOFWEEKISO", 502 "DOW_ISO": "DAYOFWEEKISO", 503 "DW_ISO": "DAYOFWEEKISO", 504 "DAY OF YEAR": "DAYOFYEAR", 505 "DOY": "DAYOFYEAR", 506 "DY": "DAYOFYEAR", 507 "W": "WEEK", 508 "WK": "WEEK", 509 "WEEKOFYEAR": "WEEK", 510 "WOY": "WEEK", 511 "WY": "WEEK", 512 "WEEK_ISO": "WEEKISO", 513 "WEEKOFYEARISO": "WEEKISO", 514 "WEEKOFYEAR_ISO": "WEEKISO", 515 "Q": "QUARTER", 516 "QTR": "QUARTER", 517 "QTRS": "QUARTER", 518 "QUARTERS": "QUARTER", 519 "H": "HOUR", 520 "HH": "HOUR", 521 "HR": "HOUR", 522 "HOURS": "HOUR", 523 "HRS": "HOUR", 524 "M": "MINUTE", 525 "MI": "MINUTE", 526 "MIN": "MINUTE", 527 "MINUTES": "MINUTE", 528 "MINS": "MINUTE", 529 "S": "SECOND", 530 "SEC": "SECOND", 531 "SECONDS": "SECOND", 532 "SECS": "SECOND", 533 "MS": "MILLISECOND", 534 "MSEC": "MILLISECOND", 535 "MSECS": "MILLISECOND", 536 "MSECOND": "MILLISECOND", 537 "MSECONDS": "MILLISECOND", 538 "MILLISEC": "MILLISECOND", 539 "MILLISECS": "MILLISECOND", 540 "MILLISECON": "MILLISECOND", 541 "MILLISECONDS": "MILLISECOND", 542 "US": "MICROSECOND", 543 "USEC": "MICROSECOND", 544 "USECS": "MICROSECOND", 545 "MICROSEC": "MICROSECOND", 546 "MICROSECS": "MICROSECOND", 547 "USECOND": "MICROSECOND", 548 "USECONDS": "MICROSECOND", 549 "MICROSECONDS": "MICROSECOND", 550 "NS": "NANOSECOND", 551 "NSEC": "NANOSECOND", 552 "NANOSEC": "NANOSECOND", 553 "NSECOND": "NANOSECOND", 554 "NSECONDS": "NANOSECOND", 555 "NANOSECS": "NANOSECOND", 556 "EPOCH_SECOND": "EPOCH", 557 "EPOCH_SECONDS": "EPOCH", 558 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 559 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 560 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 561 "TZH": "TIMEZONE_HOUR", 562 "TZM": "TIMEZONE_MINUTE", 563 "DEC": "DECADE", 564 "DECS": "DECADE", 565 "DECADES": "DECADE", 566 "MIL": "MILLENIUM", 567 "MILS": "MILLENIUM", 568 "MILLENIA": "MILLENIUM", 569 "C": "CENTURY", 570 "CENT": "CENTURY", 571 "CENTS": "CENTURY", 572 "CENTURIES": "CENTURY", 573 } 574 575 TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = { 576 exp.DataType.Type.BIGINT: 
    TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = {
        exp.DataType.Type.BIGINT: {
            exp.ApproxDistinct,
            exp.ArraySize,
            exp.Length,
        },
        exp.DataType.Type.BOOLEAN: {
            exp.Between,
            exp.Boolean,
            exp.In,
            exp.RegexpLike,
        },
        exp.DataType.Type.DATE: {
            exp.CurrentDate,
            exp.Date,
            exp.DateFromParts,
            exp.DateStrToDate,
            exp.DiToDate,
            exp.StrToDate,
            exp.TimeStrToDate,
            exp.TsOrDsToDate,
        },
        exp.DataType.Type.DATETIME: {
            exp.CurrentDatetime,
            exp.Datetime,
            exp.DatetimeAdd,
            exp.DatetimeSub,
        },
        exp.DataType.Type.DOUBLE: {
            exp.ApproxQuantile,
            exp.Avg,
            exp.Exp,
            exp.Ln,
            exp.Log,
            exp.Pow,
            exp.Quantile,
            exp.Round,
            exp.SafeDivide,
            exp.Sqrt,
            exp.Stddev,
            exp.StddevPop,
            exp.StddevSamp,
            exp.ToDouble,
            exp.Variance,
            exp.VariancePop,
        },
        exp.DataType.Type.INT: {
            exp.Ceil,
            exp.DatetimeDiff,
            exp.DateDiff,
            exp.TimestampDiff,
            exp.TimeDiff,
            exp.DateToDi,
            exp.Levenshtein,
            exp.Sign,
            exp.StrPosition,
            exp.TsOrDiToDi,
        },
        exp.DataType.Type.JSON: {
            exp.ParseJSON,
        },
        exp.DataType.Type.TIME: {
            exp.Time,
        },
        exp.DataType.Type.TIMESTAMP: {
            exp.CurrentTime,
            exp.CurrentTimestamp,
            exp.StrToTime,
            exp.TimeAdd,
            exp.TimeStrToTime,
            exp.TimeSub,
            exp.TimestampAdd,
            exp.TimestampSub,
            exp.UnixToTime,
        },
        exp.DataType.Type.TINYINT: {
            exp.Day,
            exp.Month,
            exp.Week,
            exp.Year,
            exp.Quarter,
        },
        exp.DataType.Type.VARCHAR: {
            exp.ArrayConcat,
            exp.Concat,
            exp.ConcatWs,
            exp.DateToDateStr,
            exp.GroupConcat,
            exp.Initcap,
            exp.Lower,
            exp.Substring,
            exp.String,
            exp.TimeToStr,
            exp.TimeToTimeStr,
            exp.Trim,
            exp.TsOrDsToDateStr,
            exp.UnixToStr,
            exp.UnixToTimeStr,
            exp.Upper,
        },
    }

    ANNOTATORS: AnnotatorsType = {
        **{
            expr_type: lambda self, e: self._annotate_unary(e)
            for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias))
        },
        **{
            expr_type: lambda self, e: self._annotate_binary(e)
            for expr_type in subclasses(exp.__name__, exp.Binary)
        },
        **{
            expr_type: _annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
        exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True),
        exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True),
        exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Bracket: lambda self, e: self._annotate_bracket(e),
        exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
        exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"),
        exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Count: lambda self, e: self._annotate_with_type(
            e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT
        ),
        exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()),
        exp.DateAdd: lambda self, e: self._annotate_timeunit(e),
        exp.DateSub: lambda self, e: self._annotate_timeunit(e),
        exp.DateTrunc: lambda self, e: self._annotate_timeunit(e),
        exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Div: lambda self, e: self._annotate_div(e),
        exp.Dot: lambda self, e: self._annotate_dot(e),
        exp.Explode: lambda self, e: self._annotate_explode(e),
        exp.Extract: lambda self, e: self._annotate_extract(e),
        exp.Filter: lambda self, e: self._annotate_by_args(e, "this"),
        exp.GenerateDateArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<DATE>")
        ),
        exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
            e, exp.DataType.build("ARRAY<TIMESTAMP>")
        ),
        exp.Greatest: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"),
        exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL),
        exp.Least: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Literal: lambda self, e: self._annotate_literal(e),
        exp.Map: lambda self, e: self._annotate_map(e),
        exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
        exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL),
        exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"),
        exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN),
        exp.Struct: lambda self, e: self._annotate_struct(e),
        exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True),
        exp.Timestamp: lambda self, e: self._annotate_with_type(
            e,
            exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP,
        ),
        exp.ToMap: lambda self, e: self._annotate_to_map(e),
        exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]),
        exp.Unnest: lambda self, e: self._annotate_unnest(e),
        exp.VarMap: lambda self, e: self._annotate_map(e),
    }
    @classmethod
    def get_or_raise(cls, dialect: DialectType) -> Dialect:
        """
        Look up a dialect in the global dialect registry and return it if it exists.

        Args:
            dialect: The target dialect. If this is a string, it can be optionally followed by
                additional key-value pairs that are separated by commas and are used to specify
                dialect settings, such as whether the dialect's identifiers are case-sensitive.

        Example:
            >>> dialect = Dialect.get_or_raise("duckdb")
            >>> dialect = Dialect.get_or_raise("mysql, normalization_strategy = case_sensitive")

        Returns:
            The corresponding Dialect instance.
        """

        if not dialect:
            return cls()
        if isinstance(dialect, _Dialect):
            return dialect()
        if isinstance(dialect, Dialect):
            return dialect
        if isinstance(dialect, str):
            try:
                dialect_name, *kv_strings = dialect.split(",")
                kv_pairs = (kv.split("=") for kv in kv_strings)
                kwargs = {}
                for pair in kv_pairs:
                    key = pair[0].strip()
                    value: t.Union[bool, str, None] = None

                    if len(pair) == 1:
                        # Default initialize standalone settings to True
                        value = True
                    elif len(pair) == 2:
                        value = pair[1].strip()

                    kwargs[key] = to_bool(value)

            except ValueError:
                raise ValueError(
                    f"Invalid dialect format: '{dialect}'. "
                    "Please use the correct format: 'dialect [, k1 = v1 [, ...]]'."
                )

            result = cls.get(dialect_name.strip())
            if not result:
                from difflib import get_close_matches

                similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or ""
                if similar:
                    similar = f" Did you mean {similar}?"

                raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}")

            return result(**kwargs)

        raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")

    @classmethod
    def format_time(
        cls, expression: t.Optional[str | exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Converts a time format in this dialect to its equivalent Python `strftime` format."""
        if isinstance(expression, str):
            return exp.Literal.string(
                # the time formats are quoted
                format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE)
            )

        if expression and expression.is_string:
            return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE))

        return expression
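
    # Illustrative sketch (not part of the module): with a hypothetical dialect
    # whose TIME_MAPPING translates its minute token "%i" to Python's "%M",
    # format_time() rewrites quoted format strings accordingly.
    #
    #     >>> class MinuteDialect(Dialect):
    #     ...     TIME_MAPPING = {"%i": "%M"}
    #     >>> MinuteDialect.format_time("'%i'").sql()
    #     "'%M'"
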
856 """ 857 if ( 858 isinstance(expression, exp.Identifier) 859 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 860 and ( 861 not expression.quoted 862 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 863 ) 864 ): 865 expression.set( 866 "this", 867 ( 868 expression.this.upper() 869 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 870 else expression.this.lower() 871 ), 872 ) 873 874 return expression 875 876 def case_sensitive(self, text: str) -> bool: 877 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 878 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 879 return False 880 881 unsafe = ( 882 str.islower 883 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 884 else str.isupper 885 ) 886 return any(unsafe(char) for char in text) 887 888 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 889 """Checks if text can be identified given an identify option. 890 891 Args: 892 text: The text to check. 893 identify: 894 `"always"` or `True`: Always returns `True`. 895 `"safe"`: Only returns `True` if the identifier is case-insensitive. 896 897 Returns: 898 Whether the given text can be identified. 899 """ 900 if identify is True or identify == "always": 901 return True 902 903 if identify == "safe": 904 return not self.case_sensitive(text) 905 906 return False 907 908 def quote_identifier(self, expression: E, identify: bool = True) -> E: 909 """ 910 Adds quotes to a given identifier. 911 912 Args: 913 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 914 identify: If set to `False`, the quotes will only be added if the identifier is deemed 915 "unsafe", with respect to its characters and this dialect's normalization strategy. 916 """ 917 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 918 name = expression.this 919 expression.set( 920 "quoted", 921 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 922 ) 923 924 return expression 925 926 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 927 if isinstance(path, exp.Literal): 928 path_text = path.name 929 if path.is_number: 930 path_text = f"[{path_text}]" 931 try: 932 return parse_json_path(path_text, self) 933 except ParseError as e: 934 if self.STRICT_JSON_PATH_SYNTAX: 935 logger.warning(f"Invalid JSON path syntax. 
{str(e)}") 936 937 return path 938 939 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: 940 return self.parser(**opts).parse(self.tokenize(sql), sql) 941 942 def parse_into( 943 self, expression_type: exp.IntoType, sql: str, **opts 944 ) -> t.List[t.Optional[exp.Expression]]: 945 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) 946 947 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: 948 return self.generator(**opts).generate(expression, copy=copy) 949 950 def transpile(self, sql: str, **opts) -> t.List[str]: 951 return [ 952 self.generate(expression, copy=False, **opts) if expression else "" 953 for expression in self.parse(sql) 954 ] 955 956 def tokenize(self, sql: str) -> t.List[Token]: 957 return self.tokenizer.tokenize(sql) 958 959 @property 960 def tokenizer(self) -> Tokenizer: 961 return self.tokenizer_class(dialect=self) 962 963 @property 964 def jsonpath_tokenizer(self) -> JSONPathTokenizer: 965 return self.jsonpath_tokenizer_class(dialect=self) 966 967 def parser(self, **opts) -> Parser: 968 return self.parser_class(dialect=self, **opts) 969 970 def generator(self, **opts) -> Generator: 971 return self.generator_class(dialect=self, **opts) 972 973 974DialectType = t.Union[str, Dialect, t.Type[Dialect], None] 975 976 977def rename_func(name: str) -> t.Callable[[Generator, exp.Expression], str]: 978 return lambda self, expression: self.func(name, *flatten(expression.args.values())) 979 980 981@unsupported_args("accuracy") 982def approx_count_distinct_sql(self: Generator, expression: exp.ApproxDistinct) -> str: 983 return self.func("APPROX_COUNT_DISTINCT", expression.this) 984 985 986def if_sql( 987 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None 988) -> t.Callable[[Generator, exp.If], str]: 989 def _if_sql(self: Generator, expression: exp.If) -> str: 990 return self.func( 991 name, 992 expression.this, 993 expression.args.get("true"), 994 expression.args.get("false") or false_value, 995 ) 996 997 return _if_sql 998 999 1000def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 1001 this = expression.this 1002 if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: 1003 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 1004 1005 return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>") 1006 1007 1008def inline_array_sql(self: Generator, expression: exp.Array) -> str: 1009 return f"[{self.expressions(expression, dynamic=True, new_line=True, skip_first=True, skip_last=True)}]" 1010 1011 1012def inline_array_unless_query(self: Generator, expression: exp.Array) -> str: 1013 elem = seq_get(expression.expressions, 0) 1014 if isinstance(elem, exp.Expression) and elem.find(exp.Query): 1015 return self.func("ARRAY", elem) 1016 return inline_array_sql(self, expression) 1017 1018 1019def no_ilike_sql(self: Generator, expression: exp.ILike) -> str: 1020 return self.like_sql( 1021 exp.Like( 1022 this=exp.Lower(this=expression.this), expression=exp.Lower(this=expression.expression) 1023 ) 1024 ) 1025 1026 1027def no_paren_current_date_sql(self: Generator, expression: exp.CurrentDate) -> str: 1028 zone = self.sql(expression, "this") 1029 return f"CURRENT_DATE AT TIME ZONE {zone}" if zone else "CURRENT_DATE" 1030 1031 1032def no_recursive_cte_sql(self: Generator, expression: exp.With) -> str: 1033 if expression.args.get("recursive"): 1034 self.unsupported("Recursive CTEs are 
unsupported") 1035 expression.args["recursive"] = False 1036 return self.with_sql(expression) 1037 1038 1039def no_safe_divide_sql(self: Generator, expression: exp.SafeDivide, if_sql: str = "IF") -> str: 1040 n = self.sql(expression, "this") 1041 d = self.sql(expression, "expression") 1042 return f"{if_sql}(({d}) <> 0, ({n}) / ({d}), NULL)" 1043 1044 1045def no_tablesample_sql(self: Generator, expression: exp.TableSample) -> str: 1046 self.unsupported("TABLESAMPLE unsupported") 1047 return self.sql(expression.this) 1048 1049 1050def no_pivot_sql(self: Generator, expression: exp.Pivot) -> str: 1051 self.unsupported("PIVOT unsupported") 1052 return "" 1053 1054 1055def no_trycast_sql(self: Generator, expression: exp.TryCast) -> str: 1056 return self.cast_sql(expression) 1057 1058 1059def no_comment_column_constraint_sql( 1060 self: Generator, expression: exp.CommentColumnConstraint 1061) -> str: 1062 self.unsupported("CommentColumnConstraint unsupported") 1063 return "" 1064 1065 1066def no_map_from_entries_sql(self: Generator, expression: exp.MapFromEntries) -> str: 1067 self.unsupported("MAP_FROM_ENTRIES unsupported") 1068 return "" 1069 1070 1071def property_sql(self: Generator, expression: exp.Property) -> str: 1072 return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}" 1073 1074 1075def str_position_sql( 1076 self: Generator, 1077 expression: exp.StrPosition, 1078 generate_instance: bool = False, 1079 str_position_func_name: str = "STRPOS", 1080) -> str: 1081 this = self.sql(expression, "this") 1082 substr = self.sql(expression, "substr") 1083 position = self.sql(expression, "position") 1084 instance = expression.args.get("instance") if generate_instance else None 1085 position_offset = "" 1086 1087 if position: 1088 # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects 1089 this = self.func("SUBSTR", this, position) 1090 position_offset = f" + {position} - 1" 1091 1092 return self.func(str_position_func_name, this, substr, instance) + position_offset 1093 1094 1095def struct_extract_sql(self: Generator, expression: exp.StructExtract) -> str: 1096 return ( 1097 f"{self.sql(expression, 'this')}.{self.sql(exp.to_identifier(expression.expression.name))}" 1098 ) 1099 1100 1101def var_map_sql( 1102 self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP" 1103) -> str: 1104 keys = expression.args["keys"] 1105 values = expression.args["values"] 1106 1107 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 1108 self.unsupported("Cannot convert array columns into map.") 1109 return self.func(map_func_name, keys, values) 1110 1111 args = [] 1112 for key, value in zip(keys.expressions, values.expressions): 1113 args.append(self.sql(key)) 1114 args.append(self.sql(value)) 1115 1116 return self.func(map_func_name, *args) 1117 1118 1119def build_formatted_time( 1120 exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None 1121) -> t.Callable[[t.List], E]: 1122 """Helper used for time expressions. 1123 1124 Args: 1125 exp_class: the expression class to instantiate. 1126 dialect: target sql dialect. 1127 default: the default format, True being time. 1128 1129 Returns: 1130 A callable that can be used to return the appropriately formatted time expression. 
1131 """ 1132 1133 def _builder(args: t.List): 1134 return exp_class( 1135 this=seq_get(args, 0), 1136 format=Dialect[dialect].format_time( 1137 seq_get(args, 1) 1138 or (Dialect[dialect].TIME_FORMAT if default is True else default or None) 1139 ), 1140 ) 1141 1142 return _builder 1143 1144 1145def time_format( 1146 dialect: DialectType = None, 1147) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]: 1148 def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]: 1149 """ 1150 Returns the time format for a given expression, unless it's equivalent 1151 to the default time format of the dialect of interest. 1152 """ 1153 time_format = self.format_time(expression) 1154 return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None 1155 1156 return _time_format 1157 1158 1159def build_date_delta( 1160 exp_class: t.Type[E], 1161 unit_mapping: t.Optional[t.Dict[str, str]] = None, 1162 default_unit: t.Optional[str] = "DAY", 1163) -> t.Callable[[t.List], E]: 1164 def _builder(args: t.List) -> E: 1165 unit_based = len(args) == 3 1166 this = args[2] if unit_based else seq_get(args, 0) 1167 unit = None 1168 if unit_based or default_unit: 1169 unit = args[0] if unit_based else exp.Literal.string(default_unit) 1170 unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit 1171 return exp_class(this=this, expression=seq_get(args, 1), unit=unit) 1172 1173 return _builder 1174 1175 1176def build_date_delta_with_interval( 1177 expression_class: t.Type[E], 1178) -> t.Callable[[t.List], t.Optional[E]]: 1179 def _builder(args: t.List) -> t.Optional[E]: 1180 if len(args) < 2: 1181 return None 1182 1183 interval = args[1] 1184 1185 if not isinstance(interval, exp.Interval): 1186 raise ParseError(f"INTERVAL expression expected but got '{interval}'") 1187 1188 return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval)) 1189 1190 return _builder 1191 1192 1193def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 1194 unit = seq_get(args, 0) 1195 this = seq_get(args, 1) 1196 1197 if isinstance(this, exp.Cast) and this.is_type("date"): 1198 return exp.DateTrunc(unit=unit, this=this) 1199 return exp.TimestampTrunc(this=this, unit=unit) 1200 1201 1202def date_add_interval_sql( 1203 data_type: str, kind: str 1204) -> t.Callable[[Generator, exp.Expression], str]: 1205 def func(self: Generator, expression: exp.Expression) -> str: 1206 this = self.sql(expression, "this") 1207 interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression)) 1208 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 1209 1210 return func 1211 1212 1213def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]: 1214 def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str: 1215 args = [unit_to_str(expression), expression.this] 1216 if zone: 1217 args.append(expression.args.get("zone")) 1218 return self.func("DATE_TRUNC", *args) 1219 1220 return _timestamptrunc_sql 1221 1222 1223def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str: 1224 zone = expression.args.get("zone") 1225 if not zone: 1226 from sqlglot.optimizer.annotate_types import annotate_types 1227 1228 target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP 1229 return self.sql(exp.cast(expression.this, target_type)) 1230 if zone.name.lower() in TIMEZONES: 1231 return self.sql( 1232 
exp.AtTimeZone( 1233 this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), 1234 zone=zone, 1235 ) 1236 ) 1237 return self.func("TIMESTAMP", expression.this, zone) 1238 1239 1240def no_time_sql(self: Generator, expression: exp.Time) -> str: 1241 # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME) 1242 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ) 1243 expr = exp.cast( 1244 exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME 1245 ) 1246 return self.sql(expr) 1247 1248 1249def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str: 1250 this = expression.this 1251 expr = expression.expression 1252 1253 if expr.name.lower() in TIMEZONES: 1254 # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP) 1255 this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ) 1256 this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP) 1257 return self.sql(this) 1258 1259 this = exp.cast(this, exp.DataType.Type.DATE) 1260 expr = exp.cast(expr, exp.DataType.Type.TIME) 1261 1262 return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP)) 1263 1264 1265def locate_to_strposition(args: t.List) -> exp.Expression: 1266 return exp.StrPosition( 1267 this=seq_get(args, 1), substr=seq_get(args, 0), position=seq_get(args, 2) 1268 ) 1269 1270 1271def strposition_to_locate_sql(self: Generator, expression: exp.StrPosition) -> str: 1272 return self.func( 1273 "LOCATE", expression.args.get("substr"), expression.this, expression.args.get("position") 1274 ) 1275 1276 1277def left_to_substring_sql(self: Generator, expression: exp.Left) -> str: 1278 return self.sql( 1279 exp.Substring( 1280 this=expression.this, start=exp.Literal.number(1), length=expression.expression 1281 ) 1282 ) 1283 1284 1285def right_to_substring_sql(self: Generator, expression: exp.Left) -> str: 1286 return self.sql( 1287 exp.Substring( 1288 this=expression.this, 1289 start=exp.Length(this=expression.this) - exp.paren(expression.expression - 1), 1290 ) 1291 ) 1292 1293 1294def timestrtotime_sql( 1295 self: Generator, 1296 expression: exp.TimeStrToTime, 1297 include_precision: bool = False, 1298) -> str: 1299 datatype = exp.DataType.build( 1300 exp.DataType.Type.TIMESTAMPTZ 1301 if expression.args.get("zone") 1302 else exp.DataType.Type.TIMESTAMP 1303 ) 1304 1305 if isinstance(expression.this, exp.Literal) and include_precision: 1306 precision = subsecond_precision(expression.this.name) 1307 if precision > 0: 1308 datatype = exp.DataType.build( 1309 datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))] 1310 ) 1311 1312 return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect)) 1313 1314 1315def datestrtodate_sql(self: Generator, expression: exp.DateStrToDate) -> str: 1316 return self.sql(exp.cast(expression.this, exp.DataType.Type.DATE)) 1317 1318 1319# Used for Presto and Duckdb which use functions that don't support charset, and assume utf-8 1320def encode_decode_sql( 1321 self: Generator, expression: exp.Expression, name: str, replace: bool = True 1322) -> str: 1323 charset = expression.args.get("charset") 1324 if charset and charset.name.lower() != "utf-8": 1325 self.unsupported(f"Expected utf-8 character set, got {charset}.") 1326 1327 return self.func(name, expression.this, expression.args.get("replace") if replace else None) 1328 1329 1330def min_or_least(self: Generator, expression: 
def max_or_greatest(self: Generator, expression: exp.Max) -> str:
    name = "GREATEST" if expression.expressions else "MAX"
    return rename_func(name)(self, expression)


def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str:
    cond = expression.this

    if isinstance(expression.this, exp.Distinct):
        cond = expression.this.expressions[0]
        self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM")

    return self.func("sum", exp.func("if", cond, 1, 0))


def trim_sql(self: Generator, expression: exp.Trim) -> str:
    target = self.sql(expression, "this")
    trim_type = self.sql(expression, "position")
    remove_chars = self.sql(expression, "expression")
    collation = self.sql(expression, "collation")

    # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific
    if not remove_chars:
        return self.trim_sql(expression)

    trim_type = f"{trim_type} " if trim_type else ""
    remove_chars = f"{remove_chars} " if remove_chars else ""
    from_part = "FROM " if trim_type or remove_chars else ""
    collation = f" COLLATE {collation}" if collation else ""
    return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"


def str_to_time_sql(self: Generator, expression: exp.Expression) -> str:
    return self.func("STRPTIME", expression.this, self.format_time(expression))


def concat_to_dpipe_sql(self: Generator, expression: exp.Concat) -> str:
    return self.sql(reduce(lambda x, y: exp.DPipe(this=x, expression=y), expression.expressions))


def concat_ws_to_dpipe_sql(self: Generator, expression: exp.ConcatWs) -> str:
    delim, *rest_args = expression.expressions
    return self.sql(
        reduce(
            lambda x, y: exp.DPipe(this=x, expression=exp.DPipe(this=delim, expression=y)),
            rest_args,
        )
    )


@unsupported_args("position", "occurrence", "parameters")
def regexp_extract_sql(
    self: Generator, expression: exp.RegexpExtract | exp.RegexpExtractAll
) -> str:
    group = expression.args.get("group")

    # Do not render group if it's the default value for this dialect
    if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP):
        group = None

    return self.func(expression.sql_name(), expression.this, expression.expression, group)


@unsupported_args("position", "occurrence", "modifiers")
def regexp_replace_sql(self: Generator, expression: exp.RegexpReplace) -> str:
    return self.func(
        "REGEXP_REPLACE", expression.this, expression.expression, expression.args["replacement"]
    )


def pivot_column_names(aggregations: t.List[exp.Expression], dialect: DialectType) -> t.List[str]:
    names = []
    for agg in aggregations:
        if isinstance(agg, exp.Alias):
            names.append(agg.alias)
        else:
            """
            This case corresponds to aggregations without aliases being used as suffixes
            (e.g. col_avg(foo)). We need to unquote identifiers because they're going to
            be quoted in the base parser's `_parse_pivot` method, due to `to_identifier`.
            Otherwise, we'd end up with `col_avg(`foo`)` (notice the double quotes).
            """
            agg_all_unquoted = agg.transform(
                lambda node: (
                    exp.Identifier(this=node.name, quoted=False)
                    if isinstance(node, exp.Identifier)
                    else node
                )
            )
            names.append(agg_all_unquoted.sql(dialect=dialect, normalize_functions="lower"))

    return names


def binary_from_function(expr_type: t.Type[B]) -> t.Callable[[t.List], B]:
    return lambda args: expr_type(this=seq_get(args, 0), expression=seq_get(args, 1))


# Used to represent DATE_TRUNC in Doris, Postgres and Starrocks dialects
def build_timestamp_trunc(args: t.List) -> exp.TimestampTrunc:
    return exp.TimestampTrunc(this=seq_get(args, 1), unit=seq_get(args, 0))


def any_value_to_max_sql(self: Generator, expression: exp.AnyValue) -> str:
    return self.func("MAX", expression.this)


def bool_xor_sql(self: Generator, expression: exp.Xor) -> str:
    a = self.sql(expression.left)
    b = self.sql(expression.right)
    return f"({a} AND (NOT {b})) OR ((NOT {a}) AND {b})"


def is_parse_json(expression: exp.Expression) -> bool:
    return isinstance(expression, exp.ParseJSON) or (
        isinstance(expression, exp.Cast) and expression.is_type("json")
    )


def isnull_to_is_null(args: t.List) -> exp.Expression:
    return exp.Paren(this=exp.Is(this=seq_get(args, 0), expression=exp.null()))


def generatedasidentitycolumnconstraint_sql(
    self: Generator, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = self.sql(expression, "start") or "1"
    increment = self.sql(expression, "increment") or "1"
    return f"IDENTITY({start}, {increment})"


def arg_max_or_min_no_count(name: str) -> t.Callable[[Generator, exp.ArgMax | exp.ArgMin], str]:
    @unsupported_args("count")
    def _arg_max_or_min_sql(self: Generator, expression: exp.ArgMax | exp.ArgMin) -> str:
        return self.func(name, expression.this, expression.expression)

    return _arg_max_or_min_sql


def ts_or_ds_add_cast(expression: exp.TsOrDsAdd) -> exp.TsOrDsAdd:
    this = expression.this.copy()

    return_type = expression.return_type
    if return_type.is_type(exp.DataType.Type.DATE):
        # If we need to cast to a DATE, we cast to TIMESTAMP first to make sure we
        # can truncate timestamp strings, because some dialects can't cast them to DATE
        this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

    expression.this.replace(exp.cast(this, return_type))
    return expression


def date_delta_sql(name: str, cast: bool = False) -> t.Callable[[Generator, DATE_ADD_OR_DIFF], str]:
    def _delta_sql(self: Generator, expression: DATE_ADD_OR_DIFF) -> str:
        if cast and isinstance(expression, exp.TsOrDsAdd):
            expression = ts_or_ds_add_cast(expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def unit_to_str(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
    unit = expression.args.get("unit")

    if isinstance(unit, exp.Placeholder):
        return unit
    if unit:
        return exp.Literal.string(unit.name)
    return exp.Literal.string(default) if default else None


def unit_to_var(expression: exp.Expression, default: str = "DAY") -> t.Optional[exp.Expression]:
    unit = expression.args.get("unit")

    if isinstance(unit, (exp.Var, exp.Placeholder)):
        return unit
    return exp.Var(this=default) if default else None
@t.overload
def map_date_part(part: exp.Expression, dialect: DialectType = Dialect) -> exp.Var:
    pass


@t.overload
def map_date_part(
    part: t.Optional[exp.Expression], dialect: DialectType = Dialect
) -> t.Optional[exp.Expression]:
    pass


def map_date_part(part, dialect: DialectType = Dialect):
    mapped = (
        Dialect.get_or_raise(dialect).DATE_PART_MAPPING.get(part.name.upper()) if part else None
    )
    return exp.var(mapped) if mapped else part
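
# Illustrative usage (not part of the module):
#
#     >>> map_date_part(exp.var("dw")).sql()
#     'DAYOFWEEK'
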
isinstance(segment, exp.JSONPathPart) and ( 1622 quoted_index or not isinstance(segment, exp.JSONPathSubscript) 1623 ): 1624 if escape: 1625 path = self.escape_str(path) 1626 1627 path = f"{self.dialect.QUOTE_START}{path}{self.dialect.QUOTE_END}" 1628 1629 segments.append(path) 1630 1631 if op: 1632 return f" {op} ".join([self.sql(expression.this), *segments]) 1633 return self.func(name, expression.this, *segments) 1634 1635 return _json_extract_segments 1636 1637 1638def json_path_key_only_name(self: Generator, expression: exp.JSONPathKey) -> str: 1639 if isinstance(expression.this, exp.JSONPathWildcard): 1640 self.unsupported("Unsupported wildcard in JSONPathKey expression") 1641 1642 return expression.name 1643 1644 1645def filter_array_using_unnest(self: Generator, expression: exp.ArrayFilter) -> str: 1646 cond = expression.expression 1647 if isinstance(cond, exp.Lambda) and len(cond.expressions) == 1: 1648 alias = cond.expressions[0] 1649 cond = cond.this 1650 elif isinstance(cond, exp.Predicate): 1651 alias = "_u" 1652 else: 1653 self.unsupported("Unsupported filter condition") 1654 return "" 1655 1656 unnest = exp.Unnest(expressions=[expression.this]) 1657 filtered = exp.select(alias).from_(exp.alias_(unnest, None, table=[alias])).where(cond) 1658 return self.sql(exp.Array(expressions=[filtered])) 1659 1660 1661def to_number_with_nls_param(self: Generator, expression: exp.ToNumber) -> str: 1662 return self.func( 1663 "TO_NUMBER", 1664 expression.this, 1665 expression.args.get("format"), 1666 expression.args.get("nlsparam"), 1667 ) 1668 1669 1670def build_default_decimal_type( 1671 precision: t.Optional[int] = None, scale: t.Optional[int] = None 1672) -> t.Callable[[exp.DataType], exp.DataType]: 1673 def _builder(dtype: exp.DataType) -> exp.DataType: 1674 if dtype.expressions or precision is None: 1675 return dtype 1676 1677 params = f"{precision}{f', {scale}' if scale is not None else ''}" 1678 return exp.DataType.build(f"DECIMAL({params})") 1679 1680 return _builder 1681 1682 1683def build_timestamp_from_parts(args: t.List) -> exp.Func: 1684 if len(args) == 2: 1685 # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept, 1686 # so we parse this into Anonymous for now instead of introducing complexity 1687 return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args) 1688 1689 return exp.TimestampFromParts.from_arg_list(args) 1690 1691 1692def sha256_sql(self: Generator, expression: exp.SHA2) -> str: 1693 return self.func(f"SHA{expression.text('length') or '256'}", expression.this) 1694 1695 1696def sequence_sql(self: Generator, expression: exp.GenerateSeries | exp.GenerateDateArray) -> str: 1697 start = expression.args.get("start") 1698 end = expression.args.get("end") 1699 step = expression.args.get("step") 1700 1701 if isinstance(start, exp.Cast): 1702 target_type = start.to 1703 elif isinstance(end, exp.Cast): 1704 target_type = end.to 1705 else: 1706 target_type = None 1707 1708 if start and end and target_type and target_type.is_type("date", "timestamp"): 1709 if isinstance(start, exp.Cast) and target_type is start.to: 1710 end = exp.cast(end, target_type) 1711 else: 1712 start = exp.cast(start, target_type) 1713 1714 return self.func("SEQUENCE", start, end, step) 1715 1716 1717def build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 1718 def _builder(args: t.List, dialect: Dialect) -> E: 1719 return expr_type( 1720 this=seq_get(args, 0), 1721 expression=seq_get(args, 1), 1722 group=seq_get(args, 2) or 
exp.Literal.number(dialect.REGEXP_EXTRACT_DEFAULT_GROUP), 1723 parameters=seq_get(args, 3), 1724 ) 1725 1726 return _builder 1727 1728 1729def explode_to_unnest_sql(self: Generator, expression: exp.Lateral) -> str: 1730 if isinstance(expression.this, exp.Explode): 1731 return self.sql( 1732 exp.Join( 1733 this=exp.Unnest( 1734 expressions=[expression.this.this], 1735 alias=expression.args.get("alias"), 1736 offset=isinstance(expression.this, exp.Posexplode), 1737 ), 1738 kind="cross", 1739 ) 1740 ) 1741 return self.lateral_sql(expression) 1742 1743 1744def timestampdiff_sql(self: Generator, expression: exp.DatetimeDiff | exp.TimestampDiff) -> str: 1745 return self.func("TIMESTAMPDIFF", expression.unit, expression.expression, expression.this) 1746 1747 1748def no_make_interval_sql(self: Generator, expression: exp.MakeInterval, sep: str = ", ") -> str: 1749 args = [] 1750 for unit, value in expression.args.items(): 1751 if isinstance(value, exp.Kwarg): 1752 value = value.expression 1753 1754 args.append(f"{value} {unit}") 1755 1756 return f"INTERVAL '{self.format_args(*args, sep=sep)}'" 1757 1758 1759def length_or_char_length_sql(self: Generator, expression: exp.Length) -> str: 1760 length_func = "LENGTH" if expression.args.get("binary") else "CHAR_LENGTH" 1761 return self.func(length_func, expression.this)
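The helpers above are mostly consumed by dialect implementations, but some can be exercised directly. A minimal sketch (not part of the module) of `map_date_part` normalizing an abbreviation through `DATE_PART_MAPPING` on the default `Dialect`:

from sqlglot import exp
from sqlglot.dialects.dialect import map_date_part

# "yy" is upper-cased and looked up in DATE_PART_MAPPING ("YY" -> "YEAR")
print(map_date_part(exp.var("yy")).name)    # YEAR
# Unmapped parts are returned unchanged
print(map_date_part(exp.var("YEAR")).name)  # YEAR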
55class Dialects(str, Enum): 56 """Dialects supported by SQLGLot.""" 57 58 DIALECT = "" 59 60 ATHENA = "athena" 61 BIGQUERY = "bigquery" 62 CLICKHOUSE = "clickhouse" 63 DATABRICKS = "databricks" 64 DORIS = "doris" 65 DRILL = "drill" 66 DUCKDB = "duckdb" 67 HIVE = "hive" 68 MATERIALIZE = "materialize" 69 MYSQL = "mysql" 70 ORACLE = "oracle" 71 POSTGRES = "postgres" 72 PRESTO = "presto" 73 PRQL = "prql" 74 REDSHIFT = "redshift" 75 RISINGWAVE = "risingwave" 76 SNOWFLAKE = "snowflake" 77 SPARK = "spark" 78 SPARK2 = "spark2" 79 SQLITE = "sqlite" 80 STARROCKS = "starrocks" 81 TABLEAU = "tableau" 82 TERADATA = "teradata" 83 TRINO = "trino" 84 TSQL = "tsql"
Dialects supported by SQLGlot.
87class NormalizationStrategy(str, AutoName): 88 """Specifies the strategy according to which identifiers should be normalized.""" 89 90 LOWERCASE = auto() 91 """Unquoted identifiers are lowercased.""" 92 93 UPPERCASE = auto() 94 """Unquoted identifiers are uppercased.""" 95 96 CASE_SENSITIVE = auto() 97 """Always case-sensitive, regardless of quotes.""" 98 99 CASE_INSENSITIVE = auto() 100 """Always case-insensitive, regardless of quotes."""
Specifies the strategy according to which identifiers should be normalized.
Always case-sensitive, regardless of quotes.
Always case-insensitive, regardless of quotes.
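A short sketch of how these strategies surface on concrete dialects (using `Dialect.get_or_raise`, defined below):

from sqlglot.dialects.dialect import Dialect, NormalizationStrategy

# Postgres lowercases unquoted identifiers; Snowflake uppercases them
assert Dialect.get_or_raise("postgres").normalization_strategy is NormalizationStrategy.LOWERCASE
assert Dialect.get_or_raise("snowflake").normalization_strategy is NormalizationStrategy.UPPERCASE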
225class Dialect(metaclass=_Dialect): 226 INDEX_OFFSET = 0 227 """The base index offset for arrays.""" 228 229 WEEK_OFFSET = 0 230 """First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.""" 231 232 UNNEST_COLUMN_ONLY = False 233 """Whether `UNNEST` table aliases are treated as column aliases.""" 234 235 ALIAS_POST_TABLESAMPLE = False 236 """Whether the table alias comes after tablesample.""" 237 238 TABLESAMPLE_SIZE_IS_PERCENT = False 239 """Whether a size in the table sample clause represents percentage.""" 240 241 NORMALIZATION_STRATEGY = NormalizationStrategy.LOWERCASE 242 """Specifies the strategy according to which identifiers should be normalized.""" 243 244 IDENTIFIERS_CAN_START_WITH_DIGIT = False 245 """Whether an unquoted identifier can start with a digit.""" 246 247 DPIPE_IS_STRING_CONCAT = True 248 """Whether the DPIPE token (`||`) is a string concatenation operator.""" 249 250 STRICT_STRING_CONCAT = False 251 """Whether `CONCAT`'s arguments must be strings.""" 252 253 SUPPORTS_USER_DEFINED_TYPES = True 254 """Whether user-defined data types are supported.""" 255 256 SUPPORTS_SEMI_ANTI_JOIN = True 257 """Whether `SEMI` or `ANTI` joins are supported.""" 258 259 SUPPORTS_COLUMN_JOIN_MARKS = False 260 """Whether the old-style outer join (+) syntax is supported.""" 261 262 COPY_PARAMS_ARE_CSV = True 263 """Separator of COPY statement parameters.""" 264 265 NORMALIZE_FUNCTIONS: bool | str = "upper" 266 """ 267 Determines how function names are going to be normalized. 268 Possible values: 269 "upper" or True: Convert names to uppercase. 270 "lower": Convert names to lowercase. 271 False: Disables function name normalization. 272 """ 273 274 PRESERVE_ORIGINAL_NAMES: bool = False 275 """ 276 Whether the name of the function should be preserved inside the node's metadata, 277 can be useful for roundtripping deprecated vs new functions that share an AST node 278 e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery 279 """ 280 281 LOG_BASE_FIRST: t.Optional[bool] = True 282 """ 283 Whether the base comes first in the `LOG` function. 284 Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`) 285 """ 286 287 NULL_ORDERING = "nulls_are_small" 288 """ 289 Default `NULL` ordering method to use if not explicitly set. 290 Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"` 291 """ 292 293 TYPED_DIVISION = False 294 """ 295 Whether the behavior of `a / b` depends on the types of `a` and `b`. 296 False means `a / b` is always float division. 297 True means `a / b` is integer division if both `a` and `b` are integers. 
298 """ 299 300 SAFE_DIVISION = False 301 """Whether division by zero throws an error (`False`) or returns NULL (`True`).""" 302 303 CONCAT_COALESCE = False 304 """A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.""" 305 306 HEX_LOWERCASE = False 307 """Whether the `HEX` function returns a lowercase hexadecimal string.""" 308 309 DATE_FORMAT = "'%Y-%m-%d'" 310 DATEINT_FORMAT = "'%Y%m%d'" 311 TIME_FORMAT = "'%Y-%m-%d %H:%M:%S'" 312 313 TIME_MAPPING: t.Dict[str, str] = {} 314 """Associates this dialect's time formats with their equivalent Python `strftime` formats.""" 315 316 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_model_rules_date_time 317 # https://docs.teradata.com/r/Teradata-Database-SQL-Functions-Operators-Expressions-and-Predicates/March-2017/Data-Type-Conversions/Character-to-DATE-Conversion/Forcing-a-FORMAT-on-CAST-for-Converting-Character-to-DATE 318 FORMAT_MAPPING: t.Dict[str, str] = {} 319 """ 320 Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. 321 If empty, the corresponding trie will be constructed off of `TIME_MAPPING`. 322 """ 323 324 UNESCAPED_SEQUENCES: t.Dict[str, str] = {} 325 """Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).""" 326 327 PSEUDOCOLUMNS: t.Set[str] = set() 328 """ 329 Columns that are auto-generated by the engine corresponding to this dialect. 330 For example, such columns may be excluded from `SELECT *` queries. 331 """ 332 333 PREFER_CTE_ALIAS_COLUMN = False 334 """ 335 Some dialects, such as Snowflake, allow you to reference a CTE column alias in the 336 HAVING clause of the CTE. This flag will cause the CTE alias columns to override 337 any projection aliases in the subquery. 338 339 For example, 340 WITH y(c) AS ( 341 SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 342 ) SELECT c FROM y; 343 344 will be rewritten as 345 346 WITH y(c) AS ( 347 SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0 348 ) SELECT c FROM y; 349 """ 350 351 COPY_PARAMS_ARE_CSV = True 352 """ 353 Whether COPY statement parameters are separated by comma or whitespace 354 """ 355 356 FORCE_EARLY_ALIAS_REF_EXPANSION = False 357 """ 358 Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()). 
359 360 For example: 361 WITH data AS ( 362 SELECT 363 1 AS id, 364 2 AS my_id 365 ) 366 SELECT 367 id AS my_id 368 FROM 369 data 370 WHERE 371 my_id = 1 372 GROUP BY 373 my_id, 374 HAVING 375 my_id = 1 376 377 In most dialects, "my_id" would refer to "data.my_id" across the query, except: 378 - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e 379 it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" 380 - Clickhouse, which will forward the alias across the query i.e it resolves 381 to "WHERE id = 1 GROUP BY id HAVING id = 1" 382 """ 383 384 EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = False 385 """Whether alias reference expansion before qualification should only happen for the GROUP BY clause.""" 386 387 SUPPORTS_ORDER_BY_ALL = False 388 """ 389 Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks 390 """ 391 392 HAS_DISTINCT_ARRAY_CONSTRUCTORS = False 393 """ 394 Whether the ARRAY constructor is context-sensitive, i.e in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3) 395 as the former is of type INT[] vs the latter which is SUPER 396 """ 397 398 SUPPORTS_FIXED_SIZE_ARRAYS = False 399 """ 400 Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. 401 in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should 402 be interpreted as a subscript/index operator. 403 """ 404 405 STRICT_JSON_PATH_SYNTAX = True 406 """Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.""" 407 408 ON_CONDITION_EMPTY_BEFORE_ERROR = True 409 """Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle).""" 410 411 ARRAY_AGG_INCLUDES_NULLS: t.Optional[bool] = True 412 """Whether ArrayAgg needs to filter NULL values.""" 413 414 PROMOTE_TO_INFERRED_DATETIME_TYPE = False 415 """ 416 This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted 417 to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal 418 is cast to x's type to match it instead. 419 """ 420 421 SUPPORTS_VALUES_DEFAULT = True 422 """Whether the DEFAULT keyword is supported in the VALUES clause.""" 423 424 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = False 425 """Whether number literals can include underscores for better readability""" 426 427 REGEXP_EXTRACT_DEFAULT_GROUP = 0 428 """The default value for the capturing group.""" 429 430 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 431 exp.Except: True, 432 exp.Intersect: True, 433 exp.Union: True, 434 } 435 """ 436 Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL` 437 must be explicitly specified. 438 """ 439 440 CREATABLE_KIND_MAPPING: dict[str, str] = {} 441 """ 442 Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse 443 equivalent of CREATE SCHEMA is CREATE DATABASE. 
444 """ 445 446 # --- Autofilled --- 447 448 tokenizer_class = Tokenizer 449 jsonpath_tokenizer_class = JSONPathTokenizer 450 parser_class = Parser 451 generator_class = Generator 452 453 # A trie of the time_mapping keys 454 TIME_TRIE: t.Dict = {} 455 FORMAT_TRIE: t.Dict = {} 456 457 INVERSE_TIME_MAPPING: t.Dict[str, str] = {} 458 INVERSE_TIME_TRIE: t.Dict = {} 459 INVERSE_FORMAT_MAPPING: t.Dict[str, str] = {} 460 INVERSE_FORMAT_TRIE: t.Dict = {} 461 462 INVERSE_CREATABLE_KIND_MAPPING: dict[str, str] = {} 463 464 ESCAPED_SEQUENCES: t.Dict[str, str] = {} 465 466 # Delimiters for string literals and identifiers 467 QUOTE_START = "'" 468 QUOTE_END = "'" 469 IDENTIFIER_START = '"' 470 IDENTIFIER_END = '"' 471 472 # Delimiters for bit, hex, byte and unicode literals 473 BIT_START: t.Optional[str] = None 474 BIT_END: t.Optional[str] = None 475 HEX_START: t.Optional[str] = None 476 HEX_END: t.Optional[str] = None 477 BYTE_START: t.Optional[str] = None 478 BYTE_END: t.Optional[str] = None 479 UNICODE_START: t.Optional[str] = None 480 UNICODE_END: t.Optional[str] = None 481 482 DATE_PART_MAPPING = { 483 "Y": "YEAR", 484 "YY": "YEAR", 485 "YYY": "YEAR", 486 "YYYY": "YEAR", 487 "YR": "YEAR", 488 "YEARS": "YEAR", 489 "YRS": "YEAR", 490 "MM": "MONTH", 491 "MON": "MONTH", 492 "MONS": "MONTH", 493 "MONTHS": "MONTH", 494 "D": "DAY", 495 "DD": "DAY", 496 "DAYS": "DAY", 497 "DAYOFMONTH": "DAY", 498 "DAY OF WEEK": "DAYOFWEEK", 499 "WEEKDAY": "DAYOFWEEK", 500 "DOW": "DAYOFWEEK", 501 "DW": "DAYOFWEEK", 502 "WEEKDAY_ISO": "DAYOFWEEKISO", 503 "DOW_ISO": "DAYOFWEEKISO", 504 "DW_ISO": "DAYOFWEEKISO", 505 "DAY OF YEAR": "DAYOFYEAR", 506 "DOY": "DAYOFYEAR", 507 "DY": "DAYOFYEAR", 508 "W": "WEEK", 509 "WK": "WEEK", 510 "WEEKOFYEAR": "WEEK", 511 "WOY": "WEEK", 512 "WY": "WEEK", 513 "WEEK_ISO": "WEEKISO", 514 "WEEKOFYEARISO": "WEEKISO", 515 "WEEKOFYEAR_ISO": "WEEKISO", 516 "Q": "QUARTER", 517 "QTR": "QUARTER", 518 "QTRS": "QUARTER", 519 "QUARTERS": "QUARTER", 520 "H": "HOUR", 521 "HH": "HOUR", 522 "HR": "HOUR", 523 "HOURS": "HOUR", 524 "HRS": "HOUR", 525 "M": "MINUTE", 526 "MI": "MINUTE", 527 "MIN": "MINUTE", 528 "MINUTES": "MINUTE", 529 "MINS": "MINUTE", 530 "S": "SECOND", 531 "SEC": "SECOND", 532 "SECONDS": "SECOND", 533 "SECS": "SECOND", 534 "MS": "MILLISECOND", 535 "MSEC": "MILLISECOND", 536 "MSECS": "MILLISECOND", 537 "MSECOND": "MILLISECOND", 538 "MSECONDS": "MILLISECOND", 539 "MILLISEC": "MILLISECOND", 540 "MILLISECS": "MILLISECOND", 541 "MILLISECON": "MILLISECOND", 542 "MILLISECONDS": "MILLISECOND", 543 "US": "MICROSECOND", 544 "USEC": "MICROSECOND", 545 "USECS": "MICROSECOND", 546 "MICROSEC": "MICROSECOND", 547 "MICROSECS": "MICROSECOND", 548 "USECOND": "MICROSECOND", 549 "USECONDS": "MICROSECOND", 550 "MICROSECONDS": "MICROSECOND", 551 "NS": "NANOSECOND", 552 "NSEC": "NANOSECOND", 553 "NANOSEC": "NANOSECOND", 554 "NSECOND": "NANOSECOND", 555 "NSECONDS": "NANOSECOND", 556 "NANOSECS": "NANOSECOND", 557 "EPOCH_SECOND": "EPOCH", 558 "EPOCH_SECONDS": "EPOCH", 559 "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND", 560 "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND", 561 "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND", 562 "TZH": "TIMEZONE_HOUR", 563 "TZM": "TIMEZONE_MINUTE", 564 "DEC": "DECADE", 565 "DECS": "DECADE", 566 "DECADES": "DECADE", 567 "MIL": "MILLENIUM", 568 "MILS": "MILLENIUM", 569 "MILLENIA": "MILLENIUM", 570 "C": "CENTURY", 571 "CENT": "CENTURY", 572 "CENTS": "CENTURY", 573 "CENTURIES": "CENTURY", 574 } 575 576 TYPE_TO_EXPRESSIONS: t.Dict[exp.DataType.Type, t.Set[t.Type[exp.Expression]]] = { 577 exp.DataType.Type.BIGINT: 
{ 578 exp.ApproxDistinct, 579 exp.ArraySize, 580 exp.Length, 581 }, 582 exp.DataType.Type.BOOLEAN: { 583 exp.Between, 584 exp.Boolean, 585 exp.In, 586 exp.RegexpLike, 587 }, 588 exp.DataType.Type.DATE: { 589 exp.CurrentDate, 590 exp.Date, 591 exp.DateFromParts, 592 exp.DateStrToDate, 593 exp.DiToDate, 594 exp.StrToDate, 595 exp.TimeStrToDate, 596 exp.TsOrDsToDate, 597 }, 598 exp.DataType.Type.DATETIME: { 599 exp.CurrentDatetime, 600 exp.Datetime, 601 exp.DatetimeAdd, 602 exp.DatetimeSub, 603 }, 604 exp.DataType.Type.DOUBLE: { 605 exp.ApproxQuantile, 606 exp.Avg, 607 exp.Exp, 608 exp.Ln, 609 exp.Log, 610 exp.Pow, 611 exp.Quantile, 612 exp.Round, 613 exp.SafeDivide, 614 exp.Sqrt, 615 exp.Stddev, 616 exp.StddevPop, 617 exp.StddevSamp, 618 exp.ToDouble, 619 exp.Variance, 620 exp.VariancePop, 621 }, 622 exp.DataType.Type.INT: { 623 exp.Ceil, 624 exp.DatetimeDiff, 625 exp.DateDiff, 626 exp.TimestampDiff, 627 exp.TimeDiff, 628 exp.DateToDi, 629 exp.Levenshtein, 630 exp.Sign, 631 exp.StrPosition, 632 exp.TsOrDiToDi, 633 }, 634 exp.DataType.Type.JSON: { 635 exp.ParseJSON, 636 }, 637 exp.DataType.Type.TIME: { 638 exp.Time, 639 }, 640 exp.DataType.Type.TIMESTAMP: { 641 exp.CurrentTime, 642 exp.CurrentTimestamp, 643 exp.StrToTime, 644 exp.TimeAdd, 645 exp.TimeStrToTime, 646 exp.TimeSub, 647 exp.TimestampAdd, 648 exp.TimestampSub, 649 exp.UnixToTime, 650 }, 651 exp.DataType.Type.TINYINT: { 652 exp.Day, 653 exp.Month, 654 exp.Week, 655 exp.Year, 656 exp.Quarter, 657 }, 658 exp.DataType.Type.VARCHAR: { 659 exp.ArrayConcat, 660 exp.Concat, 661 exp.ConcatWs, 662 exp.DateToDateStr, 663 exp.GroupConcat, 664 exp.Initcap, 665 exp.Lower, 666 exp.Substring, 667 exp.String, 668 exp.TimeToStr, 669 exp.TimeToTimeStr, 670 exp.Trim, 671 exp.TsOrDsToDateStr, 672 exp.UnixToStr, 673 exp.UnixToTimeStr, 674 exp.Upper, 675 }, 676 } 677 678 ANNOTATORS: AnnotatorsType = { 679 **{ 680 expr_type: lambda self, e: self._annotate_unary(e) 681 for expr_type in subclasses(exp.__name__, (exp.Unary, exp.Alias)) 682 }, 683 **{ 684 expr_type: lambda self, e: self._annotate_binary(e) 685 for expr_type in subclasses(exp.__name__, exp.Binary) 686 }, 687 **{ 688 expr_type: _annotate_with_type_lambda(data_type) 689 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 690 for expr_type in expressions 691 }, 692 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 693 exp.Anonymous: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN), 694 exp.Array: lambda self, e: self._annotate_by_args(e, "expressions", array=True), 695 exp.ArrayAgg: lambda self, e: self._annotate_by_args(e, "this", array=True), 696 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 697 exp.Bracket: lambda self, e: self._annotate_bracket(e), 698 exp.Cast: lambda self, e: self._annotate_with_type(e, e.args["to"]), 699 exp.Case: lambda self, e: self._annotate_by_args(e, "default", "ifs"), 700 exp.Coalesce: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 701 exp.Count: lambda self, e: self._annotate_with_type( 702 e, exp.DataType.Type.BIGINT if e.args.get("big_int") else exp.DataType.Type.INT 703 ), 704 exp.DataType: lambda self, e: self._annotate_with_type(e, e.copy()), 705 exp.DateAdd: lambda self, e: self._annotate_timeunit(e), 706 exp.DateSub: lambda self, e: self._annotate_timeunit(e), 707 exp.DateTrunc: lambda self, e: self._annotate_timeunit(e), 708 exp.Distinct: lambda self, e: self._annotate_by_args(e, "expressions"), 709 exp.Div: lambda self, e: self._annotate_div(e), 710 exp.Dot: lambda 
self, e: self._annotate_dot(e), 711 exp.Explode: lambda self, e: self._annotate_explode(e), 712 exp.Extract: lambda self, e: self._annotate_extract(e), 713 exp.Filter: lambda self, e: self._annotate_by_args(e, "this"), 714 exp.GenerateDateArray: lambda self, e: self._annotate_with_type( 715 e, exp.DataType.build("ARRAY<DATE>") 716 ), 717 exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type( 718 e, exp.DataType.build("ARRAY<TIMESTAMP>") 719 ), 720 exp.Greatest: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 721 exp.If: lambda self, e: self._annotate_by_args(e, "true", "false"), 722 exp.Interval: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.INTERVAL), 723 exp.Least: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 724 exp.Literal: lambda self, e: self._annotate_literal(e), 725 exp.Map: lambda self, e: self._annotate_map(e), 726 exp.Max: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 727 exp.Min: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 728 exp.Null: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.NULL), 729 exp.Nullif: lambda self, e: self._annotate_by_args(e, "this", "expression"), 730 exp.PropertyEQ: lambda self, e: self._annotate_by_args(e, "expression"), 731 exp.Slice: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.UNKNOWN), 732 exp.Struct: lambda self, e: self._annotate_struct(e), 733 exp.Sum: lambda self, e: self._annotate_by_args(e, "this", "expressions", promote=True), 734 exp.Timestamp: lambda self, e: self._annotate_with_type( 735 e, 736 exp.DataType.Type.TIMESTAMPTZ if e.args.get("with_tz") else exp.DataType.Type.TIMESTAMP, 737 ), 738 exp.ToMap: lambda self, e: self._annotate_to_map(e), 739 exp.TryCast: lambda self, e: self._annotate_with_type(e, e.args["to"]), 740 exp.Unnest: lambda self, e: self._annotate_unnest(e), 741 exp.VarMap: lambda self, e: self._annotate_map(e), 742 } 743 744 @classmethod 745 def get_or_raise(cls, dialect: DialectType) -> Dialect: 746 """ 747 Look up a dialect in the global dialect registry and return it if it exists. 748 749 Args: 750 dialect: The target dialect. If this is a string, it can be optionally followed by 751 additional key-value pairs that are separated by commas and are used to specify 752 dialect settings, such as whether the dialect's identifiers are case-sensitive. 753 754 Example: 755 >>> dialect = dialect_class = get_or_raise("duckdb") 756 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") 757 758 Returns: 759 The corresponding Dialect instance. 760 """ 761 762 if not dialect: 763 return cls() 764 if isinstance(dialect, _Dialect): 765 return dialect() 766 if isinstance(dialect, Dialect): 767 return dialect 768 if isinstance(dialect, str): 769 try: 770 dialect_name, *kv_strings = dialect.split(",") 771 kv_pairs = (kv.split("=") for kv in kv_strings) 772 kwargs = {} 773 for pair in kv_pairs: 774 key = pair[0].strip() 775 value: t.Union[bool | str | None] = None 776 777 if len(pair) == 1: 778 # Default initialize standalone settings to True 779 value = True 780 elif len(pair) == 2: 781 value = pair[1].strip() 782 783 kwargs[key] = to_bool(value) 784 785 except ValueError: 786 raise ValueError( 787 f"Invalid dialect format: '{dialect}'. " 788 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." 
789 ) 790 791 result = cls.get(dialect_name.strip()) 792 if not result: 793 from difflib import get_close_matches 794 795 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" 796 if similar: 797 similar = f" Did you mean {similar}?" 798 799 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 800 801 return result(**kwargs) 802 803 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.") 804 805 @classmethod 806 def format_time( 807 cls, expression: t.Optional[str | exp.Expression] 808 ) -> t.Optional[exp.Expression]: 809 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" 810 if isinstance(expression, str): 811 return exp.Literal.string( 812 # the time formats are quoted 813 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) 814 ) 815 816 if expression and expression.is_string: 817 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 818 819 return expression 820 821 def __init__(self, **kwargs) -> None: 822 normalization_strategy = kwargs.pop("normalization_strategy", None) 823 824 if normalization_strategy is None: 825 self.normalization_strategy = self.NORMALIZATION_STRATEGY 826 else: 827 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) 828 829 self.settings = kwargs 830 831 def __eq__(self, other: t.Any) -> bool: 832 # Does not currently take dialect state into account 833 return type(self) == other 834 835 def __hash__(self) -> int: 836 # Does not currently take dialect state into account 837 return hash(type(self)) 838 839 def normalize_identifier(self, expression: E) -> E: 840 """ 841 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. 842 843 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it 844 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so 845 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, 846 and so any normalization would be prohibited in order to avoid "breaking" the identifier. 847 848 There are also dialects like Spark, which are case-insensitive even when quotes are 849 present, and dialects like MySQL, whose resolution rules match those employed by the 850 underlying operating system, for example they may always be case-sensitive in Linux. 851 852 Finally, the normalization behavior of some engines can even be controlled through flags, 853 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. 854 855 SQLGlot aims to understand and handle all of these different behaviors gracefully, so 856 that it can analyze queries in the optimizer and successfully capture their semantics. 
857 """ 858 if ( 859 isinstance(expression, exp.Identifier) 860 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 861 and ( 862 not expression.quoted 863 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 864 ) 865 ): 866 expression.set( 867 "this", 868 ( 869 expression.this.upper() 870 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 871 else expression.this.lower() 872 ), 873 ) 874 875 return expression 876 877 def case_sensitive(self, text: str) -> bool: 878 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 879 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 880 return False 881 882 unsafe = ( 883 str.islower 884 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 885 else str.isupper 886 ) 887 return any(unsafe(char) for char in text) 888 889 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 890 """Checks if text can be identified given an identify option. 891 892 Args: 893 text: The text to check. 894 identify: 895 `"always"` or `True`: Always returns `True`. 896 `"safe"`: Only returns `True` if the identifier is case-insensitive. 897 898 Returns: 899 Whether the given text can be identified. 900 """ 901 if identify is True or identify == "always": 902 return True 903 904 if identify == "safe": 905 return not self.case_sensitive(text) 906 907 return False 908 909 def quote_identifier(self, expression: E, identify: bool = True) -> E: 910 """ 911 Adds quotes to a given identifier. 912 913 Args: 914 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 915 identify: If set to `False`, the quotes will only be added if the identifier is deemed 916 "unsafe", with respect to its characters and this dialect's normalization strategy. 917 """ 918 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 919 name = expression.this 920 expression.set( 921 "quoted", 922 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 923 ) 924 925 return expression 926 927 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 928 if isinstance(path, exp.Literal): 929 path_text = path.name 930 if path.is_number: 931 path_text = f"[{path_text}]" 932 try: 933 return parse_json_path(path_text, self) 934 except ParseError as e: 935 if self.STRICT_JSON_PATH_SYNTAX: 936 logger.warning(f"Invalid JSON path syntax. 
{str(e)}") 937 938 return path 939 940 def parse(self, sql: str, **opts) -> t.List[t.Optional[exp.Expression]]: 941 return self.parser(**opts).parse(self.tokenize(sql), sql) 942 943 def parse_into( 944 self, expression_type: exp.IntoType, sql: str, **opts 945 ) -> t.List[t.Optional[exp.Expression]]: 946 return self.parser(**opts).parse_into(expression_type, self.tokenize(sql), sql) 947 948 def generate(self, expression: exp.Expression, copy: bool = True, **opts) -> str: 949 return self.generator(**opts).generate(expression, copy=copy) 950 951 def transpile(self, sql: str, **opts) -> t.List[str]: 952 return [ 953 self.generate(expression, copy=False, **opts) if expression else "" 954 for expression in self.parse(sql) 955 ] 956 957 def tokenize(self, sql: str) -> t.List[Token]: 958 return self.tokenizer.tokenize(sql) 959 960 @property 961 def tokenizer(self) -> Tokenizer: 962 return self.tokenizer_class(dialect=self) 963 964 @property 965 def jsonpath_tokenizer(self) -> JSONPathTokenizer: 966 return self.jsonpath_tokenizer_class(dialect=self) 967 968 def parser(self, **opts) -> Parser: 969 return self.parser_class(dialect=self, **opts) 970 971 def generator(self, **opts) -> Generator: 972 return self.generator_class(dialect=self, **opts)
821 def __init__(self, **kwargs) -> None: 822 normalization_strategy = kwargs.pop("normalization_strategy", None) 823 824 if normalization_strategy is None: 825 self.normalization_strategy = self.NORMALIZATION_STRATEGY 826 else: 827 self.normalization_strategy = NormalizationStrategy(normalization_strategy.upper()) 828 829 self.settings = kwargs
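A brief usage sketch of the constructor: the `normalization_strategy` keyword is consumed here, while any remaining keywords (e.g. the hypothetical `my_setting` below) land in `self.settings`:

from sqlglot.dialects.dialect import Dialect, NormalizationStrategy

# my_setting is an arbitrary placeholder, not a real dialect option
d = Dialect(normalization_strategy="case_sensitive", my_setting=True)
assert d.normalization_strategy is NormalizationStrategy.CASE_SENSITIVE
assert d.settings == {"my_setting": True}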
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether a size in the table sample clause represents percentage.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated and new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
Whether the base comes first in the `LOG` function. Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Default `NULL` ordering method to use if not explicitly set. Possible values: `"nulls_are_small"`, `"nulls_are_large"`, `"nulls_are_last"`.
Whether the behavior of `a / b` depends on the types of `a` and `b`. False means `a / b` is always float division. True means `a / b` is integer division if both `a` and `b` are integers.
A `NULL` arg in `CONCAT` yields `NULL` by default, but in some dialects it yields an empty string.
Associates this dialect's time formats with their equivalent Python `strftime` formats.
Helper which is used for parsing the special syntax `CAST(x AS DATE FORMAT 'yyyy')`. If empty, the corresponding trie will be constructed off of `TIME_MAPPING`.
Mapping of an escaped sequence (`\\n`) to its unescaped version (`\n`).
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from `SELECT *` queries.
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (
    SELECT
        1 AS id,
        2 AS my_id
)
SELECT
    id AS my_id
FROM
    data
WHERE
    my_id = 1
GROUP BY
    my_id
HAVING
    my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3), as the former is of type INT[] while the latter is of type SUPER.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether "X ON EMPTY" should come before "X ON ERROR" (for dialects like T-SQL, MySQL, Oracle).
This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal is cast to x's type to match it instead.
Whether number literals can include underscores for better readability.
Whether a set operation uses DISTINCT by default. This is `None` when either `DISTINCT` or `ALL` must be explicitly specified.
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
744 @classmethod 745 def get_or_raise(cls, dialect: DialectType) -> Dialect: 746 """ 747 Look up a dialect in the global dialect registry and return it if it exists. 748 749 Args: 750 dialect: The target dialect. If this is a string, it can be optionally followed by 751 additional key-value pairs that are separated by commas and are used to specify 752 dialect settings, such as whether the dialect's identifiers are case-sensitive. 753 754 Example: 755 >>> dialect = dialect_class = get_or_raise("duckdb") 756 >>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive") 757 758 Returns: 759 The corresponding Dialect instance. 760 """ 761 762 if not dialect: 763 return cls() 764 if isinstance(dialect, _Dialect): 765 return dialect() 766 if isinstance(dialect, Dialect): 767 return dialect 768 if isinstance(dialect, str): 769 try: 770 dialect_name, *kv_strings = dialect.split(",") 771 kv_pairs = (kv.split("=") for kv in kv_strings) 772 kwargs = {} 773 for pair in kv_pairs: 774 key = pair[0].strip() 775 value: t.Union[bool | str | None] = None 776 777 if len(pair) == 1: 778 # Default initialize standalone settings to True 779 value = True 780 elif len(pair) == 2: 781 value = pair[1].strip() 782 783 kwargs[key] = to_bool(value) 784 785 except ValueError: 786 raise ValueError( 787 f"Invalid dialect format: '{dialect}'. " 788 "Please use the correct format: 'dialect [, k1 = v2 [, ...]]'." 789 ) 790 791 result = cls.get(dialect_name.strip()) 792 if not result: 793 from difflib import get_close_matches 794 795 similar = seq_get(get_close_matches(dialect_name, cls.classes, n=1), 0) or "" 796 if similar: 797 similar = f" Did you mean {similar}?" 798 799 raise ValueError(f"Unknown dialect '{dialect_name}'.{similar}") 800 801 return result(**kwargs) 802 803 raise ValueError(f"Invalid dialect type for '{dialect}': '{type(dialect)}'.")
Look up a dialect in the global dialect registry and return it if it exists.
Arguments:
- dialect: The target dialect. If this is a string, it can be optionally followed by additional key-value pairs that are separated by commas and are used to specify dialect settings, such as whether the dialect's identifiers are case-sensitive.
Example:
>>> dialect = dialect_class = get_or_raise("duckdb")
>>> dialect = get_or_raise("mysql, normalization_strategy = case_sensitive")
Returns:
The corresponding Dialect instance.
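A hedged sketch of the lookup behavior described above, including the fuzzy-match hint for unknown names (the exact error message is sketched in the comment):

from sqlglot.dialects.dialect import Dialect

# Strings, Dialect subclasses and Dialect instances are all accepted;
# None falls back to the base Dialect
assert isinstance(Dialect.get_or_raise(None), Dialect)

try:
    Dialect.get_or_raise("duckdbb")  # deliberate typo
except ValueError as e:
    print(e)  # roughly: Unknown dialect 'duckdbb'. Did you mean duckdb?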
805 @classmethod 806 def format_time( 807 cls, expression: t.Optional[str | exp.Expression] 808 ) -> t.Optional[exp.Expression]: 809 """Converts a time format in this dialect to its equivalent Python `strftime` format.""" 810 if isinstance(expression, str): 811 return exp.Literal.string( 812 # the time formats are quoted 813 format_time(expression[1:-1], cls.TIME_MAPPING, cls.TIME_TRIE) 814 ) 815 816 if expression and expression.is_string: 817 return exp.Literal.string(format_time(expression.this, cls.TIME_MAPPING, cls.TIME_TRIE)) 818 819 return expression
Converts a time format in this dialect to its equivalent Python `strftime` format.
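For instance, assuming Hive's `TIME_MAPPING` translates `yyyy`/`MM`/`dd` to `%Y`/`%m`/`%d`, a sketch:

from sqlglot.dialects.hive import Hive

# A quoted Hive format string becomes a strftime-style string Literal
print(Hive.format_time("'yyyy-MM-dd'"))  # '%Y-%m-%d'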
839 def normalize_identifier(self, expression: E) -> E: 840 """ 841 Transforms an identifier in a way that resembles how it'd be resolved by this dialect. 842 843 For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it 844 lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so 845 it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, 846 and so any normalization would be prohibited in order to avoid "breaking" the identifier. 847 848 There are also dialects like Spark, which are case-insensitive even when quotes are 849 present, and dialects like MySQL, whose resolution rules match those employed by the 850 underlying operating system, for example they may always be case-sensitive in Linux. 851 852 Finally, the normalization behavior of some engines can even be controlled through flags, 853 like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier. 854 855 SQLGlot aims to understand and handle all of these different behaviors gracefully, so 856 that it can analyze queries in the optimizer and successfully capture their semantics. 857 """ 858 if ( 859 isinstance(expression, exp.Identifier) 860 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 861 and ( 862 not expression.quoted 863 or self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 864 ) 865 ): 866 expression.set( 867 "this", 868 ( 869 expression.this.upper() 870 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 871 else expression.this.lower() 872 ), 873 ) 874 875 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like `FoO` would be resolved as `foo` in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as `FOO`. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
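A small sketch contrasting Postgres and Snowflake resolution, per the rules above:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

print(Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name)   # foo
print(Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name)  # FOO

# Quoted identifiers are left untouched under case-sensitive resolution rules
quoted = exp.to_identifier("FoO", quoted=True)
print(Dialect.get_or_raise("postgres").normalize_identifier(quoted).name)  # FoO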
877 def case_sensitive(self, text: str) -> bool: 878 """Checks if text contains any case sensitive characters, based on the dialect's rules.""" 879 if self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE: 880 return False 881 882 unsafe = ( 883 str.islower 884 if self.normalization_strategy is NormalizationStrategy.UPPERCASE 885 else str.isupper 886 ) 887 return any(unsafe(char) for char in text)
Checks if text contains any case-sensitive characters, based on the dialect's rules.
889 def can_identify(self, text: str, identify: str | bool = "safe") -> bool: 890 """Checks if text can be identified given an identify option. 891 892 Args: 893 text: The text to check. 894 identify: 895 `"always"` or `True`: Always returns `True`. 896 `"safe"`: Only returns `True` if the identifier is case-insensitive. 897 898 Returns: 899 Whether the given text can be identified. 900 """ 901 if identify is True or identify == "always": 902 return True 903 904 if identify == "safe": 905 return not self.case_sensitive(text) 906 907 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify:
  - `"always"` or `True`: Always returns `True`.
  - `"safe"`: Only returns `True` if the identifier is case-insensitive.
Returns:
Whether the given text can be identified.
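A sketch combining `case_sensitive` and `can_identify` under Snowflake's UPPERCASE strategy:

from sqlglot.dialects.dialect import Dialect

snowflake = Dialect.get_or_raise("snowflake")

# Under the UPPERCASE strategy, lowercase characters make text "unsafe"
assert snowflake.case_sensitive("FoO")
assert not snowflake.case_sensitive("FOO")

assert snowflake.can_identify("FoO", "always")
assert not snowflake.can_identify("FoO", "safe")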
909 def quote_identifier(self, expression: E, identify: bool = True) -> E: 910 """ 911 Adds quotes to a given identifier. 912 913 Args: 914 expression: The expression of interest. If it's not an `Identifier`, this method is a no-op. 915 identify: If set to `False`, the quotes will only be added if the identifier is deemed 916 "unsafe", with respect to its characters and this dialect's normalization strategy. 917 """ 918 if isinstance(expression, exp.Identifier) and not isinstance(expression.parent, exp.Func): 919 name = expression.this 920 expression.set( 921 "quoted", 922 identify or self.case_sensitive(name) or not exp.SAFE_IDENTIFIER_RE.match(name), 923 ) 924 925 return expression
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an `Identifier`, this method is a no-op.
- identify: If set to `False`, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
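A brief sketch; `foo bar` is quoted even with `identify=False` because it fails `SAFE_IDENTIFIER_RE`:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

duckdb = Dialect.get_or_raise("duckdb")

print(duckdb.quote_identifier(exp.to_identifier("foo")).sql())                                    # "foo"
print(duckdb.quote_identifier(exp.to_identifier("foo"), identify=False).sql())                    # foo
print(duckdb.quote_identifier(exp.to_identifier("foo bar", quoted=False), identify=False).sql())  # "foo bar"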
927 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 928 if isinstance(path, exp.Literal): 929 path_text = path.name 930 if path.is_number: 931 path_text = f"[{path_text}]" 932 try: 933 return parse_json_path(path_text, self) 934 except ParseError as e: 935 if self.STRICT_JSON_PATH_SYNTAX: 936 logger.warning(f"Invalid JSON path syntax. {str(e)}") 937 938 return path
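A hedged sketch: a string literal is parsed into an `exp.JSONPath` via `parse_json_path`; on a `ParseError` the original expression is returned instead:

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

path = Dialect.get_or_raise("duckdb").to_json_path(exp.Literal.string("$.a[0]"))
print(type(path).__name__)  # JSONPath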
987def if_sql( 988 name: str = "IF", false_value: t.Optional[exp.Expression | str] = None 989) -> t.Callable[[Generator, exp.If], str]: 990 def _if_sql(self: Generator, expression: exp.If) -> str: 991 return self.func( 992 name, 993 expression.this, 994 expression.args.get("true"), 995 expression.args.get("false") or false_value, 996 ) 997 998 return _if_sql
1001def arrow_json_extract_sql(self: Generator, expression: JSON_EXTRACT_TYPE) -> str: 1002 this = expression.this 1003 if self.JSON_TYPE_REQUIRED_FOR_EXTRACTION and isinstance(this, exp.Literal) and this.is_string: 1004 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 1005 1006 return self.binary(expression, "->" if isinstance(expression, exp.JSONExtract) else "->>")
1076def str_position_sql( 1077 self: Generator, 1078 expression: exp.StrPosition, 1079 generate_instance: bool = False, 1080 str_position_func_name: str = "STRPOS", 1081) -> str: 1082 this = self.sql(expression, "this") 1083 substr = self.sql(expression, "substr") 1084 position = self.sql(expression, "position") 1085 instance = expression.args.get("instance") if generate_instance else None 1086 position_offset = "" 1087 1088 if position: 1089 # Normalize third 'pos' argument into 'SUBSTR(..) + offset' across dialects 1090 this = self.func("SUBSTR", this, position) 1091 position_offset = f" + {position} - 1" 1092 1093 return self.func(str_position_func_name, this, substr, instance) + position_offset
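To make the `SUBSTR(..) + offset` normalization concrete, a sketch that borrows Postgres' generator purely as a host (the rendered output is approximated in the comment):

from sqlglot import exp
from sqlglot.dialects.dialect import str_position_sql
from sqlglot.dialects.postgres import Postgres

node = exp.StrPosition(
    this=exp.column("haystack"),
    substr=exp.Literal.string("b"),
    position=exp.Literal.number(3),
)
# Searches the suffix starting at position 3, then shifts the 1-based result back
print(str_position_sql(Postgres().generator(), node))  # roughly: STRPOS(SUBSTR(haystack, 3), 'b') + 3 - 1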
1102def var_map_sql( 1103 self: Generator, expression: exp.Map | exp.VarMap, map_func_name: str = "MAP" 1104) -> str: 1105 keys = expression.args["keys"] 1106 values = expression.args["values"] 1107 1108 if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array): 1109 self.unsupported("Cannot convert array columns into map.") 1110 return self.func(map_func_name, keys, values) 1111 1112 args = [] 1113 for key, value in zip(keys.expressions, values.expressions): 1114 args.append(self.sql(key)) 1115 args.append(self.sql(value)) 1116 1117 return self.func(map_func_name, *args)
1120def build_formatted_time( 1121 exp_class: t.Type[E], dialect: str, default: t.Optional[bool | str] = None 1122) -> t.Callable[[t.List], E]: 1123 """Helper used for time expressions. 1124 1125 Args: 1126 exp_class: the expression class to instantiate. 1127 dialect: target sql dialect. 1128 default: the default format, True being time. 1129 1130 Returns: 1131 A callable that can be used to return the appropriately formatted time expression. 1132 """ 1133 1134 def _builder(args: t.List): 1135 return exp_class( 1136 this=seq_get(args, 0), 1137 format=Dialect[dialect].format_time( 1138 seq_get(args, 1) 1139 or (Dialect[dialect].TIME_FORMAT if default is True else default or None) 1140 ), 1141 ) 1142 1143 return _builder
Helper used for time expressions.
Arguments:
- exp_class: the expression class to instantiate.
- dialect: target SQL dialect.
- default: the default format, True being time.
Returns:
A callable that can be used to return the appropriately formatted time expression.
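A sketch of the builder in action, assuming Hive's `TIME_MAPPING` translates `yyyy-MM-dd` to `%Y-%m-%d`:

from sqlglot import exp
from sqlglot.dialects.dialect import build_formatted_time
from sqlglot.dialects.hive import Hive  # ensures the "hive" dialect is registered

builder = build_formatted_time(exp.StrToTime, "hive")
node = builder([exp.column("x"), exp.Literal.string("yyyy-MM-dd")])
print(node.args["format"].this)  # %Y-%m-%d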
1146def time_format( 1147 dialect: DialectType = None, 1148) -> t.Callable[[Generator, exp.UnixToStr | exp.StrToUnix], t.Optional[str]]: 1149 def _time_format(self: Generator, expression: exp.UnixToStr | exp.StrToUnix) -> t.Optional[str]: 1150 """ 1151 Returns the time format for a given expression, unless it's equivalent 1152 to the default time format of the dialect of interest. 1153 """ 1154 time_format = self.format_time(expression) 1155 return time_format if time_format != Dialect.get_or_raise(dialect).TIME_FORMAT else None 1156 1157 return _time_format
1160def build_date_delta( 1161 exp_class: t.Type[E], 1162 unit_mapping: t.Optional[t.Dict[str, str]] = None, 1163 default_unit: t.Optional[str] = "DAY", 1164) -> t.Callable[[t.List], E]: 1165 def _builder(args: t.List) -> E: 1166 unit_based = len(args) == 3 1167 this = args[2] if unit_based else seq_get(args, 0) 1168 unit = None 1169 if unit_based or default_unit: 1170 unit = args[0] if unit_based else exp.Literal.string(default_unit) 1171 unit = exp.var(unit_mapping.get(unit.name.lower(), unit.name)) if unit_mapping else unit 1172 return exp_class(this=this, expression=seq_get(args, 1), unit=unit) 1173 1174 return _builder
1177def build_date_delta_with_interval( 1178 expression_class: t.Type[E], 1179) -> t.Callable[[t.List], t.Optional[E]]: 1180 def _builder(args: t.List) -> t.Optional[E]: 1181 if len(args) < 2: 1182 return None 1183 1184 interval = args[1] 1185 1186 if not isinstance(interval, exp.Interval): 1187 raise ParseError(f"INTERVAL expression expected but got '{interval}'") 1188 1189 return expression_class(this=args[0], expression=interval.this, unit=unit_to_str(interval)) 1190 1191 return _builder
1194def date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 1195 unit = seq_get(args, 0) 1196 this = seq_get(args, 1) 1197 1198 if isinstance(this, exp.Cast) and this.is_type("date"): 1199 return exp.DateTrunc(unit=unit, this=this) 1200 return exp.TimestampTrunc(this=this, unit=unit)
1203def date_add_interval_sql( 1204 data_type: str, kind: str 1205) -> t.Callable[[Generator, exp.Expression], str]: 1206 def func(self: Generator, expression: exp.Expression) -> str: 1207 this = self.sql(expression, "this") 1208 interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression)) 1209 return f"{data_type}_{kind}({this}, {self.sql(interval)})" 1210 1211 return func
1214def timestamptrunc_sql(zone: bool = False) -> t.Callable[[Generator, exp.TimestampTrunc], str]: 1215 def _timestamptrunc_sql(self: Generator, expression: exp.TimestampTrunc) -> str: 1216 args = [unit_to_str(expression), expression.this] 1217 if zone: 1218 args.append(expression.args.get("zone")) 1219 return self.func("DATE_TRUNC", *args) 1220 1221 return _timestamptrunc_sql
1224def no_timestamp_sql(self: Generator, expression: exp.Timestamp) -> str: 1225 zone = expression.args.get("zone") 1226 if not zone: 1227 from sqlglot.optimizer.annotate_types import annotate_types 1228 1229 target_type = annotate_types(expression).type or exp.DataType.Type.TIMESTAMP 1230 return self.sql(exp.cast(expression.this, target_type)) 1231 if zone.name.lower() in TIMEZONES: 1232 return self.sql( 1233 exp.AtTimeZone( 1234 this=exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), 1235 zone=zone, 1236 ) 1237 ) 1238 return self.func("TIMESTAMP", expression.this, zone)
1241def no_time_sql(self: Generator, expression: exp.Time) -> str: 1242 # Transpile BQ's TIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIME) 1243 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ) 1244 expr = exp.cast( 1245 exp.AtTimeZone(this=this, zone=expression.args.get("zone")), exp.DataType.Type.TIME 1246 ) 1247 return self.sql(expr)
1250def no_datetime_sql(self: Generator, expression: exp.Datetime) -> str: 1251 this = expression.this 1252 expr = expression.expression 1253 1254 if expr.name.lower() in TIMEZONES: 1255 # Transpile BQ's DATETIME(timestamp, zone) to CAST(TIMESTAMPTZ <timestamp> AT TIME ZONE <zone> AS TIMESTAMP) 1256 this = exp.cast(this, exp.DataType.Type.TIMESTAMPTZ) 1257 this = exp.cast(exp.AtTimeZone(this=this, zone=expr), exp.DataType.Type.TIMESTAMP) 1258 return self.sql(this) 1259 1260 this = exp.cast(this, exp.DataType.Type.DATE) 1261 expr = exp.cast(expr, exp.DataType.Type.TIME) 1262 1263 return self.sql(exp.cast(exp.Add(this=this, expression=expr), exp.DataType.Type.TIMESTAMP))
1295def timestrtotime_sql( 1296 self: Generator, 1297 expression: exp.TimeStrToTime, 1298 include_precision: bool = False, 1299) -> str: 1300 datatype = exp.DataType.build( 1301 exp.DataType.Type.TIMESTAMPTZ 1302 if expression.args.get("zone") 1303 else exp.DataType.Type.TIMESTAMP 1304 ) 1305 1306 if isinstance(expression.this, exp.Literal) and include_precision: 1307 precision = subsecond_precision(expression.this.name) 1308 if precision > 0: 1309 datatype = exp.DataType.build( 1310 datatype.this, expressions=[exp.DataTypeParam(this=exp.Literal.number(precision))] 1311 ) 1312 1313 return self.sql(exp.cast(expression.this, datatype, dialect=self.dialect))
1321def encode_decode_sql( 1322 self: Generator, expression: exp.Expression, name: str, replace: bool = True 1323) -> str: 1324 charset = expression.args.get("charset") 1325 if charset and charset.name.lower() != "utf-8": 1326 self.unsupported(f"Expected utf-8 character set, got {charset}.") 1327 1328 return self.func(name, expression.this, expression.args.get("replace") if replace else None)
1341def count_if_to_sum(self: Generator, expression: exp.CountIf) -> str: 1342 cond = expression.this 1343 1344 if isinstance(expression.this, exp.Distinct): 1345 cond = expression.this.expressions[0] 1346 self.unsupported("DISTINCT is not supported when converting COUNT_IF to SUM") 1347 1348 return self.func("sum", exp.func("if", cond, 1, 0))
1351def trim_sql(self: Generator, expression: exp.Trim) -> str: 1352 target = self.sql(expression, "this") 1353 trim_type = self.sql(expression, "position") 1354 remove_chars = self.sql(expression, "expression") 1355 collation = self.sql(expression, "collation") 1356 1357 # Use TRIM/LTRIM/RTRIM syntax if the expression isn't database-specific 1358 if not remove_chars: 1359 return self.trim_sql(expression) 1360 1361 trim_type = f"{trim_type} " if trim_type else "" 1362 remove_chars = f"{remove_chars} " if remove_chars else "" 1363 from_part = "FROM " if trim_type or remove_chars else "" 1364 collation = f" COLLATE {collation}" if collation else "" 1365 return f"TRIM({trim_type}{remove_chars}{from_part}{target}{collation})"
1386@unsupported_args("position", "occurrence", "parameters") 1387def regexp_extract_sql( 1388 self: Generator, expression: exp.RegexpExtract | exp.RegexpExtractAll 1389) -> str: 1390 group = expression.args.get("group") 1391 1392 # Do not render group if it's the default value for this dialect 1393 if group and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP): 1394 group = None 1395 1396 return self.func(expression.sql_name(), expression.this, expression.expression, group)
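These helpers are typically wired into a dialect's `Generator` through its `TRANSFORMS` mapping. A minimal hypothetical dialect sketch (`MyDialect` is illustrative, not part of the module):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect, no_last_day_sql, rename_func, timestamptrunc_sql
from sqlglot.generator import Generator

class MyDialect(Dialect):  # hypothetical, for illustration only
    class Generator(Generator):
        TRANSFORMS = {
            **Generator.TRANSFORMS,
            exp.LastDay: no_last_day_sql,              # emulate LAST_DAY via date arithmetic
            exp.TimestampTrunc: timestamptrunc_sql(),  # render as DATE_TRUNC(unit, this)
            exp.SHA2: rename_func("SHA2"),             # plain function rename
        }

# Defining the class registers it, so it can be looked up by name
print(Dialect.get_or_raise("mydialect").generate(exp.LastDay(this=exp.column("d"))))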