sqlglot.dialects.postgres
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    DATE_ADD_OR_SUB,
    Dialect,
    JSON_EXTRACT_TYPE,
    any_value_to_max_sql,
    binary_from_function,
    bool_xor_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    getbit_sql,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    max_or_greatest,
    merge_without_target_sql,
    min_or_least,
    no_last_day_sql,
    no_map_from_entries_sql,
    no_paren_current_date_sql,
    no_pivot_sql,
    no_trycast_sql,
    build_json_extract_path,
    build_timestamp_trunc,
    rename_func,
    sha256_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    trim_sql,
    ts_or_ds_add_cast,
    strposition_sql,
    count_if_to_sum,
    groupconcat_sql,
    regexp_replace_global_modifier,
    sha2_digest_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import is_int, seq_get
from sqlglot.parser import binary_range_parser
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot.dialects.dialect import DialectType


DATE_DIFF_FACTOR = {
    "MICROSECOND": " * 1000000",
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
    "DAY": " / 86400",
}


def _date_add_sql(kind: str) -> t.Callable[[Postgres.Generator, DATE_ADD_OR_SUB], str]:
    def func(self: Postgres.Generator, expression: DATE_ADD_OR_SUB) -> str:
        if isinstance(expression, exp.TsOrDsAdd):
            expression = ts_or_ds_add_cast(expression)

        this = self.sql(expression, "this")
        unit = expression.args.get("unit")

        e = self._simplify_unless_literal(expression.expression)
        if isinstance(e, exp.Literal):
            e.set("is_string", True)
        elif e.is_number:
            e = exp.Literal.string(e.to_py())
        else:
            self.unsupported("Cannot add non literal")

        return f"{this} {kind} {self.sql(exp.Interval(this=e, unit=unit))}"

    return func


def _date_diff_sql(self: Postgres.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()
    factor = DATE_DIFF_FACTOR.get(unit)

    end = f"CAST({self.sql(expression, 'this')} AS TIMESTAMP)"
    start = f"CAST({self.sql(expression, 'expression')} AS TIMESTAMP)"

    if factor is not None:
        return f"CAST(EXTRACT(epoch FROM {end} - {start}){factor} AS BIGINT)"

    age = f"AGE({end}, {start})"

    if unit == "WEEK":
        unit = f"EXTRACT(days FROM ({end} - {start})) / 7"
    elif unit == "MONTH":
        unit = f"EXTRACT(year FROM {age}) * 12 + EXTRACT(month FROM {age})"
    elif unit == "QUARTER":
        unit = f"EXTRACT(year FROM {age}) * 4 + EXTRACT(month FROM {age}) / 3"
    elif unit == "YEAR":
        unit = f"EXTRACT(year FROM {age})"
    else:
        unit = age

    return f"CAST({unit} AS BIGINT)"


def _substring_sql(self: Postgres.Generator, expression: exp.Substring) -> str:
    this = self.sql(expression, "this")
    start = self.sql(expression, "start")
    length = self.sql(expression, "length")

    from_part = f" FROM {start}" if start else ""
    for_part = f" FOR {length}" if length else ""

    return f"SUBSTRING({this}{from_part}{for_part})"


def _auto_increment_to_serial(expression: exp.Expression) -> exp.Expression:
    auto = expression.find(exp.AutoIncrementColumnConstraint)

    if auto:
        expression.args["constraints"].remove(auto.parent)
        kind = expression.args["kind"]

        if kind.this == exp.DataType.Type.INT:
            kind.replace(exp.DataType(this=exp.DataType.Type.SERIAL))
        elif kind.this == exp.DataType.Type.SMALLINT:
            kind.replace(exp.DataType(this=exp.DataType.Type.SMALLSERIAL))
        elif kind.this == exp.DataType.Type.BIGINT:
            kind.replace(exp.DataType(this=exp.DataType.Type.BIGSERIAL))

    return expression


def _serial_to_generated(expression: exp.Expression) -> exp.Expression:
    if not isinstance(expression, exp.ColumnDef):
        return expression
    kind = expression.kind
    if not kind:
        return expression

    if kind.this == exp.DataType.Type.SERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.INT)
    elif kind.this == exp.DataType.Type.SMALLSERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.SMALLINT)
    elif kind.this == exp.DataType.Type.BIGSERIAL:
        data_type = exp.DataType(this=exp.DataType.Type.BIGINT)
    else:
        data_type = None

    if data_type:
        expression.args["kind"].replace(data_type)
        constraints = expression.args["constraints"]
        generated = exp.ColumnConstraint(kind=exp.GeneratedAsIdentityColumnConstraint(this=False))
        notnull = exp.ColumnConstraint(kind=exp.NotNullColumnConstraint())

        if notnull not in constraints:
            constraints.insert(0, notnull)
        if generated not in constraints:
            constraints.insert(0, generated)

    return expression


def _build_generate_series(args: t.List) -> exp.ExplodingGenerateSeries:
    # The goal is to convert step values like '1 day' or INTERVAL '1 day' into INTERVAL '1' day
    # Note: postgres allows calls with just two arguments -- the "step" argument defaults to 1
    step = seq_get(args, 2)
    if step is not None:
        if step.is_string:
            args[2] = exp.to_interval(step.this)
        elif isinstance(step, exp.Interval) and not step.args.get("unit"):
            args[2] = exp.to_interval(step.this.this)

    return exp.ExplodingGenerateSeries.from_arg_list(args)


def _build_to_timestamp(args: t.List) -> exp.UnixToTime | exp.StrToTime:
    # TO_TIMESTAMP accepts either a single double argument or (text, text)
    if len(args) == 1:
        # https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-TABLE
        return exp.UnixToTime.from_arg_list(args)

    # https://www.postgresql.org/docs/current/functions-formatting.html
    return build_formatted_time(exp.StrToTime, "postgres")(args)


def _json_extract_sql(
    name: str, op: str
) -> t.Callable[[Postgres.Generator, JSON_EXTRACT_TYPE], str]:
    def _generate(self: Postgres.Generator, expression: JSON_EXTRACT_TYPE) -> str:
        if expression.args.get("only_json_types"):
            return json_extract_segments(name, quoted_index=False, op=op)(self, expression)
        return json_extract_segments(name)(self, expression)

    return _generate


def _build_regexp_replace(args: t.List, dialect: DialectType = None) -> exp.RegexpReplace:
    # The signature of REGEXP_REPLACE is:
    # regexp_replace(source, pattern, replacement [, start [, N ]] [, flags ])
    #
    # Any one of `start`, `N` and `flags` can be column references, meaning that
    # unless we can statically see that the last argument is a non-integer string
    # (eg. not '0'), then it's not possible to construct the correct AST
    regexp_replace = None
    if len(args) > 3:
        last = args[-1]
        if not is_int(last.name):
            if not last.type or last.is_type(exp.DataType.Type.UNKNOWN, exp.DataType.Type.NULL):
                from sqlglot.optimizer.annotate_types import annotate_types

                last = annotate_types(last, dialect=dialect)

            if last.is_type(*exp.DataType.TEXT_TYPES):
                regexp_replace = exp.RegexpReplace.from_arg_list(args[:-1])
                regexp_replace.set("modifiers", last)

    regexp_replace = regexp_replace or exp.RegexpReplace.from_arg_list(args)
    regexp_replace.set("single_replace", True)
    return regexp_replace


def _unix_to_time_sql(self: Postgres.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp, self.format_time(expression))

    return self.func(
        "TO_TIMESTAMP",
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)),
        self.format_time(expression),
    )


def _build_levenshtein_less_equal(args: t.List) -> exp.Levenshtein:
    # Postgres has two signatures for levenshtein_less_equal function, but in both cases
    # max_dist is the last argument
    # levenshtein_less_equal(source, target, ins_cost, del_cost, sub_cost, max_d)
    # levenshtein_less_equal(source, target, max_d)
    max_dist = args.pop()

    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        ins_cost=seq_get(args, 2),
        del_cost=seq_get(args, 3),
        sub_cost=seq_get(args, 4),
        max_dist=max_dist,
    )


def _levenshtein_sql(self: Postgres.Generator, expression: exp.Levenshtein) -> str:
    name = "LEVENSHTEIN_LESS_EQUAL" if expression.args.get("max_dist") else "LEVENSHTEIN"

    return rename_func(name)(self, expression)


def _versioned_anyvalue_sql(self: Postgres.Generator, expression: exp.AnyValue) -> str:
    # https://www.postgresql.org/docs/16/functions-aggregate.html
    # https://www.postgresql.org/about/featurematrix/
    if self.dialect.version < (16,):
        return any_value_to_max_sql(self, expression)

    return rename_func("ANY_VALUE")(self, expression)


def _round_sql(self: Postgres.Generator, expression: exp.Round) -> str:
    this = self.sql(expression, "this")
    decimals = self.sql(expression, "decimals")

    if not decimals:
        return self.func("ROUND", this)

    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression, dialect=self.dialect)

    # ROUND(double precision, integer) is not permitted in Postgres
    # so it's necessary to cast to decimal before rounding.
    if expression.this.is_type(exp.DataType.Type.DOUBLE):
        decimal_type = exp.DataType.build(
            exp.DataType.Type.DECIMAL, expressions=expression.expressions
        )
        this = self.sql(exp.Cast(this=this, to=decimal_type))

    return self.func("ROUND", this, decimals)


class Postgres(Dialect):
    INDEX_OFFSET = 1
    TYPED_DIVISION = True
    CONCAT_COALESCE = True
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    TABLESAMPLE_SIZE_IS_PERCENT = True
    TABLES_REFERENCEABLE_AS_COLUMNS = True

    DEFAULT_FUNCTIONS_COLUMN_NAMES = {
        exp.ExplodingGenerateSeries: "generate_series",
    }

    TIME_MAPPING = {
        "d": "%u",  # 1-based day of week
        "D": "%u",  # 1-based day of week
        "dd": "%d",  # day of month
        "DD": "%d",  # day of month
        "ddd": "%j",  # zero padded day of year
        "DDD": "%j",  # zero padded day of year
        "FMDD": "%-d",  # - is no leading zero for Python; same for FM in postgres
        "FMDDD": "%-j",  # day of year
        "FMHH12": "%-I",  # 9
        "FMHH24": "%-H",  # 9
        "FMMI": "%-M",  # Minute
        "FMMM": "%-m",  # 1
        "FMSS": "%-S",  # Second
        "HH12": "%I",  # 09
        "HH24": "%H",  # 09
        "mi": "%M",  # zero padded minute
        "MI": "%M",  # zero padded minute
        "mm": "%m",  # 01
        "MM": "%m",  # 01
        "OF": "%z",  # utc offset
        "ss": "%S",  # zero padded second
        "SS": "%S",  # zero padded second
        "TMDay": "%A",  # TM is locale dependent
        "TMDy": "%a",
        "TMMon": "%b",  # Sep
        "TMMonth": "%B",  # September
        "TZ": "%Z",  # uppercase timezone name
        "US": "%f",  # zero padded microsecond
        "ww": "%U",  # 1-based week of year
        "WW": "%U",  # 1-based week of year
        "yy": "%y",  # 15
        "YY": "%y",  # 15
        "yyyy": "%Y",  # 2015
        "YYYY": "%Y",  # 2015
    }

    class Tokenizer(tokens.Tokenizer):
        BIT_STRINGS = [("b'", "'"), ("B'", "'")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "~": TokenType.RLIKE,
            "@@": TokenType.DAT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "?&": TokenType.QMARK_AMP,
            "?|": TokenType.QMARK_PIPE,
            "#-": TokenType.HASH_DASH,
            "|/": TokenType.PIPE_SLASH,
            "||/": TokenType.DPIPE_SLASH,
            "BEGIN": TokenType.BEGIN,
            "BIGSERIAL": TokenType.BIGSERIAL,
            "CONSTRAINT TRIGGER": TokenType.COMMAND,
            "CSTRING": TokenType.PSEUDO_TYPE,
            "DECLARE": TokenType.COMMAND,
            "DO": TokenType.COMMAND,
            "EXEC": TokenType.COMMAND,
            "HSTORE": TokenType.HSTORE,
            "INT8": TokenType.BIGINT,
            "MONEY": TokenType.MONEY,
            "NAME": TokenType.NAME,
            "OID": TokenType.OBJECT_IDENTIFIER,
            "ONLY": TokenType.ONLY,
            "POINT": TokenType.POINT,
            "REFRESH": TokenType.COMMAND,
            "REINDEX": TokenType.COMMAND,
            "RESET": TokenType.COMMAND,
            "SERIAL": TokenType.SERIAL,
            "SMALLSERIAL": TokenType.SMALLSERIAL,
            "TEMP": TokenType.TEMPORARY,
            "REGCLASS": TokenType.OBJECT_IDENTIFIER,
            "REGCOLLATION": TokenType.OBJECT_IDENTIFIER,
            "REGCONFIG": TokenType.OBJECT_IDENTIFIER,
            "REGDICTIONARY": TokenType.OBJECT_IDENTIFIER,
            "REGNAMESPACE": TokenType.OBJECT_IDENTIFIER,
            "REGOPER": TokenType.OBJECT_IDENTIFIER,
            "REGOPERATOR": TokenType.OBJECT_IDENTIFIER,
            "REGPROC": TokenType.OBJECT_IDENTIFIER,
            "REGPROCEDURE": TokenType.OBJECT_IDENTIFIER,
            "REGROLE": TokenType.OBJECT_IDENTIFIER,
            "REGTYPE": TokenType.OBJECT_IDENTIFIER,
            "FLOAT": TokenType.DOUBLE,
            "XML": TokenType.XML,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("DIV")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

        VAR_SINGLE_TOKENS = {"$"}

    class Parser(parser.Parser):
        SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = True

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "SET": lambda self: self.expression(exp.SetConfigProperty, this=self._parse_set()),
        }
        PROPERTY_PARSERS.pop("INPUT")

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder, jdbc=True),
            TokenType.MOD: lambda self: self._parse_query_parameter(),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_PREPEND": lambda args: exp.ArrayPrepend(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "DATE_TRUNC": build_timestamp_trunc,
            "DIV": lambda args: exp.cast(
                binary_from_function(exp.IntDiv)(args), exp.DataType.Type.DECIMAL
            ),
            "GENERATE_SERIES": _build_generate_series,
            "GET_BIT": lambda args: exp.Getbit(
                this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
            ),
            "JSON_EXTRACT_PATH": build_json_extract_path(exp.JSONExtract),
            "JSON_EXTRACT_PATH_TEXT": build_json_extract_path(exp.JSONExtractScalar),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), encoding=seq_get(args, 1)),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": exp.TimestampFromParts.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_REPLACE": _build_regexp_replace,
            "TO_CHAR": build_formatted_time(exp.TimeToStr, "postgres"),
            "TO_DATE": build_formatted_time(exp.StrToDate, "postgres"),
            "TO_TIMESTAMP": _build_to_timestamp,
            "UNNEST": exp.Explode.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA384": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(384)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "LEVENSHTEIN_LESS_EQUAL": _build_levenshtein_less_equal,
            "JSON_OBJECT_AGG": lambda args: exp.JSONObjectAgg(expressions=args),
            "JSONB_OBJECT_AGG": exp.JSONBObjectAgg.from_arg_list,
            "WIDTH_BUCKET": lambda args: exp.WidthBucket(
                this=seq_get(args, 0), threshold=seq_get(args, 1)
            )
            if len(args) == 2
            else exp.WidthBucket.from_arg_list(args),
        }

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_SCHEMA: exp.CurrentSchema,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "JSON_AGG": lambda self: self.expression(
                exp.JSONArrayAgg,
                this=self._parse_lambda(),
                order=self._parse_order(),
            ),
            "JSONB_EXISTS": lambda self: self._parse_jsonb_exists(),
        }

        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.HASH: exp.BitwiseXor,
        }

        EXPONENT = {
            TokenType.CARET: exp.Pow,
        }

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.DAT: lambda self, this: self.expression(
                exp.MatchAgainst, this=self._parse_bitwise(), expressions=[this]
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_commit_or_rollback(),
        }

        JSON_ARROWS_REQUIRE_JSON_TYPE = True

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.ARROW: lambda self, this, path: self.validate_expression(
                build_json_extract_path(
                    exp.JSONExtract, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE
                )([this, path])
            ),
            TokenType.DARROW: lambda self, this, path: self.validate_expression(
                build_json_extract_path(
                    exp.JSONExtractScalar, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE
                )([this, path])
            ),
        }

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            this = (
                self._parse_wrapped(self._parse_id_var)
                if self._match(TokenType.L_PAREN, advance=False)
                else None
            )
            self._match_text_seq("S")
            return self.expression(exp.Placeholder, this=this)

        def _parse_date_part(self) -> exp.Expression:
            part = self._parse_type()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()

            if part and isinstance(part, (exp.Column, exp.Literal)):
                part = exp.var(part.name)

            return self.expression(exp.Extract, this=part, expression=value)

        def _parse_unique_key(self) -> t.Optional[exp.Expression]:
            return None

        def _parse_jsonb_exists(self) -> exp.JSONBExists:
            return self.expression(
                exp.JSONBExists,
                this=self._parse_bitwise(),
                path=self._match(TokenType.COMMA)
                and self.dialect.to_json_path(self._parse_bitwise()),
            )

        def _parse_generated_as_identity(
            self,
        ) -> (
            exp.GeneratedAsIdentityColumnConstraint
            | exp.ComputedColumnConstraint
            | exp.GeneratedAsRowColumnConstraint
        ):
            this = super()._parse_generated_as_identity()

            if self._match_text_seq("STORED"):
                this = self.expression(exp.ComputedColumnConstraint, this=this.expression)

            return this

        def _parse_user_defined_type(
            self, identifier: exp.Identifier
        ) -> t.Optional[exp.Expression]:
            udt_type: exp.Identifier | exp.Dot = identifier

            while self._match(TokenType.DOT):
                part = self._parse_id_var()
                if part:
                    udt_type = exp.Dot(this=udt_type, expression=part)

            return exp.DataType.build(udt_type, udt=True)

    class Generator(generator.Generator):
        SINGLE_STRING_INTERVAL = True
        RENAME_TABLE_WITH_DB = False
        LOCKING_READS_SUPPORTED = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        NVL2_SUPPORTED = False
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "%"
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        SUPPORTS_SELECT_INTO = True
        JSON_TYPE_REQUIRED_FOR_EXTRACTION = True
        SUPPORTS_UNLOGGED_TABLES = True
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_DIM_REQUIRED = True
        SUPPORTS_BETWEEN_FLAGS = True

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
            exp.DataType.Type.BINARY: "BYTEA",
            exp.DataType.Type.VARBINARY: "BYTEA",
            exp.DataType.Type.ROWVERSION: "BYTEA",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.BLOB: "BYTEA",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: _versioned_anyvalue_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayPrepend: lambda self, e: self.func("ARRAY_PREPEND", e.expression, e.this),
            exp.BitwiseAndAgg: rename_func("BIT_AND"),
            exp.BitwiseOrAgg: rename_func("BIT_OR"),
            exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
            exp.BitwiseXorAgg: rename_func("BIT_XOR"),
            exp.ColumnDef: transforms.preprocess(
                [_auto_increment_to_serial, _serial_to_generated]
            ),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_add_sql("+"),
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _date_add_sql("-"),
            exp.Explode: rename_func("UNNEST"),
            exp.ExplodingGenerateSeries: rename_func("GENERATE_SERIES"),
            exp.Getbit: getbit_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.IntDiv: rename_func("DIV"),
            exp.JSONArrayAgg: lambda self, e: self.func(
                "JSON_AGG",
                self.sql(e, "this"),
                suffix=f"{self.sql(e, 'order')})",
            ),
            exp.JSONExtract: _json_extract_sql("JSON_EXTRACT_PATH", "->"),
            exp.JSONExtractScalar: _json_extract_sql("JSON_EXTRACT_PATH_TEXT", "->>"),
            exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
            exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
            exp.JSONBContains: lambda self, e: self.binary(e, "?"),
            exp.ParseJSON: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.JSON)),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONPathSubscript: lambda self, e: self.json_path_part(e.this),
            exp.LastDay: no_last_day_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.Max: max_or_greatest,
            exp.MapFromEntries: no_map_from_entries_sql,
            exp.Min: min_or_least,
            exp.Merge: merge_without_target_sql,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: no_pivot_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.RegexpLike: lambda self, e: self.binary(e, "~"),
            exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("position"),
                e.args.get("occurrence"),
                regexp_replace_global_modifier(e),
            ),
            exp.Round: _round_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_semi_and_anti_joins,
                    transforms.eliminate_qualify,
                ]
            ),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"),
            exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.StructExtract: struct_extract_sql,
            exp.Substring: _substring_sql,
            exp.TimeFromParts: rename_func("MAKE_TIME"),
            exp.TimestampFromParts: rename_func("MAKE_TIMESTAMP"),
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e)
            if e.args.get("format")
            else self.tochar_sql(e),
            exp.Trim: trim_sql,
            exp.TryCast: no_trycast_sql,
            exp.TsOrDsAdd: _date_add_sql("+"),
            exp.TsOrDsDiff: _date_diff_sql,
            exp.Uuid: lambda *_: "GEN_RANDOM_UUID()",
            exp.TimeToUnix: lambda self, e: self.func(
                "DATE_PART", exp.Literal.string("epoch"), e.this
            ),
            exp.VariancePop: rename_func("VAR_POP"),
            exp.Variance: rename_func("VAR_SAMP"),
            exp.Xor: bool_xor_sql,
            exp.Unicode: rename_func("ASCII"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Levenshtein: _levenshtein_sql,
            exp.JSONObjectAgg: rename_func("JSON_OBJECT_AGG"),
            exp.JSONBObjectAgg: rename_func("JSONB_OBJECT_AGG"),
            exp.CountIf: count_if_to_sum,
        }

        TRANSFORMS.pop(exp.CommentColumnConstraint)

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def schemacommentproperty_sql(self, expression: exp.SchemaCommentProperty) -> str:
            self.unsupported("Table comments are not supported in the CREATE statement")
            return ""

        def commentcolumnconstraint_sql(self, expression: exp.CommentColumnConstraint) -> str:
            self.unsupported("Column comments are not supported in the CREATE statement")
            return ""

        def unnest_sql(self, expression: exp.Unnest) -> str:
            if len(expression.expressions) == 1:
                arg = expression.expressions[0]
                if isinstance(arg, exp.GenerateDateArray):
                    generate_series: exp.Expression = exp.GenerateSeries(**arg.args)
                    if isinstance(expression.parent, (exp.From, exp.Join)):
                        generate_series = (
                            exp.select("value::date")
                            .from_(exp.Table(this=generate_series).as_("_t", table=["value"]))
                            .subquery(expression.args.get("alias") or "_unnested_generate_series")
                        )
                    return self.sql(generate_series)

                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(arg, dialect=self.dialect)
                if this.is_type("array<json>"):
                    while isinstance(this, exp.Cast):
                        this = this.this

                    arg_as_json = self.sql(exp.cast(this, exp.DataType.Type.JSON))
                    alias = self.sql(expression, "alias")
                    alias = f" AS {alias}" if alias else ""

                    if expression.args.get("offset"):
                        self.unsupported("Unsupported JSON_ARRAY_ELEMENTS with offset")

                    return f"JSON_ARRAY_ELEMENTS({arg_as_json}){alias}"

            return super().unnest_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Forms like ARRAY[1, 2, 3][3] aren't allowed; we need to wrap the ARRAY."""
            if isinstance(expression.this, exp.Array):
                expression.set("this", exp.paren(expression.this, copy=False))

            return super().bracket_sql(expression)

        def matchagainst_sql(self, expression: exp.MatchAgainst) -> str:
            this = self.sql(expression, "this")
            expressions = [f"{self.sql(e)} @@ {this}" for e in expression.expressions]
            sql = " OR ".join(expressions)
            return f"({sql})" if len(expressions) > 1 else sql

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f"({exprs})" if exprs else ""

            access_method = self.sql(expression, "access_method")
            access_method = f"ACCESS METHOD {access_method}" if access_method else ""
            tablespace = self.sql(expression, "tablespace")
            tablespace = f"TABLESPACE {tablespace}" if tablespace else ""
            option = self.sql(expression, "option")

            return f"SET {exprs}{access_method}{tablespace}{option}"

        def datatype_sql(self, expression: exp.DataType) -> str:
            if expression.is_type(exp.DataType.Type.ARRAY):
                if expression.expressions:
                    values = self.expressions(expression, key="values", flat=True)
                    return f"{self.expressions(expression, flat=True)}[{values}]"
                return "ARRAY"

            if (
                expression.is_type(exp.DataType.Type.DOUBLE, exp.DataType.Type.FLOAT)
                and expression.expressions
            ):
                # Postgres doesn't support precision for REAL and DOUBLE PRECISION types
                return f"FLOAT({self.expressions(expression, flat=True)})"

            return super().datatype_sql(expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # Postgres casts DIV() to decimal for transpilation but when roundtripping it's superfluous
            if isinstance(this, exp.IntDiv) and expression.to == exp.DataType.build("decimal"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def array_sql(self, expression: exp.Array) -> str:
            exprs = expression.expressions
            func_name = self.normalize_func("ARRAY")

            if isinstance(seq_get(exprs, 0), exp.Select):
                return f"{func_name}({self.sql(exprs[0])})"

            return f"{func_name}{inline_array_sql(self, expression)}"

        def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
            return f"GENERATED ALWAYS AS ({self.sql(expression, 'this')}) STORED"

        def isascii_sql(self, expression: exp.IsAscii) -> str:
            return f"({self.sql(expression.this)} ~ '^[[:ascii:]]*$')"

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            # https://www.postgresql.org/docs/current/functions-window.html
            self.unsupported("PostgreSQL does not support IGNORE NULLS.")
            return self.sql(expression.this)

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            # https://www.postgresql.org/docs/current/functions-window.html
            self.unsupported("PostgreSQL does not support RESPECT NULLS.")
            return self.sql(expression.this)

        @unsupported_args("this")
        def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
            return "CURRENT_SCHEMA"

        def interval_sql(self, expression: exp.Interval) -> str:
            unit = expression.text("unit").lower()

            if unit.startswith("quarter") and isinstance(expression.this, exp.Literal):
                expression.this.replace(exp.Literal.number(int(expression.this.to_py()) * 3))
                expression.args["unit"].replace(exp.var("MONTH"))

            return super().interval_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            if expression.args.get("jdbc"):
                return "?"

            this = f"({expression.name})" if expression.this else ""
            return f"{self.NAMED_PLACEHOLDER_TOKEN}{this}s"

        def arraycontains_sql(self, expression: exp.ArrayContains) -> str:
            # Convert DuckDB's LIST_CONTAINS(array, value) to PostgreSQL
            # DuckDB behavior:
            #   - LIST_CONTAINS([1,2,3], 2) -> true
            #   - LIST_CONTAINS([1,2,3], 4) -> false
            #   - LIST_CONTAINS([1,2,NULL], 4) -> false (not NULL)
            #   - LIST_CONTAINS([1,2,3], NULL) -> NULL
            #
            # PostgreSQL equivalent: CASE WHEN value IS NULL THEN NULL
            # ELSE COALESCE(value = ANY(array), FALSE) END
            value = expression.expression
            array = expression.this

            coalesce_expr = exp.Coalesce(
                this=value.eq(exp.Any(this=exp.paren(expression=array, copy=False))),
                expressions=[exp.false()],
            )

            case_expr = (
                exp.Case()
                .when(exp.Is(this=value, expression=exp.null()), exp.null(), copy=False)
                .else_(coalesce_expr, copy=False)
            )

            return self.sql(case_expr)
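A minimal usage sketch of the dialect defined above (the queries are illustrative, and the exact printed output may vary across sqlglot versions):

import sqlglot

# "->" parses through COLUMN_OPERATORS into exp.JSONExtract.
ast = sqlglot.parse_one("SELECT x -> 'a' FROM t", read="postgres")
print(type(ast.selects[0]).__name__)  # JSONExtract

# Single-argument TO_TIMESTAMP is epoch seconds (_build_to_timestamp),
# so it parses into exp.UnixToTime rather than exp.StrToTime.
print(type(sqlglot.parse_one("TO_TIMESTAMP(1700000000)", read="postgres")).__name__)  # UnixToTime

# SERIAL columns are rewritten by _serial_to_generated during generation,
# so even a Postgres-to-Postgres round trip normalizes them to an identity column.
print(sqlglot.transpile("CREATE TABLE t (id SERIAL)", read="postgres", write="postgres")[0])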
class Postgres(Dialect)
TYPED_DIVISION = True

Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division when both a and b are integers.
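A hedged illustration of the flag's effect (the target dialect is illustrative and the exact output is version-dependent):

import sqlglot

# In Postgres, INT / INT is integer division, while DuckDB's / is float
# division, so sqlglot is expected to preserve semantics, e.g. by emitting
# DuckDB's integer-division operator or an equivalent cast.
print(sqlglot.transpile("SELECT 5 / 2", read="postgres", write="duckdb")[0])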
CONCAT_COALESCE = True

A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
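A sketch of why this flag matters when transpiling (MySQL chosen as a NULL-propagating target; the exact output is version-dependent):

import sqlglot

# Postgres CONCAT skips NULL arguments; MySQL's CONCAT returns NULL if any
# argument is NULL, so a faithful translation is expected to COALESCE each
# argument to an empty string.
print(sqlglot.transpile("SELECT CONCAT(a, b) FROM t", read="postgres", write="mysql")[0])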
NULL_ORDERING = "nulls_are_large"

Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
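For reference, this is a plain class attribute; "nulls_are_large" encodes Postgres's actual behavior, where NULLs sort after every non-NULL value in ascending order:

from sqlglot.dialects.postgres import Postgres

# Consulted when deciding whether an ORDER BY without an explicit
# NULLS FIRST/LAST needs one added to keep the same row order elsewhere.
print(Postgres.NULL_ORDERING)  # nulls_are_large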
TABLES_REFERENCEABLE_AS_COLUMNS = True

Whether table names can be referenced as columns (treated as structs).
BigQuery allows tables to be referenced as columns in queries, automatically treating them as struct values containing all of the table's columns.
For example, in BigQuery: SELECT t FROM my_table AS t -- returns the entire row as a struct
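A small parsing sketch: the bare alias in the projection is simply a column reference at parse time; this flag tells later stages that it may legally resolve to a whole-row value.

import sqlglot

ast = sqlglot.parse_one("SELECT t FROM my_table AS t", read="postgres")
print(type(ast.selects[0]).__name__)  # Column -- may resolve to the row struct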
DEFAULT_FUNCTIONS_COLUMN_NAMES

Maps function expressions to their default output column name(s).
For example, in Postgres the generate_series function outputs a column named "generate_series" by default, so the ExplodingGenerateSeries expression is mapped to the string "generate_series".
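A quick check of the mapping, reading the attribute shown in the source above:

from sqlglot import exp
from sqlglot.dialects.postgres import Postgres

# In Postgres, SELECT * FROM GENERATE_SERIES(1, 3) yields a column literally
# named "generate_series", which is what this mapping records.
print(Postgres.DEFAULT_FUNCTIONS_COLUMN_NAMES[exp.ExplodingGenerateSeries])  # generate_series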
TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
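A hedged sketch of the mapping in action (DuckDB as target; the exact emitted SQL may differ by version):

import sqlglot

# TO_CHAR's Postgres pattern is translated through TIME_MAPPING into strftime
# tokens, which DuckDB-style dialects render with STRFTIME.
print(sqlglot.transpile(
    "SELECT TO_CHAR(d, 'YYYY-MM-DD HH24:MI:SS') FROM t",
    read="postgres",
    write="duckdb",
)[0])
# Expected along the lines of: SELECT STRFTIME(d, '%Y-%m-%d %H:%M:%S') FROM t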
346 class Tokenizer(tokens.Tokenizer): 347 BIT_STRINGS = [("b'", "'"), ("B'", "'")] 348 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 349 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 350 HEREDOC_STRINGS = ["$"] 351 352 HEREDOC_TAG_IS_IDENTIFIER = True 353 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 354 355 KEYWORDS = { 356 **tokens.Tokenizer.KEYWORDS, 357 "~": TokenType.RLIKE, 358 "@@": TokenType.DAT, 359 "@>": TokenType.AT_GT, 360 "<@": TokenType.LT_AT, 361 "?&": TokenType.QMARK_AMP, 362 "?|": TokenType.QMARK_PIPE, 363 "#-": TokenType.HASH_DASH, 364 "|/": TokenType.PIPE_SLASH, 365 "||/": TokenType.DPIPE_SLASH, 366 "BEGIN": TokenType.BEGIN, 367 "BIGSERIAL": TokenType.BIGSERIAL, 368 "CONSTRAINT TRIGGER": TokenType.COMMAND, 369 "CSTRING": TokenType.PSEUDO_TYPE, 370 "DECLARE": TokenType.COMMAND, 371 "DO": TokenType.COMMAND, 372 "EXEC": TokenType.COMMAND, 373 "HSTORE": TokenType.HSTORE, 374 "INT8": TokenType.BIGINT, 375 "MONEY": TokenType.MONEY, 376 "NAME": TokenType.NAME, 377 "OID": TokenType.OBJECT_IDENTIFIER, 378 "ONLY": TokenType.ONLY, 379 "POINT": TokenType.POINT, 380 "REFRESH": TokenType.COMMAND, 381 "REINDEX": TokenType.COMMAND, 382 "RESET": TokenType.COMMAND, 383 "SERIAL": TokenType.SERIAL, 384 "SMALLSERIAL": TokenType.SMALLSERIAL, 385 "TEMP": TokenType.TEMPORARY, 386 "REGCLASS": TokenType.OBJECT_IDENTIFIER, 387 "REGCOLLATION": TokenType.OBJECT_IDENTIFIER, 388 "REGCONFIG": TokenType.OBJECT_IDENTIFIER, 389 "REGDICTIONARY": TokenType.OBJECT_IDENTIFIER, 390 "REGNAMESPACE": TokenType.OBJECT_IDENTIFIER, 391 "REGOPER": TokenType.OBJECT_IDENTIFIER, 392 "REGOPERATOR": TokenType.OBJECT_IDENTIFIER, 393 "REGPROC": TokenType.OBJECT_IDENTIFIER, 394 "REGPROCEDURE": TokenType.OBJECT_IDENTIFIER, 395 "REGROLE": TokenType.OBJECT_IDENTIFIER, 396 "REGTYPE": TokenType.OBJECT_IDENTIFIER, 397 "FLOAT": TokenType.DOUBLE, 398 "XML": TokenType.XML, 399 } 400 KEYWORDS.pop("/*+") 401 KEYWORDS.pop("DIV") 402 403 SINGLE_TOKENS = { 404 **tokens.Tokenizer.SINGLE_TOKENS, 405 "$": TokenType.HEREDOC_STRING, 406 } 407 408 VAR_SINGLE_TOKENS = {"$"}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = True

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "SET": lambda self: self.expression(exp.SetConfigProperty, this=self._parse_set()),
    }
    PROPERTY_PARSERS.pop("INPUT")

    PLACEHOLDER_PARSERS = {
        **parser.Parser.PLACEHOLDER_PARSERS,
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder, jdbc=True),
        TokenType.MOD: lambda self: self._parse_query_parameter(),
    }

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAY_PREPEND": lambda args: exp.ArrayPrepend(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
        "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
        "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
        "DATE_TRUNC": build_timestamp_trunc,
        "DIV": lambda args: exp.cast(
            binary_from_function(exp.IntDiv)(args), exp.DataType.Type.DECIMAL
        ),
        "GENERATE_SERIES": _build_generate_series,
        "GET_BIT": lambda args: exp.Getbit(
            this=seq_get(args, 0), expression=seq_get(args, 1), zero_is_msb=True
        ),
        "JSON_EXTRACT_PATH": build_json_extract_path(exp.JSONExtract),
        "JSON_EXTRACT_PATH_TEXT": build_json_extract_path(exp.JSONExtractScalar),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), encoding=seq_get(args, 1)),
        "MAKE_TIME": exp.TimeFromParts.from_arg_list,
        "MAKE_TIMESTAMP": exp.TimestampFromParts.from_arg_list,
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_REPLACE": _build_regexp_replace,
        "TO_CHAR": build_formatted_time(exp.TimeToStr, "postgres"),
        "TO_DATE": build_formatted_time(exp.StrToDate, "postgres"),
        "TO_TIMESTAMP": _build_to_timestamp,
        "UNNEST": exp.Explode.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA384": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(384)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "LEVENSHTEIN_LESS_EQUAL": _build_levenshtein_less_equal,
        "JSON_OBJECT_AGG": lambda args: exp.JSONObjectAgg(expressions=args),
        "JSONB_OBJECT_AGG": exp.JSONBObjectAgg.from_arg_list,
        "WIDTH_BUCKET": lambda args: exp.WidthBucket(
            this=seq_get(args, 0), threshold=seq_get(args, 1)
        )
        if len(args) == 2
        else exp.WidthBucket.from_arg_list(args),
    }

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_SCHEMA: exp.CurrentSchema,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "JSON_AGG": lambda self: self.expression(
            exp.JSONArrayAgg,
            this=self._parse_lambda(),
            order=self._parse_order(),
        ),
        "JSONB_EXISTS": lambda self: self._parse_jsonb_exists(),
    }

    BITWISE = {
        **parser.Parser.BITWISE,
        TokenType.HASH: exp.BitwiseXor,
    }

    EXPONENT = {
        TokenType.CARET: exp.Pow,
    }

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
        TokenType.DAT: lambda self, this: self.expression(
            exp.MatchAgainst, this=self._parse_bitwise(), expressions=[this]
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
    }

    JSON_ARROWS_REQUIRE_JSON_TYPE = True

    COLUMN_OPERATORS = {
        **parser.Parser.COLUMN_OPERATORS,
        TokenType.ARROW: lambda self, this, path: self.validate_expression(
            build_json_extract_path(
                exp.JSONExtract, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE
            )([this, path])
        ),
        TokenType.DARROW: lambda self, this, path: self.validate_expression(
            build_json_extract_path(
                exp.JSONExtractScalar, arrow_req_json_type=self.JSON_ARROWS_REQUIRE_JSON_TYPE
            )([this, path])
        ),
    }

    def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
        this = (
            self._parse_wrapped(self._parse_id_var)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )
        self._match_text_seq("S")
        return self.expression(exp.Placeholder, this=this)

    def _parse_date_part(self) -> exp.Expression:
        part = self._parse_type()
        self._match(TokenType.COMMA)
        value = self._parse_bitwise()

        if part and isinstance(part, (exp.Column, exp.Literal)):
            part = exp.var(part.name)

        return self.expression(exp.Extract, this=part, expression=value)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return None

    def _parse_jsonb_exists(self) -> exp.JSONBExists:
        return self.expression(
            exp.JSONBExists,
            this=self._parse_bitwise(),
            path=self._match(TokenType.COMMA)
            and self.dialect.to_json_path(self._parse_bitwise()),
        )

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        this = super()._parse_generated_as_identity()

        if self._match_text_seq("STORED"):
            this = self.expression(exp.ComputedColumnConstraint, this=this.expression)

        return this

    def _parse_user_defined_type(
        self, identifier: exp.Identifier
    ) -> t.Optional[exp.Expression]:
        udt_type: exp.Identifier | exp.Dot = identifier

        while self._match(TokenType.DOT):
            part = self._parse_id_var()
            if part:
                udt_type = exp.Dot(this=udt_type, expression=part)

        return exp.DataType.build(udt_type, udt=True)
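For instance (a hedged sketch), the DARROW (`->>`) column operator above builds a JSONExtractScalar node with a parsed JSON path:

>>> import sqlglot
>>> expr = sqlglot.parse_one("SELECT data ->> 'name' FROM t", read="postgres")
>>> type(expr.selects[0]).__name__
'JSONExtractScalar'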
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
class Generator(generator.Generator):
    SINGLE_STRING_INTERVAL = True
    RENAME_TABLE_WITH_DB = False
    LOCKING_READS_SUPPORTED = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    NVL2_SUPPORTED = False
    PARAMETER_TOKEN = "$"
    NAMED_PLACEHOLDER_TOKEN = "%"
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
    SUPPORTS_SELECT_INTO = True
    JSON_TYPE_REQUIRED_FOR_EXTRACTION = True
    SUPPORTS_UNLOGGED_TABLES = True
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_WINDOW_EXCLUDE = True
    COPY_HAS_INTO_KEYWORD = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_DIM_REQUIRED = True
    SUPPORTS_BETWEEN_FLAGS = True

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TINYINT: "SMALLINT",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.DOUBLE: "DOUBLE PRECISION",
        exp.DataType.Type.BINARY: "BYTEA",
        exp.DataType.Type.VARBINARY: "BYTEA",
        exp.DataType.Type.ROWVERSION: "BYTEA",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.BLOB: "BYTEA",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: _versioned_anyvalue_sql,
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArrayPrepend: lambda self, e: self.func("ARRAY_PREPEND", e.expression, e.this),
        exp.BitwiseAndAgg: rename_func("BIT_AND"),
        exp.BitwiseOrAgg: rename_func("BIT_OR"),
        exp.BitwiseXor: lambda self, e: self.binary(e, "#"),
        exp.BitwiseXorAgg: rename_func("BIT_XOR"),
        exp.ColumnDef: transforms.preprocess([_auto_increment_to_serial, _serial_to_generated]),
        exp.CurrentDate: no_paren_current_date_sql,
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_add_sql("+"),
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _date_add_sql("-"),
        exp.Explode: rename_func("UNNEST"),
        exp.ExplodingGenerateSeries: rename_func("GENERATE_SERIES"),
        exp.Getbit: getbit_sql,
        exp.GroupConcat: lambda self, e: groupconcat_sql(
            self, e, func_name="STRING_AGG", within_group=False
        ),
        exp.IntDiv: rename_func("DIV"),
        exp.JSONArrayAgg: lambda self, e: self.func(
            "JSON_AGG",
            self.sql(e, "this"),
            suffix=f"{self.sql(e, 'order')})",
        ),
        exp.JSONExtract: _json_extract_sql("JSON_EXTRACT_PATH", "->"),
        exp.JSONExtractScalar: _json_extract_sql("JSON_EXTRACT_PATH_TEXT", "->>"),
        exp.JSONBExtract: lambda self, e: self.binary(e, "#>"),
        exp.JSONBExtractScalar: lambda self, e: self.binary(e, "#>>"),
        exp.JSONBContains: lambda self, e: self.binary(e, "?"),
        exp.ParseJSON: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.JSON)),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.JSONPathSubscript: lambda self, e: self.json_path_part(e.this),
        exp.LastDay: no_last_day_sql,
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.Max: max_or_greatest,
        exp.MapFromEntries: no_map_from_entries_sql,
        exp.Min: min_or_least,
        exp.Merge: merge_without_target_sql,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: no_pivot_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.RegexpLike: lambda self, e: self.binary(e, "~"),
        exp.RegexpILike: lambda self, e: self.binary(e, "~*"),
        exp.RegexpReplace: lambda self, e: self.func(
            "REGEXP_REPLACE",
            e.this,
            e.expression,
            e.args.get("replacement"),
            e.args.get("position"),
            e.args.get("occurrence"),
            regexp_replace_global_modifier(e),
        ),
        exp.Round: _round_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_semi_and_anti_joins,
                transforms.eliminate_qualify,
            ]
        ),
        exp.SHA2: sha256_sql,
        exp.SHA2Digest: sha2_digest_sql,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, func_name="POSITION"),
        exp.StrToDate: lambda self, e: self.func("TO_DATE", e.this, self.format_time(e)),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.StructExtract: struct_extract_sql,
        exp.Substring: _substring_sql,
        exp.TimeFromParts: rename_func("MAKE_TIME"),
        exp.TimestampFromParts: rename_func("MAKE_TIMESTAMP"),
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func("TO_CHAR", e.this, self.format_time(e)),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e)
        if e.args.get("format")
        else self.tochar_sql(e),
        exp.Trim: trim_sql,
        exp.TryCast: no_trycast_sql,
        exp.TsOrDsAdd: _date_add_sql("+"),
        exp.TsOrDsDiff: _date_diff_sql,
        exp.Uuid: lambda *_: "GEN_RANDOM_UUID()",
        exp.TimeToUnix: lambda self, e: self.func(
            "DATE_PART", exp.Literal.string("epoch"), e.this
        ),
        exp.VariancePop: rename_func("VAR_POP"),
        exp.Variance: rename_func("VAR_SAMP"),
        exp.Xor: bool_xor_sql,
        exp.Unicode: rename_func("ASCII"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Levenshtein: _levenshtein_sql,
        exp.JSONObjectAgg: rename_func("JSON_OBJECT_AGG"),
        exp.JSONBObjectAgg: rename_func("JSONB_OBJECT_AGG"),
        exp.CountIf: count_if_to_sum,
    }

    TRANSFORMS.pop(exp.CommentColumnConstraint)

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def schemacommentproperty_sql(self, expression: exp.SchemaCommentProperty) -> str:
        self.unsupported("Table comments are not supported in the CREATE statement")
        return ""

    def commentcolumnconstraint_sql(self, expression: exp.CommentColumnConstraint) -> str:
        self.unsupported("Column comments are not supported in the CREATE statement")
        return ""

    def unnest_sql(self, expression: exp.Unnest) -> str:
        if len(expression.expressions) == 1:
            arg = expression.expressions[0]
            if isinstance(arg, exp.GenerateDateArray):
                generate_series: exp.Expression = exp.GenerateSeries(**arg.args)
                if isinstance(expression.parent, (exp.From, exp.Join)):
                    generate_series = (
                        exp.select("value::date")
                        .from_(exp.Table(this=generate_series).as_("_t", table=["value"]))
                        .subquery(expression.args.get("alias") or "_unnested_generate_series")
                    )
                return self.sql(generate_series)

            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(arg, dialect=self.dialect)
            if this.is_type("array<json>"):
                while isinstance(this, exp.Cast):
                    this = this.this

                arg_as_json = self.sql(exp.cast(this, exp.DataType.Type.JSON))
                alias = self.sql(expression, "alias")
                alias = f" AS {alias}" if alias else ""

                if expression.args.get("offset"):
                    self.unsupported("Unsupported JSON_ARRAY_ELEMENTS with offset")

                return f"JSON_ARRAY_ELEMENTS({arg_as_json}){alias}"

        return super().unnest_sql(expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Forms like ARRAY[1, 2, 3][3] aren't allowed; we need to wrap the ARRAY."""
        if isinstance(expression.this, exp.Array):
            expression.set("this", exp.paren(expression.this, copy=False))

        return super().bracket_sql(expression)

    def matchagainst_sql(self, expression: exp.MatchAgainst) -> str:
        this = self.sql(expression, "this")
        expressions = [f"{self.sql(e)} @@ {this}" for e in expression.expressions]
        sql = " OR ".join(expressions)
        return f"({sql})" if len(expressions) > 1 else sql

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f"({exprs})" if exprs else ""

        access_method = self.sql(expression, "access_method")
        access_method = f"ACCESS METHOD {access_method}" if access_method else ""
        tablespace = self.sql(expression, "tablespace")
        tablespace = f"TABLESPACE {tablespace}" if tablespace else ""
        option = self.sql(expression, "option")

        return f"SET {exprs}{access_method}{tablespace}{option}"

    def datatype_sql(self, expression: exp.DataType) -> str:
        if expression.is_type(exp.DataType.Type.ARRAY):
            if expression.expressions:
                values = self.expressions(expression, key="values", flat=True)
                return f"{self.expressions(expression, flat=True)}[{values}]"
            return "ARRAY"

        if (
            expression.is_type(exp.DataType.Type.DOUBLE, exp.DataType.Type.FLOAT)
            and expression.expressions
        ):
            # Postgres doesn't support precision for REAL and DOUBLE PRECISION types
            return f"FLOAT({self.expressions(expression, flat=True)})"

        return super().datatype_sql(expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # Postgres casts DIV() to decimal for transpilation but when roundtripping it's superfluous
        if isinstance(this, exp.IntDiv) and expression.to == exp.DataType.build("decimal"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def array_sql(self, expression: exp.Array) -> str:
        exprs = expression.expressions
        func_name = self.normalize_func("ARRAY")

        if isinstance(seq_get(exprs, 0), exp.Select):
            return f"{func_name}({self.sql(exprs[0])})"

        return f"{func_name}{inline_array_sql(self, expression)}"

    def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
        return f"GENERATED ALWAYS AS ({self.sql(expression, 'this')}) STORED"

    def isascii_sql(self, expression: exp.IsAscii) -> str:
        return f"({self.sql(expression.this)} ~ '^[[:ascii:]]*$')"

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        # https://www.postgresql.org/docs/current/functions-window.html
        self.unsupported("PostgreSQL does not support IGNORE NULLS.")
        return self.sql(expression.this)

    def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
        # https://www.postgresql.org/docs/current/functions-window.html
        self.unsupported("PostgreSQL does not support RESPECT NULLS.")
        return self.sql(expression.this)

    @unsupported_args("this")
    def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
        return "CURRENT_SCHEMA"

    def interval_sql(self, expression: exp.Interval) -> str:
        unit = expression.text("unit").lower()

        if unit.startswith("quarter") and isinstance(expression.this, exp.Literal):
            expression.this.replace(exp.Literal.number(int(expression.this.to_py()) * 3))
            expression.args["unit"].replace(exp.var("MONTH"))

        return super().interval_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        if expression.args.get("jdbc"):
            return "?"

        this = f"({expression.name})" if expression.this else ""
        return f"{self.NAMED_PLACEHOLDER_TOKEN}{this}s"

    def arraycontains_sql(self, expression: exp.ArrayContains) -> str:
        # Convert DuckDB's LIST_CONTAINS(array, value) to PostgreSQL
        # DuckDB behavior:
        # - LIST_CONTAINS([1,2,3], 2) -> true
        # - LIST_CONTAINS([1,2,3], 4) -> false
        # - LIST_CONTAINS([1,2,NULL], 4) -> false (not NULL)
        # - LIST_CONTAINS([1,2,3], NULL) -> NULL
        #
        # PostgreSQL equivalent: CASE WHEN value IS NULL THEN NULL
        # ELSE COALESCE(value = ANY(array), FALSE) END
        value = expression.expression
        array = expression.this

        coalesce_expr = exp.Coalesce(
            this=value.eq(exp.Any(this=exp.paren(expression=array, copy=False))),
            expressions=[exp.false()],
        )

        case_expr = (
            exp.Case()
            .when(exp.Is(this=value, expression=exp.null()), exp.null(), copy=False)
            .else_(coalesce_expr, copy=False)
        )

        return self.sql(case_expr)
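To see TYPE_MAPPING in action (a hedged example; exact output can vary across sqlglot versions), types with no direct Postgres equivalent are remapped during transpilation:

>>> import sqlglot
>>> # TINYINT widens to SMALLINT and DOUBLE becomes DOUBLE PRECISION.
>>> sqlglot.transpile("CREATE TABLE t (a TINYINT, b DOUBLE)", read="mysql", write="postgres")[0]
'CREATE TABLE t (a SMALLINT, b DOUBLE PRECISION)'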
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def unnest_sql(self, expression: exp.Unnest) -> str:
    if len(expression.expressions) == 1:
        arg = expression.expressions[0]
        if isinstance(arg, exp.GenerateDateArray):
            generate_series: exp.Expression = exp.GenerateSeries(**arg.args)
            if isinstance(expression.parent, (exp.From, exp.Join)):
                generate_series = (
                    exp.select("value::date")
                    .from_(exp.Table(this=generate_series).as_("_t", table=["value"]))
                    .subquery(expression.args.get("alias") or "_unnested_generate_series")
                )
            return self.sql(generate_series)

        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(arg, dialect=self.dialect)
        if this.is_type("array<json>"):
            while isinstance(this, exp.Cast):
                this = this.this

            arg_as_json = self.sql(exp.cast(this, exp.DataType.Type.JSON))
            alias = self.sql(expression, "alias")
            alias = f" AS {alias}" if alias else ""

            if expression.args.get("offset"):
                self.unsupported("Unsupported JSON_ARRAY_ELEMENTS with offset")

            return f"JSON_ARRAY_ELEMENTS({arg_as_json}){alias}"

    return super().unnest_sql(expression)
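For example (a hedged sketch with the output abridged; formatting differs across versions), a BigQuery GENERATE_DATE_ARRAY unnested in a FROM clause is rewritten into a GENERATE_SERIES subquery along these lines:

>>> import sqlglot
>>> sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY('2020-01-01', '2020-01-03'))"
>>> print(sqlglot.transpile(sql, read="bigquery", write="postgres")[0])  # output abridged
SELECT * FROM (SELECT CAST(value AS DATE) FROM GENERATE_SERIES(...) AS _t(value)) AS _unnested_generate_series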
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Forms like ARRAY[1, 2, 3][3] aren't allowed; we need to wrap the ARRAY."""
    if isinstance(expression.this, exp.Array):
        expression.set("this", exp.paren(expression.this, copy=False))

    return super().bracket_sql(expression)
Forms like ARRAY[1, 2, 3][3] aren't allowed; we need to wrap the ARRAY.
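A hedged example of the wrapping (indexing is 1-based in both dialects, so only the parentheses change):

>>> import sqlglot
>>> # Without the added parentheses, ARRAY[1, 2, 3][3] would be invalid Postgres syntax.
>>> sqlglot.transpile("SELECT [1, 2, 3][3]", read="duckdb", write="postgres")[0]
'SELECT (ARRAY[1, 2, 3])[3]'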
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f"({exprs})" if exprs else ""

    access_method = self.sql(expression, "access_method")
    access_method = f"ACCESS METHOD {access_method}" if access_method else ""
    tablespace = self.sql(expression, "tablespace")
    tablespace = f"TABLESPACE {tablespace}" if tablespace else ""
    option = self.sql(expression, "option")

    return f"SET {exprs}{access_method}{tablespace}{option}"
def datatype_sql(self, expression: exp.DataType) -> str:
    if expression.is_type(exp.DataType.Type.ARRAY):
        if expression.expressions:
            values = self.expressions(expression, key="values", flat=True)
            return f"{self.expressions(expression, flat=True)}[{values}]"
        return "ARRAY"

    if (
        expression.is_type(exp.DataType.Type.DOUBLE, exp.DataType.Type.FLOAT)
        and expression.expressions
    ):
        # Postgres doesn't support precision for REAL and DOUBLE PRECISION types
        return f"FLOAT({self.expressions(expression, flat=True)})"

    return super().datatype_sql(expression)
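A hedged round-trip through the FLOAT(p) branch (Postgres accepts FLOAT(p), but not REAL or DOUBLE PRECISION with a precision):

>>> import sqlglot
>>> # FLOAT tokenizes to DOUBLE in this dialect; with a precision it is re-emitted as FLOAT(p).
>>> sqlglot.transpile("SELECT CAST(x AS FLOAT(8))", read="postgres")[0]
'SELECT CAST(x AS FLOAT(8))'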
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    # Postgres casts DIV() to decimal for transpilation but when roundtripping it's superfluous
    if isinstance(this, exp.IntDiv) and expression.to == exp.DataType.build("decimal"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
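A hedged round-trip: the DECIMAL cast that the parser wraps around DIV() is dropped again on generation, as the method above shows:

>>> import sqlglot
>>> # Parsing wraps DIV(10, 4) in CAST(... AS DECIMAL) internally; cast_sql strips
>>> # the superfluous cast when writing Postgres again.
>>> sqlglot.transpile("SELECT DIV(10, 4)", read="postgres")[0]
'SELECT DIV(10, 4)'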
def array_sql(self, expression: exp.Array) -> str:
    exprs = expression.expressions
    func_name = self.normalize_func("ARRAY")

    if isinstance(seq_get(exprs, 0), exp.Select):
        return f"{func_name}({self.sql(exprs[0])})"

    return f"{func_name}{inline_array_sql(self, expression)}"
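Both constructor forms round-trip (a hedged example):

>>> import sqlglot
>>> sqlglot.transpile("SELECT ARRAY(SELECT 1)", read="postgres")[0]
'SELECT ARRAY(SELECT 1)'
>>> sqlglot.transpile("SELECT ARRAY[1, 2, 3]", read="postgres")[0]
'SELECT ARRAY[1, 2, 3]'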
def interval_sql(self, expression: exp.Interval) -> str:
    unit = expression.text("unit").lower()

    if unit.startswith("quarter") and isinstance(expression.this, exp.Literal):
        expression.this.replace(exp.Literal.number(int(expression.this.to_py()) * 3))
        expression.args["unit"].replace(exp.var("MONTH"))

    return super().interval_sql(expression)
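Since Postgres interval literals have no QUARTER unit, literal quarters are folded into months (a hedged example; exact quoting may vary):

>>> import sqlglot
>>> # 2 quarters -> 6 months, rendered as a single-string interval.
>>> print(sqlglot.transpile("SELECT INTERVAL '2' QUARTER", write="postgres")[0])
SELECT INTERVAL '6 MONTH'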
def arraycontains_sql(self, expression: exp.ArrayContains) -> str:
    # Convert DuckDB's LIST_CONTAINS(array, value) to PostgreSQL
    # DuckDB behavior:
    # - LIST_CONTAINS([1,2,3], 2) -> true
    # - LIST_CONTAINS([1,2,3], 4) -> false
    # - LIST_CONTAINS([1,2,NULL], 4) -> false (not NULL)
    # - LIST_CONTAINS([1,2,3], NULL) -> NULL
    #
    # PostgreSQL equivalent: CASE WHEN value IS NULL THEN NULL
    # ELSE COALESCE(value = ANY(array), FALSE) END
    value = expression.expression
    array = expression.this

    coalesce_expr = exp.Coalesce(
        this=value.eq(exp.Any(this=exp.paren(expression=array, copy=False))),
        expressions=[exp.false()],
    )

    case_expr = (
        exp.Case()
        .when(exp.Is(this=value, expression=exp.null()), exp.null(), copy=False)
        .else_(coalesce_expr, copy=False)
    )

    return self.sql(case_expr)
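A hedged end-to-end example (output is indicative; formatting may differ by version):

>>> import sqlglot
>>> sql = "SELECT LIST_CONTAINS([1, 2, 3], x)"
>>> print(sqlglot.transpile(sql, read="duckdb", write="postgres")[0])
SELECT CASE WHEN x IS NULL THEN NULL ELSE COALESCE(x = ANY(ARRAY[1, 2, 3]), FALSE) END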
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql