# sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    bracket_to_element_at_sql,
    build_replace_with_optional_replacement,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    sha2_digest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args
from sqlglot.typing.presto import EXPRESSION_METADATA

# Union of the date/timestamp-delta expressions handled by _date_delta_sql below.
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP via REGEXP_REPLACE, since Presto has no INITCAP function.

    Custom delimiter characters cannot be expressed with this rewrite, so any
    delimiter other than the dialect's default set is flagged as unsupported
    (generation still proceeds on a best-effort basis).
    """
    delimiters = expression.expression
    if delimiters and not (
        delimiters.is_string and delimiters.this == self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS
    ):
        self.unsupported("INITCAP does not support custom delimiters")

    # Capture the first word character and the rest of each word; the lambda
    # upper-cases the former and lower-cases the latter.
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as Presto's ARRAY_SORT.

    A descending sort (asc=FALSE) has no direct flag in ARRAY_SORT, so a
    reversing comparator lambda is supplied instead.
    """
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)
def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Generate SQL for a schema node, with special handling for partitioning.

    Under a PARTITIONED_BY property, Presto expects an ARRAY of string
    literals, so the schema entries are rendered as (unquoted) strings.
    Otherwise, column definitions from sibling schemas attached to properties
    are folded into this schema before delegating to the default generator.
    """
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Map exact QUANTILE to APPROX_PERCENTILE, warning about the approximation."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time parsing as Presto's DATE_PARSE with a mapped format."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Convert a timestamp-or-date-string value to DATE.

    A non-default time format requires parsing via DATE_PARSE first; otherwise
    a double CAST (through TIMESTAMP) suffices.
    """
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TS_OR_DS_ADD as DATE_ADD(unit, amount, value) with casts added."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TS_OR_DS_DIFF as DATE_DIFF over both operands cast to TIMESTAMP."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE, disambiguating the 3- and 4-argument overloads."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME: the 3-arg form takes hour/minute offsets, the
    2-arg form a time zone."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UNIX_TO_TIME, dividing by the appropriate power of ten when the
    input is in (milli/micro/nano)seconds rather than seconds."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Ensure *expression* is integer-typed, casting to BIGINT when necessary.

    Types are annotated lazily, so untyped nodes are annotated first.
    """
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse Presto's Teradata-compatible TO_CHAR into TimeToStr."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for DATE_ADD-style functions.

    When negate_interval is True the interval is multiplied by -1, which lets
    DateSub be expressed through DATE_ADD (Presto has no DATE_SUB).
    """
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Transpile LATERAL VIEW EXPLODE / INLINE into Presto's UNNEST."""
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify column references produced by LATERAL VIEW EXPLODE transpilation.

    Columns that were qualified with the exploded (single-column) alias are
    re-pointed at the lateral's table alias so the generated Presto references
    stay valid.
    """
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    """Dialect definition for Presto (and the base for Trino)."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False
    LEAST_GREATEST_IGNORES_NULLS = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause.
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False
        ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD/DATE_DIFF take (unit, amount, value), i.e. the
            # reverse of the canonical argument order.
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "REPLACE": build_replace_with_optional_replacement,
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "SLICE": exp.ArraySlice.from_arg_list,
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "WEEK": exp.WeekOfYear.from_arg_list,
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        # Presto's TRIM uses plain function-call syntax, not the SQL-standard form.
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: lambda self, e: self.func(
                "APPROX_PERCENTILE",
                e.this,
                e.args.get("weight"),
                e.args.get("quantile"),
                e.args.get("accuracy"),
            ),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess(
                [transforms.inherit_struct_field_names],
                generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            ),
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.ArraySlice: rename_func("SLICE"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # No DATE_SUB in Presto: express it as DATE_ADD with a negated interval.
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            # Note: Presto's TRUNCATE always returns DOUBLE, even with decimals=0, whereas
            # most dialects return INT (SQLite also returns REAL, see sqlite.py). This creates
            # a bidirectional transpilation gap: Presto→Other may change float division to int
            # division, and vice versa. Modeling precisely would require exp.FloatTrunc or
            # similar, deemed overengineering for this subtle semantic difference.
            exp.Trunc: rename_func("TRUNCATE"),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def extract_sql(self, expression: exp.Extract) -> str:
            """Handle EXTRACT, rewriting the EPOCH* parts as TO_UNIXTIME math."""
            date_part = expression.name

            if not date_part.startswith("EPOCH"):
                return super().extract_sql(expression)

            if date_part == "EPOCH_MILLISECOND":
                scale = 10**3
            elif date_part == "EPOCH_MICROSECOND":
                scale = 10**6
            elif date_part == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            value = expression.expression

            ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = exp.TimeToUnix(this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return self.sql(to_unix)

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            """Render JSON_FORMAT, casting non-JSON input to JSON first."""
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 of text as LOWER(TO_HEX(MD5(TO_UTF8(...)))), since
            Presto's MD5 operates on VARBINARY and returns a binary digest."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render safe (NULL-on-missing) subscripts as ELEMENT_AT."""
            if expression.args.get("safe"):
                return bracket_to_element_at_sql(self, expression)
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct literal, preferring a typed CAST(ROW(...) AS ROW(...))
            when every field has a known name and type; otherwise fall back to ROW(...)."""
            if not expression.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                annotate_types(expression, dialect=self.dialect)

            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Render intervals; WEEK has no interval unit in Presto, so it is
            expressed as a multiple of 7 days."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render BEGIN-style statements as START TRANSACTION."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto places OFFSET before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            """Render GROUP_CONCAT as ARRAY_JOIN(ARRAY_AGG(...), separator)."""
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
# NOTE(review): this is a verbatim duplicate of amend_exploded_column_table defined
# earlier in this file (lines 234-259 of the original) — likely an artifact of the
# extraction that produced this dump; confirm and deduplicate upstream.
def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify column references produced by LATERAL VIEW EXPLODE transpilation.

    Columns qualified with the exploded (single-column) alias are re-pointed at
    the lateral's table alias so the generated Presto references stay valid.
    """
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression
class Presto(Dialect):
    """Dialect definition for Presto (and, by inheritance elsewhere, Trino)."""

    # 1-based array indexing, e.g. arr[1] is the first element.
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    # None: LOG only takes a single argument with an implicit base.
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False
    LEAST_GREATEST_IGNORES_NULLS = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # Presto unicode literals: U&'...' / u&'...' for every configured quote style.
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause.
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False
        ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD/DATE_DIFF take (unit, value, date) — note the
            # reversed argument order relative to the canonical expressions.
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            # ELEMENT_AT maps to a "safe" 1-based bracket access (NULL on miss).
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "REPLACE": build_replace_with_optional_replacement,
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "SLICE": exp.ArraySlice.from_arg_list,
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "WEEK": exp.WeekOfYear.from_arg_list,
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        # Presto's TRIM is parsed as a regular function, not the SQL-standard form.
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: lambda self, e: self.func(
                "APPROX_PERCENTILE",
                e.this,
                e.args.get("weight"),
                e.args.get("quantile"),
                e.args.get("accuracy"),
            ),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess(
                [transforms.inherit_struct_field_names],
                generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            ),
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.ArraySlice: rename_func("SLICE"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            # Argument order is reversed on output: DATE_DIFF(unit, expr, this).
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # Presto has no DATE_SUB; negate the interval and use DATE_ADD.
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            # NOTE: transform order matters here — exploded projections must be
            # unnested before amend_exploded_column_table can requalify columns.
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            # Note: Presto's TRUNCATE always returns DOUBLE, even with decimals=0, whereas
            # most dialects return INT (SQLite also returns REAL, see sqlite.py). This creates
            # a bidirectional transpilation gap: Presto→Other may change float division to int
            # division, and vice versa. Modeling precisely would require exp.FloatTrunc or
            # similar, deemed overengineering for this subtle semantic difference.
            exp.Trunc: rename_func("TRUNCATE"),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def extract_sql(self, expression: exp.Extract) -> str:
            """Generate EXTRACT, rewriting the non-standard EPOCH* parts.

            EPOCH[_MILLISECOND|_MICROSECOND|_NANOSECOND] is turned into a
            TO_UNIXTIME call (scaled by 10^3/10^6/10^9 respectively); every
            other date part falls through to the default generator.
            """
            date_part = expression.name

            if not date_part.startswith("EPOCH"):
                return super().extract_sql(expression)

            if date_part == "EPOCH_MILLISECOND":
                scale = 10**3
            elif date_part == "EPOCH_MICROSECOND":
                scale = 10**6
            elif date_part == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                # Plain EPOCH: seconds, no scaling needed.
                scale = None

            value = expression.expression

            ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = exp.TimeToUnix(this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return self.sql(to_unix)

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            """Generate JSON_FORMAT, casting the argument to JSON when needed.

            If the argument's type is unknown, types are annotated on the fly so
            that only non-JSON inputs get wrapped in a CAST(... AS JSON).
            """
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            """Generate an MD5 hex digest.

            Presto's MD5 takes VARBINARY, so textual inputs are first encoded
            via TO_UTF8; the binary digest is then hex-encoded and lowercased.
            """
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            # "Safe" (NULL-on-miss) bracket access maps to ELEMENT_AT.
            if expression.args.get("safe"):
                return bracket_to_element_at_sql(self, expression)
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Generate a ROW constructor.

            If every field is a typed key-value pair, emit
            CAST(ROW(values...) AS ROW(name type, ...)) to preserve the field
            names; otherwise fall back to a bare ROW(...) call (and warn when
            key-value fields exist but could not be typed).
            """
            if not expression.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                annotate_types(expression, dialect=self.dialect)

            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            # Only emit the typed form when every field contributed a schema entry.
            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            # Presto has no WEEK interval unit; rewrite as days * 7.
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            # Presto spells BEGIN as START TRANSACTION (see the tokenizer's "START" mapping).
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto requires OFFSET before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    # Dropping the alias means qualified column references must
                    # be unqualified too, or they would dangle.
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            """Generate either JSON_EXTRACT or ROW (dot) access for a JSON path.

            Controlled by the `variant_extract_is_json_extract` setting: when a
            Snowflake/Databricks VARIANT extract (col:x.y) should not be treated
            as JSON, the path is converted to dot notation instead.
            """
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y.
            # The first path segment ($) is skipped, hence expressions[1:].
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    # Non-identifier keys must be double-quoted in dot notation.
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            # Presto has no GROUP_CONCAT / LISTAGG; emulate with ARRAY_AGG + ARRAY_JOIN.
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division if both a and b are integers.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Specifies the strategy according to which identifiers should be normalized.
288 class Tokenizer(tokens.Tokenizer): 289 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 290 UNICODE_STRINGS = [ 291 (prefix + q, q) 292 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 293 for prefix in ("U&", "u&") 294 ] 295 296 NESTED_COMMENTS = False 297 298 KEYWORDS = { 299 **tokens.Tokenizer.KEYWORDS, 300 "DEALLOCATE PREPARE": TokenType.COMMAND, 301 "DESCRIBE INPUT": TokenType.COMMAND, 302 "DESCRIBE OUTPUT": TokenType.COMMAND, 303 "RESET SESSION": TokenType.COMMAND, 304 "START": TokenType.BEGIN, 305 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 306 "ROW": TokenType.STRUCT, 307 "IPADDRESS": TokenType.IPADDRESS, 308 "IPPREFIX": TokenType.IPPREFIX, 309 "TDIGEST": TokenType.TDIGEST, 310 "HYPERLOGLOG": TokenType.HLLSKETCH, 311 } 312 KEYWORDS.pop("/*+") 313 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
315 class Parser(parser.Parser): 316 VALUES_FOLLOWED_BY_PAREN = False 317 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 318 319 FUNCTIONS = { 320 **parser.Parser.FUNCTIONS, 321 "ARBITRARY": exp.AnyValue.from_arg_list, 322 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 323 "APPROX_PERCENTILE": _build_approx_percentile, 324 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 325 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 326 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 327 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 328 "CARDINALITY": exp.ArraySize.from_arg_list, 329 "CONTAINS": exp.ArrayContains.from_arg_list, 330 "DATE_ADD": lambda args: exp.DateAdd( 331 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 332 ), 333 "DATE_DIFF": lambda args: exp.DateDiff( 334 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 335 ), 336 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 337 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 338 "DATE_TRUNC": date_trunc_to_time, 339 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 340 "DOW": exp.DayOfWeekIso.from_arg_list, 341 "DOY": exp.DayOfYear.from_arg_list, 342 "ELEMENT_AT": lambda args: exp.Bracket( 343 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 344 ), 345 "FROM_HEX": exp.Unhex.from_arg_list, 346 "FROM_UNIXTIME": _build_from_unixtime, 347 "FROM_UTF8": lambda args: exp.Decode( 348 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 349 ), 350 "JSON_FORMAT": lambda args: exp.JSONFormat( 351 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 352 ), 353 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 354 "NOW": exp.CurrentTimestamp.from_arg_list, 355 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 356 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 357 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 358 
this=seq_get(args, 0), 359 expression=seq_get(args, 1), 360 replacement=seq_get(args, 2) or exp.Literal.string(""), 361 ), 362 "REPLACE": build_replace_with_optional_replacement, 363 "ROW": exp.Struct.from_arg_list, 364 "SEQUENCE": exp.GenerateSeries.from_arg_list, 365 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 366 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 367 "STRPOS": lambda args: exp.StrPosition( 368 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 369 ), 370 "SLICE": exp.ArraySlice.from_arg_list, 371 "TO_CHAR": _build_to_char, 372 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 373 "TO_UTF8": lambda args: exp.Encode( 374 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 375 ), 376 "MD5": exp.MD5Digest.from_arg_list, 377 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 378 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 379 "WEEK": exp.WeekOfYear.from_arg_list, 380 } 381 382 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 383 FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- raise_error
- validate_expression
- reset
- errors
- error_level
- error_message_context
- max_errors
- dialect
- sql
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
385 class Generator(generator.Generator): 386 INTERVAL_ALLOWS_PLURAL_FORM = False 387 JOIN_HINTS = False 388 TABLE_HINTS = False 389 QUERY_HINTS = False 390 IS_BOOL_ALLOWED = False 391 TZ_TO_WITH_TIME_ZONE = True 392 NVL2_SUPPORTED = False 393 STRUCT_DELIMITER = ("(", ")") 394 LIMIT_ONLY_LITERALS = True 395 SUPPORTS_SINGLE_ARG_CONCAT = False 396 LIKE_PROPERTY_INSIDE_SCHEMA = True 397 MULTI_ARG_DISTINCT = False 398 SUPPORTS_TO_NUMBER = False 399 HEX_FUNC = "TO_HEX" 400 PARSE_JSON_NAME = "JSON_PARSE" 401 PAD_FILL_PATTERN_IS_REQUIRED = True 402 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 403 SUPPORTS_MEDIAN = False 404 ARRAY_SIZE_NAME = "CARDINALITY" 405 406 PROPERTIES_LOCATION = { 407 **generator.Generator.PROPERTIES_LOCATION, 408 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 409 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 410 } 411 412 TYPE_MAPPING = { 413 **generator.Generator.TYPE_MAPPING, 414 exp.DataType.Type.BINARY: "VARBINARY", 415 exp.DataType.Type.BIT: "BOOLEAN", 416 exp.DataType.Type.DATETIME: "TIMESTAMP", 417 exp.DataType.Type.DATETIME64: "TIMESTAMP", 418 exp.DataType.Type.FLOAT: "REAL", 419 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 420 exp.DataType.Type.INT: "INTEGER", 421 exp.DataType.Type.STRUCT: "ROW", 422 exp.DataType.Type.TEXT: "VARCHAR", 423 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 424 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 425 exp.DataType.Type.TIMETZ: "TIME", 426 } 427 428 TRANSFORMS = { 429 **generator.Generator.TRANSFORMS, 430 exp.AnyValue: rename_func("ARBITRARY"), 431 exp.ApproxQuantile: lambda self, e: self.func( 432 "APPROX_PERCENTILE", 433 e.this, 434 e.args.get("weight"), 435 e.args.get("quantile"), 436 e.args.get("accuracy"), 437 ), 438 exp.ArgMax: rename_func("MAX_BY"), 439 exp.ArgMin: rename_func("MIN_BY"), 440 exp.Array: transforms.preprocess( 441 [transforms.inherit_struct_field_names], 442 generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 443 ), 444 exp.ArrayAny: 
rename_func("ANY_MATCH"), 445 exp.ArrayConcat: rename_func("CONCAT"), 446 exp.ArrayContains: rename_func("CONTAINS"), 447 exp.ArrayToString: rename_func("ARRAY_JOIN"), 448 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 449 exp.ArraySlice: rename_func("SLICE"), 450 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 451 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 452 exp.BitwiseLeftShift: lambda self, e: self.func( 453 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 454 ), 455 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 456 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 457 exp.BitwiseRightShift: lambda self, e: self.func( 458 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 459 ), 460 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 461 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 462 exp.CurrentTime: lambda *_: "CURRENT_TIME", 463 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 464 exp.CurrentUser: lambda *_: "CURRENT_USER", 465 exp.DateAdd: _date_delta_sql("DATE_ADD"), 466 exp.DateDiff: lambda self, e: self.func( 467 "DATE_DIFF", unit_to_str(e), e.expression, e.this 468 ), 469 exp.DateStrToDate: datestrtodate_sql, 470 exp.DateToDi: lambda self, 471 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 472 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 473 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 474 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 475 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 476 exp.DiToDate: lambda self, 477 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 478 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 479 exp.FileFormatProperty: lambda self, 480 e: f"format={self.sql(exp.Literal.string(e.name))}", 481 exp.First: 
_first_last_sql, 482 exp.FromTimeZone: lambda self, 483 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 484 exp.GenerateSeries: sequence_sql, 485 exp.GenerateDateArray: sequence_sql, 486 exp.If: if_sql(), 487 exp.ILike: no_ilike_sql, 488 exp.Initcap: _initcap_sql, 489 exp.Last: _first_last_sql, 490 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 491 exp.Lateral: _explode_to_unnest_sql, 492 exp.Left: left_to_substring_sql, 493 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 494 rename_func("LEVENSHTEIN_DISTANCE") 495 ), 496 exp.LogicalAnd: rename_func("BOOL_AND"), 497 exp.LogicalOr: rename_func("BOOL_OR"), 498 exp.Pivot: no_pivot_sql, 499 exp.Quantile: _quantile_sql, 500 exp.RegexpExtract: regexp_extract_sql, 501 exp.RegexpExtractAll: regexp_extract_sql, 502 exp.Right: right_to_substring_sql, 503 exp.Schema: _schema_sql, 504 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 505 exp.Select: transforms.preprocess( 506 [ 507 transforms.eliminate_window_clause, 508 transforms.eliminate_qualify, 509 transforms.eliminate_distinct_on, 510 transforms.explode_projection_to_unnest(1), 511 transforms.eliminate_semi_and_anti_joins, 512 amend_exploded_column_table, 513 ] 514 ), 515 exp.SortArray: _no_sort_array, 516 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 517 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 518 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 519 exp.StrToTime: _str_to_time_sql, 520 exp.StructExtract: struct_extract_sql, 521 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 522 exp.Timestamp: no_timestamp_sql, 523 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 524 exp.TimestampTrunc: timestamptrunc_sql(), 525 exp.TimeStrToDate: timestrtotime_sql, 526 exp.TimeStrToTime: timestrtotime_sql, 527 exp.TimeStrToUnix: lambda self, e: self.func( 528 "TO_UNIXTIME", 
self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 529 ), 530 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 531 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 532 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 533 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 534 exp.TsOrDiToDi: lambda self, 535 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 536 exp.TsOrDsAdd: _ts_or_ds_add_sql, 537 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 538 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 539 exp.Unhex: rename_func("FROM_HEX"), 540 exp.UnixToStr: lambda self, 541 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 542 exp.UnixToTime: _unix_to_time_sql, 543 exp.UnixToTimeStr: lambda self, 544 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 545 exp.VariancePop: rename_func("VAR_POP"), 546 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 547 exp.WithinGroup: transforms.preprocess( 548 [transforms.remove_within_group_for_percentiles] 549 ), 550 # Note: Presto's TRUNCATE always returns DOUBLE, even with decimals=0, whereas 551 # most dialects return INT (SQLite also returns REAL, see sqlite.py). This creates 552 # a bidirectional transpilation gap: Presto→Other may change float division to int 553 # division, and vice versa. Modeling precisely would require exp.FloatTrunc or 554 # similar, deemed overengineering for this subtle semantic difference. 
555 exp.Trunc: rename_func("TRUNCATE"), 556 exp.Xor: bool_xor_sql, 557 exp.MD5Digest: rename_func("MD5"), 558 exp.SHA: rename_func("SHA1"), 559 exp.SHA1Digest: rename_func("SHA1"), 560 exp.SHA2: sha256_sql, 561 exp.SHA2Digest: sha2_digest_sql, 562 } 563 564 RESERVED_KEYWORDS = { 565 "alter", 566 "and", 567 "as", 568 "between", 569 "by", 570 "case", 571 "cast", 572 "constraint", 573 "create", 574 "cross", 575 "current_time", 576 "current_timestamp", 577 "deallocate", 578 "delete", 579 "describe", 580 "distinct", 581 "drop", 582 "else", 583 "end", 584 "escape", 585 "except", 586 "execute", 587 "exists", 588 "extract", 589 "false", 590 "for", 591 "from", 592 "full", 593 "group", 594 "having", 595 "in", 596 "inner", 597 "insert", 598 "intersect", 599 "into", 600 "is", 601 "join", 602 "left", 603 "like", 604 "natural", 605 "not", 606 "null", 607 "on", 608 "or", 609 "order", 610 "outer", 611 "prepare", 612 "right", 613 "select", 614 "table", 615 "then", 616 "true", 617 "union", 618 "using", 619 "values", 620 "when", 621 "where", 622 "with", 623 } 624 625 def extract_sql(self, expression: exp.Extract) -> str: 626 date_part = expression.name 627 628 if not date_part.startswith("EPOCH"): 629 return super().extract_sql(expression) 630 631 if date_part == "EPOCH_MILLISECOND": 632 scale = 10**3 633 elif date_part == "EPOCH_MICROSECOND": 634 scale = 10**6 635 elif date_part == "EPOCH_NANOSECOND": 636 scale = 10**9 637 else: 638 scale = None 639 640 value = expression.expression 641 642 ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP")) 643 to_unix: exp.Expression = exp.TimeToUnix(this=ts) 644 645 if scale: 646 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 647 648 return self.sql(to_unix) 649 650 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 651 this = expression.this 652 is_json = expression.args.get("is_json") 653 654 if this and not (is_json or this.type): 655 from sqlglot.optimizer.annotate_types import annotate_types 656 657 this 
= annotate_types(this, dialect=self.dialect) 658 659 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 660 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 661 662 return self.function_fallback_sql(expression) 663 664 def md5_sql(self, expression: exp.MD5) -> str: 665 this = expression.this 666 667 if not this.type: 668 from sqlglot.optimizer.annotate_types import annotate_types 669 670 this = annotate_types(this, dialect=self.dialect) 671 672 if this.is_type(*exp.DataType.TEXT_TYPES): 673 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 674 675 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 676 677 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 678 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 679 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 680 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 681 # which seems to be using the same time mapping as Hive, as per: 682 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 683 this = expression.this 684 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 685 value_as_timestamp = ( 686 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 687 ) 688 689 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 690 691 formatted_value = self.func( 692 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 693 ) 694 parse_with_tz = self.func( 695 "PARSE_DATETIME", 696 formatted_value, 697 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 698 ) 699 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 700 return self.func("TO_UNIXTIME", coalesced) 701 702 def bracket_sql(self, expression: exp.Bracket) -> str: 703 if expression.args.get("safe"): 704 return 
bracket_to_element_at_sql(self, expression) 705 return super().bracket_sql(expression) 706 707 def struct_sql(self, expression: exp.Struct) -> str: 708 if not expression.type: 709 from sqlglot.optimizer.annotate_types import annotate_types 710 711 annotate_types(expression, dialect=self.dialect) 712 713 values: t.List[str] = [] 714 schema: t.List[str] = [] 715 unknown_type = False 716 717 for e in expression.expressions: 718 if isinstance(e, exp.PropertyEQ): 719 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 720 unknown_type = True 721 else: 722 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 723 values.append(self.sql(e, "expression")) 724 else: 725 values.append(self.sql(e)) 726 727 size = len(expression.expressions) 728 729 if not size or len(schema) != size: 730 if unknown_type: 731 self.unsupported( 732 "Cannot convert untyped key-value definitions (try annotate_types)." 733 ) 734 return self.func("ROW", *values) 735 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 736 737 def interval_sql(self, expression: exp.Interval) -> str: 738 if expression.this and expression.text("unit").upper().startswith("WEEK"): 739 return f"({expression.this.name} * INTERVAL '7' DAY)" 740 return super().interval_sql(expression) 741 742 def transaction_sql(self, expression: exp.Transaction) -> str: 743 modes = expression.args.get("modes") 744 modes = f" {', '.join(modes)}" if modes else "" 745 return f"START TRANSACTION{modes}" 746 747 def offset_limit_modifiers( 748 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 749 ) -> t.List[str]: 750 return [ 751 self.sql(expression, "offset"), 752 self.sql(limit), 753 ] 754 755 def create_sql(self, expression: exp.Create) -> str: 756 """ 757 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 758 so we need to remove them 759 """ 760 kind = expression.args["kind"] 761 schema = expression.this 762 if 
kind == "VIEW" and schema.expressions: 763 expression.this.set("expressions", None) 764 return super().create_sql(expression) 765 766 def delete_sql(self, expression: exp.Delete) -> str: 767 """ 768 Presto only supports DELETE FROM for a single table without an alias, so we need 769 to remove the unnecessary parts. If the original DELETE statement contains more 770 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 771 """ 772 tables = expression.args.get("tables") or [expression.this] 773 if len(tables) > 1: 774 return super().delete_sql(expression) 775 776 table = tables[0] 777 expression.set("this", table) 778 expression.set("tables", None) 779 780 if isinstance(table, exp.Table): 781 table_alias = table.args.get("alias") 782 if table_alias: 783 table_alias.pop() 784 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 785 786 return super().delete_sql(expression) 787 788 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 789 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 790 791 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 792 # VARIANT extract (e.g. 
col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 793 if not expression.args.get("variant_extract") or is_json_extract: 794 return self.func( 795 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 796 ) 797 798 this = self.sql(expression, "this") 799 800 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 801 segments = [] 802 for path_key in expression.expression.expressions[1:]: 803 if not isinstance(path_key, exp.JSONPathKey): 804 # Cannot transpile subscripts, wildcards etc to dot notation 805 self.unsupported( 806 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 807 ) 808 continue 809 key = path_key.this 810 if not exp.SAFE_IDENTIFIER_RE.match(key): 811 key = f'"{key}"' 812 segments.append(f".{key}") 813 814 expr = "".join(segments) 815 816 return f"{this}{expr}" 817 818 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 819 return self.func( 820 "ARRAY_JOIN", 821 self.func("ARRAY_AGG", expression.this), 822 expression.args.get("separator"), 823 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for specials cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def extract_sql(self, expression: exp.Extract) -> str:
    """Generate EXTRACT SQL; EPOCH* date parts become TO_UNIXTIME-based expressions."""
    date_part = expression.name

    # Anything outside the EPOCH family is handled by the base generator.
    if not date_part.startswith("EPOCH"):
        return super().extract_sql(expression)

    # Sub-second epoch variants scale the unix timestamp by the matching factor;
    # plain EPOCH maps to None (no multiplication).
    scale = {
        "EPOCH_MILLISECOND": 10**3,
        "EPOCH_MICROSECOND": 10**6,
        "EPOCH_NANOSECOND": 10**9,
    }.get(date_part)

    ts = exp.cast(expression.expression, to=exp.DataType.build("TIMESTAMP"))
    to_unix: exp.Expression = exp.TimeToUnix(this=ts)

    if scale:
        to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

    return self.sql(to_unix)
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
    """Generate JSON_FORMAT, casting an untyped non-JSON argument to JSON first."""
    arg = expression.this
    is_json = expression.args.get("is_json")

    if arg and not (is_json or arg.type):
        # Imported lazily to avoid a circular import at module load time.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

        if not (is_json or arg.is_type(exp.DataType.Type.JSON)):
            # Mutate the tree in place so the fallback generator sees the CAST.
            arg.replace(exp.cast(arg, exp.DataType.Type.JSON))

    return self.function_fallback_sql(expression)
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 of a value as LOWER(TO_HEX(MD5(...))), encoding text inputs first."""
    arg = expression.this

    if not arg.type:
        # Imported lazily to avoid a circular import at module load time.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    # Presto's MD5 operates on varbinary, so textual inputs are UTF-8 encoded first.
    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Convert StrToUnix into TO_UNIXTIME over a parsed timestamp.

    `TO_UNIXTIME` requires a `TIMESTAMP`, so the argument must be parsed into one.
    We first try `DATE_PARSE`, but since this can fail when a timezone is involved,
    it is wrapped in a `TRY` call with `PARSE_DATETIME` as the fallback, which seems
    to use the same time mapping as Hive, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP) if arg.is_string else arg

    # format_time is deterministic, so compute the Presto-format string once.
    fmt = self.format_time(expression)
    naive_parse = self.func("DATE_PARSE", as_text, fmt)

    rendered = self.func("DATE_FORMAT", as_timestamp, fmt)
    tz_parse = self.func(
        "PARSE_DATETIME",
        rendered,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    best_effort = self.func("COALESCE", self.func("TRY", naive_parse), tz_parse)
    return self.func("TO_UNIXTIME", best_effort)
def struct_sql(self, expression: exp.Struct) -> str:
    """Generate a struct as CAST(ROW(...) AS ROW(...)) when every field is typed,
    falling back to a bare ROW(...) call otherwise."""
    if not expression.type:
        # Imported lazily to avoid a circular import at module load time.
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)

    values: t.List[str] = []
    schema: t.List[str] = []
    saw_unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            # Positional (unnamed) struct member: value only, no schema entry.
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown_type = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    size = len(expression.expressions)

    # The typed ROW cast is only valid when every field yielded a schema entry.
    if size and len(schema) == size:
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    if saw_unknown_type:
        self.unsupported(
            "Cannot convert untyped key-value definitions (try annotate_types)."
        )
    return self.func("ROW", *values)
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    if expression.args["kind"] == "VIEW" and expression.this.expressions:
        # Drop the column-list expressions so the generated VIEW is valid Presto.
        expression.this.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g., in CREATE VIEW x (cola), (cola) is the expression),
so we need to remove them
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    targets = expression.args.get("tables") or [expression.this]

    # Multiple delete targets cannot be represented in Presto; generate as-is.
    if len(targets) > 1:
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            # The alias is gone, so references to it must be unqualified too.
            alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Generate JSON_EXTRACT, or ROW dot-access for configured VARIANT extractions."""
    json_extract_enabled = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
    if json_extract_enabled or not expression.args.get("variant_extract"):
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    base = self.sql(expression, "this")

    # Convert a JSONPath extraction like JSON_EXTRACT(col, '$.x.y') into ROW access col.x.y
    parts = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Subscripts, wildcards etc have no dot-notation equivalent.
            self.unsupported(
                f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
            )
            continue

        name = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(name):
            name = f'"{name}"'
        parts.append(f".{name}")

    return f"{base}{''.join(parts)}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- DECLARE_DEFAULT_ASSIGNMENT
- UPDATE_STATEMENT_SUPPORTS_FROM
- STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- triggerproperties_sql
- triggerreferencing_sql
- triggerevent_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql
- block_sql
- storedprocedure_sql
- ifblock_sql
- whileblock_sql
- execute_sql
- executesql_sql