sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    build_replace_with_optional_replacement,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    sha2_digest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args
from sqlglot.typing.presto import EXPRESSION_METADATA

# Expression kinds handled uniformly by _date_delta_sql
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Render INITCAP as a REGEXP_REPLACE lambda (Presto has no INITCAP builtin).

    Custom delimiter sets other than the dialect default are flagged as unsupported.
    """
    delimiters = expression.expression
    if delimiters and not (
        delimiters.is_string and delimiters.this == self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS
    ):
        self.unsupported("INITCAP does not support custom delimiters")

    # Uppercase the first word character, lowercase the rest of each word
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT; descending order needs an explicit comparator."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a Schema node, special-casing PARTITIONED BY into an ARRAY of string literals."""
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        # Merge column definitions from sibling schemas (e.g. property-level schemas)
        # into this one before rendering
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Approximate exact quantiles with APPROX_PERCENTILE (Presto has no exact quantile)."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time parsing via DATE_PARSE with the dialect's format mapping."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-date-string value to DATE, parsing it first if a custom format is set."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    # Default formats: double cast through TIMESTAMP handles both dates and timestamps
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD(unit, delta, value) with appropriate casts."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF(unit, start, end) after casting both sides to TIMESTAMP."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE, mapping the optional weight/accuracy argument positions."""
    if len(args) == 4:
        # (value, weight, percentile, accuracy)
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        # (value, percentile, accuracy)
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME; 3 args carry an hours/minutes offset, 2 args carry a zone."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime, dividing by a power of 10 when the epoch value is sub-second scaled."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Ensure an expression is integer-typed, casting to BIGINT when it is not."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse TO_CHAR using Teradata-style format tokens."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for date add/sub expressions.

    Args:
        name: the Presto function to emit (e.g. "DATE_ADD").
        negate_interval: negate the delta, used to express subtraction via DATE_ADD.
    """
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Transpile LATERAL VIEW EXPLODE / INLINE into Presto's UNNEST."""
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify columns that reference the exploded struct alias of a LATERAL VIEW EXPLODE."""
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression
len(alias.columns) != 1 241 ): 242 continue 243 244 new_table = alias.this 245 old_table = alias.columns[0].name.lower() 246 247 # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified 248 # with the struct column, resulting in invalid Presto references that need to be amended 249 for column in find_all_in_scope(expression, exp.Column): 250 if column.db.lower() == old_table: 251 column.set("table", column.args["db"].pop()) 252 elif column.table.lower() == old_table: 253 column.set("table", new_table.copy()) 254 elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot): 255 column.parent.replace(exp.column(column.parent.expression, table=new_table)) 256 257 return expression 258 259 260class Presto(Dialect): 261 INDEX_OFFSET = 1 262 NULL_ORDERING = "nulls_are_last" 263 TIME_FORMAT = MySQL.TIME_FORMAT 264 STRICT_STRING_CONCAT = True 265 SUPPORTS_SEMI_ANTI_JOIN = False 266 TYPED_DIVISION = True 267 TABLESAMPLE_SIZE_IS_PERCENT = True 268 LOG_BASE_FIRST: t.Optional[bool] = None 269 SUPPORTS_VALUES_DEFAULT = False 270 LEAST_GREATEST_IGNORES_NULLS = False 271 272 TIME_MAPPING = MySQL.TIME_MAPPING 273 274 # https://github.com/trinodb/trino/issues/17 275 # https://github.com/trinodb/trino/issues/12289 276 # https://github.com/prestodb/presto/issues/2863 277 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 278 279 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 280 281 SUPPORTED_SETTINGS = { 282 *Dialect.SUPPORTED_SETTINGS, 283 "variant_extract_is_json_extract", 284 } 285 286 class Tokenizer(tokens.Tokenizer): 287 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 288 UNICODE_STRINGS = [ 289 (prefix + q, q) 290 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 291 for prefix in ("U&", "u&") 292 ] 293 294 NESTED_COMMENTS = False 295 296 KEYWORDS = { 297 **tokens.Tokenizer.KEYWORDS, 298 "DEALLOCATE PREPARE": TokenType.COMMAND, 299 "DESCRIBE INPUT": TokenType.COMMAND, 300 "DESCRIBE OUTPUT": TokenType.COMMAND, 301 
"RESET SESSION": TokenType.COMMAND, 302 "START": TokenType.BEGIN, 303 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 304 "ROW": TokenType.STRUCT, 305 "IPADDRESS": TokenType.IPADDRESS, 306 "IPPREFIX": TokenType.IPPREFIX, 307 "TDIGEST": TokenType.TDIGEST, 308 "HYPERLOGLOG": TokenType.HLLSKETCH, 309 } 310 KEYWORDS.pop("/*+") 311 KEYWORDS.pop("QUALIFY") 312 313 class Parser(parser.Parser): 314 VALUES_FOLLOWED_BY_PAREN = False 315 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 316 317 FUNCTIONS = { 318 **parser.Parser.FUNCTIONS, 319 "ARBITRARY": exp.AnyValue.from_arg_list, 320 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 321 "APPROX_PERCENTILE": _build_approx_percentile, 322 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 323 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 324 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 325 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 326 "CARDINALITY": exp.ArraySize.from_arg_list, 327 "CONTAINS": exp.ArrayContains.from_arg_list, 328 "DATE_ADD": lambda args: exp.DateAdd( 329 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 330 ), 331 "DATE_DIFF": lambda args: exp.DateDiff( 332 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 333 ), 334 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 335 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 336 "DATE_TRUNC": date_trunc_to_time, 337 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 338 "DOW": exp.DayOfWeekIso.from_arg_list, 339 "DOY": exp.DayOfYear.from_arg_list, 340 "ELEMENT_AT": lambda args: exp.Bracket( 341 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 342 ), 343 "FROM_HEX": exp.Unhex.from_arg_list, 344 "FROM_UNIXTIME": _build_from_unixtime, 345 "FROM_UTF8": lambda args: exp.Decode( 346 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 347 ), 348 "JSON_FORMAT": lambda args: exp.JSONFormat( 349 this=seq_get(args, 0), 
options=seq_get(args, 1), is_json=True 350 ), 351 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 352 "NOW": exp.CurrentTimestamp.from_arg_list, 353 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 354 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 355 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 356 this=seq_get(args, 0), 357 expression=seq_get(args, 1), 358 replacement=seq_get(args, 2) or exp.Literal.string(""), 359 ), 360 "REPLACE": build_replace_with_optional_replacement, 361 "ROW": exp.Struct.from_arg_list, 362 "SEQUENCE": exp.GenerateSeries.from_arg_list, 363 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 364 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 365 "STRPOS": lambda args: exp.StrPosition( 366 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 367 ), 368 "SLICE": exp.ArraySlice.from_arg_list, 369 "TO_CHAR": _build_to_char, 370 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 371 "TO_UTF8": lambda args: exp.Encode( 372 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 373 ), 374 "MD5": exp.MD5Digest.from_arg_list, 375 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 376 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 377 "WEEK": exp.WeekOfYear.from_arg_list, 378 } 379 380 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 381 FUNCTION_PARSERS.pop("TRIM") 382 383 class Generator(generator.Generator): 384 INTERVAL_ALLOWS_PLURAL_FORM = False 385 JOIN_HINTS = False 386 TABLE_HINTS = False 387 QUERY_HINTS = False 388 IS_BOOL_ALLOWED = False 389 TZ_TO_WITH_TIME_ZONE = True 390 NVL2_SUPPORTED = False 391 STRUCT_DELIMITER = ("(", ")") 392 LIMIT_ONLY_LITERALS = True 393 SUPPORTS_SINGLE_ARG_CONCAT = False 394 LIKE_PROPERTY_INSIDE_SCHEMA = True 395 MULTI_ARG_DISTINCT = False 396 SUPPORTS_TO_NUMBER = False 397 HEX_FUNC = "TO_HEX" 398 PARSE_JSON_NAME = "JSON_PARSE" 399 PAD_FILL_PATTERN_IS_REQUIRED = True 400 
EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 401 SUPPORTS_MEDIAN = False 402 ARRAY_SIZE_NAME = "CARDINALITY" 403 404 PROPERTIES_LOCATION = { 405 **generator.Generator.PROPERTIES_LOCATION, 406 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 407 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 408 } 409 410 TYPE_MAPPING = { 411 **generator.Generator.TYPE_MAPPING, 412 exp.DataType.Type.BINARY: "VARBINARY", 413 exp.DataType.Type.BIT: "BOOLEAN", 414 exp.DataType.Type.DATETIME: "TIMESTAMP", 415 exp.DataType.Type.DATETIME64: "TIMESTAMP", 416 exp.DataType.Type.FLOAT: "REAL", 417 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 418 exp.DataType.Type.INT: "INTEGER", 419 exp.DataType.Type.STRUCT: "ROW", 420 exp.DataType.Type.TEXT: "VARCHAR", 421 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 422 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 423 exp.DataType.Type.TIMETZ: "TIME", 424 } 425 426 TRANSFORMS = { 427 **generator.Generator.TRANSFORMS, 428 exp.AnyValue: rename_func("ARBITRARY"), 429 exp.ApproxQuantile: lambda self, e: self.func( 430 "APPROX_PERCENTILE", 431 e.this, 432 e.args.get("weight"), 433 e.args.get("quantile"), 434 e.args.get("accuracy"), 435 ), 436 exp.ArgMax: rename_func("MAX_BY"), 437 exp.ArgMin: rename_func("MIN_BY"), 438 exp.Array: transforms.preprocess( 439 [transforms.inherit_struct_field_names], 440 generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 441 ), 442 exp.ArrayAny: rename_func("ANY_MATCH"), 443 exp.ArrayConcat: rename_func("CONCAT"), 444 exp.ArrayContains: rename_func("CONTAINS"), 445 exp.ArrayToString: rename_func("ARRAY_JOIN"), 446 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 447 exp.ArraySlice: rename_func("SLICE"), 448 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 449 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 450 exp.BitwiseLeftShift: lambda self, e: self.func( 451 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 452 ), 453 exp.BitwiseNot: lambda self, e: 
self.func("BITWISE_NOT", e.this), 454 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 455 exp.BitwiseRightShift: lambda self, e: self.func( 456 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 457 ), 458 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 459 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 460 exp.CurrentTime: lambda *_: "CURRENT_TIME", 461 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 462 exp.CurrentUser: lambda *_: "CURRENT_USER", 463 exp.DateAdd: _date_delta_sql("DATE_ADD"), 464 exp.DateDiff: lambda self, e: self.func( 465 "DATE_DIFF", unit_to_str(e), e.expression, e.this 466 ), 467 exp.DateStrToDate: datestrtodate_sql, 468 exp.DateToDi: lambda self, 469 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 470 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 471 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 472 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 473 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 474 exp.DiToDate: lambda self, 475 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 476 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 477 exp.FileFormatProperty: lambda self, 478 e: f"format={self.sql(exp.Literal.string(e.name))}", 479 exp.First: _first_last_sql, 480 exp.FromTimeZone: lambda self, 481 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 482 exp.GenerateSeries: sequence_sql, 483 exp.GenerateDateArray: sequence_sql, 484 exp.If: if_sql(), 485 exp.ILike: no_ilike_sql, 486 exp.Initcap: _initcap_sql, 487 exp.Last: _first_last_sql, 488 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 489 exp.Lateral: _explode_to_unnest_sql, 490 exp.Left: left_to_substring_sql, 491 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", 
"sub_cost", "max_dist")( 492 rename_func("LEVENSHTEIN_DISTANCE") 493 ), 494 exp.LogicalAnd: rename_func("BOOL_AND"), 495 exp.LogicalOr: rename_func("BOOL_OR"), 496 exp.Pivot: no_pivot_sql, 497 exp.Quantile: _quantile_sql, 498 exp.RegexpExtract: regexp_extract_sql, 499 exp.RegexpExtractAll: regexp_extract_sql, 500 exp.Right: right_to_substring_sql, 501 exp.Schema: _schema_sql, 502 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 503 exp.Select: transforms.preprocess( 504 [ 505 transforms.eliminate_window_clause, 506 transforms.eliminate_qualify, 507 transforms.eliminate_distinct_on, 508 transforms.explode_projection_to_unnest(1), 509 transforms.eliminate_semi_and_anti_joins, 510 amend_exploded_column_table, 511 ] 512 ), 513 exp.SortArray: _no_sort_array, 514 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 515 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 516 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 517 exp.StrToTime: _str_to_time_sql, 518 exp.StructExtract: struct_extract_sql, 519 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 520 exp.Timestamp: no_timestamp_sql, 521 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 522 exp.TimestampTrunc: timestamptrunc_sql(), 523 exp.TimeStrToDate: timestrtotime_sql, 524 exp.TimeStrToTime: timestrtotime_sql, 525 exp.TimeStrToUnix: lambda self, e: self.func( 526 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 527 ), 528 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 529 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 530 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 531 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 532 exp.TsOrDiToDi: lambda self, 533 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 534 exp.TsOrDsAdd: _ts_or_ds_add_sql, 535 exp.TsOrDsDiff: 
_ts_or_ds_diff_sql, 536 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 537 exp.Unhex: rename_func("FROM_HEX"), 538 exp.UnixToStr: lambda self, 539 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 540 exp.UnixToTime: _unix_to_time_sql, 541 exp.UnixToTimeStr: lambda self, 542 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 543 exp.VariancePop: rename_func("VAR_POP"), 544 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 545 exp.WithinGroup: transforms.preprocess( 546 [transforms.remove_within_group_for_percentiles] 547 ), 548 exp.Xor: bool_xor_sql, 549 exp.MD5Digest: rename_func("MD5"), 550 exp.SHA: rename_func("SHA1"), 551 exp.SHA1Digest: rename_func("SHA1"), 552 exp.SHA2: sha256_sql, 553 exp.SHA2Digest: sha2_digest_sql, 554 } 555 556 RESERVED_KEYWORDS = { 557 "alter", 558 "and", 559 "as", 560 "between", 561 "by", 562 "case", 563 "cast", 564 "constraint", 565 "create", 566 "cross", 567 "current_time", 568 "current_timestamp", 569 "deallocate", 570 "delete", 571 "describe", 572 "distinct", 573 "drop", 574 "else", 575 "end", 576 "escape", 577 "except", 578 "execute", 579 "exists", 580 "extract", 581 "false", 582 "for", 583 "from", 584 "full", 585 "group", 586 "having", 587 "in", 588 "inner", 589 "insert", 590 "intersect", 591 "into", 592 "is", 593 "join", 594 "left", 595 "like", 596 "natural", 597 "not", 598 "null", 599 "on", 600 "or", 601 "order", 602 "outer", 603 "prepare", 604 "right", 605 "select", 606 "table", 607 "then", 608 "true", 609 "union", 610 "using", 611 "values", 612 "when", 613 "where", 614 "with", 615 } 616 617 def extract_sql(self, expression: exp.Extract) -> str: 618 date_part = expression.name 619 620 if not date_part.startswith("EPOCH"): 621 return super().extract_sql(expression) 622 623 if date_part == "EPOCH_MILLISECOND": 624 scale = 10**3 625 elif date_part == "EPOCH_MICROSECOND": 626 scale = 10**6 627 elif date_part == "EPOCH_NANOSECOND": 628 scale = 10**9 629 else: 630 scale = 
        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            """Render JSON_FORMAT, casting the argument to JSON when it isn't one already."""
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 of a string as LOWER(TO_HEX(MD5(TO_UTF8(x)))).

            Presto's MD5 operates on VARBINARY, so textual inputs are UTF-8 encoded first.
            """
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render bracket access; safe (NULL-on-missing) access becomes ELEMENT_AT."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            # Shift indexes from the expression's offset to Presto's 1-based offset
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct literal.

            When every field has a known name and type, emit
            CAST(ROW(...) AS ROW(name type, ...)) so field names survive; otherwise
            fall back to a plain ROW(...) call.
            """
            if not expression.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                annotate_types(expression, dialect=self.dialect)

            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Render intervals; WEEK has no INTERVAL unit in Presto, so use 7-day arithmetic."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render BEGIN/START as Presto's START TRANSACTION with optional modes."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto expects OFFSET before LIMIT/FETCH
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    # Columns qualified by the dropped alias must be unqualified too
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            """Render JSON extraction, optionally as ROW dot-access for VARIANT extracts."""
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            """Render GROUP_CONCAT as ARRAY_JOIN(ARRAY_AGG(x), separator)."""
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify column references that point at the alias of an exploded struct column.

    When a Spark LATERAL VIEW EXPLODE query is transpiled, projected fields may still be
    qualified with the exploded struct's column alias, which is not a valid table reference
    in Presto. This rewrites such references to use the lateral's table alias instead.
    Only applies when types were inferred (expression.type is set) on a SELECT.
    """
    # Bail out early unless this is a type-annotated SELECT
    if not (isinstance(expression, exp.Select) and expression.type):
        return expression

    for lateral in expression.args.get("laterals") or []:
        alias = lateral.args.get("alias")
        is_single_column_explode = (
            isinstance(lateral.this, exp.Explode)
            and isinstance(alias, exp.TableAlias)
            and len(alias.columns) == 1
        )
        if not is_single_column_explode:
            continue

        replacement_table = alias.this
        exploded_name = alias.columns[0].name.lower()

        for col in find_all_in_scope(expression, exp.Column):
            if col.db.lower() == exploded_name:
                # db-qualified: promote the db part into the table slot
                col.set("table", col.args["db"].pop())
            elif col.table.lower() == exploded_name:
                col.set("table", replacement_table.copy())
            elif col.name.lower() == exploded_name and isinstance(col.parent, exp.Dot):
                # Dot access on the exploded column itself: rebuild as table-qualified column
                col.parent.replace(exp.column(col.parent.expression, table=replacement_table))

    return expression
class Presto(Dialect):
    """Dialect for PrestoDB / Trino: tokenizer, parser and generator overrides."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False
    LEAST_GREATEST_IGNORES_NULLS = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # Identifiers are case-insensitive in Presto/Trino, see:
    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        # Unicode string literals, e.g. U&'...' (both upper/lowercase prefixes)
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False
        ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD/DATE_DIFF take (unit, value, date): reverse the args
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            # ELEMENT_AT is a null-safe, 1-based subscript
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "REPLACE": build_replace_with_optional_replacement,
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "SLICE": exp.ArraySlice.from_arg_list,
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "WEEK": exp.WeekOfYear.from_arg_list,
        }

        # Presto has no special TRIM(... FROM ...) syntax handling here
        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: lambda self, e: self.func(
                "APPROX_PERCENTILE",
                e.this,
                e.args.get("weight"),
                e.args.get("quantile"),
                e.args.get("accuracy"),
            ),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: transforms.preprocess(
                [transforms.inherit_struct_field_names],
                generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            ),
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.ArraySlice: rename_func("SLICE"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DateSub is emitted as DATE_ADD with a negated interval
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA1Digest: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.SHA2Digest: sha2_digest_sql,
        }

        # Identifiers equal to these must be quoted when generated
        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def extract_sql(self, expression: exp.Extract) -> str:
            """Rewrite EXTRACT(EPOCH[_MILLI|_MICRO|_NANOSECOND] ...) via TO_UNIXTIME.

            Non-EPOCH parts fall through to the base generator. Sub-second variants
            multiply the unix timestamp by the corresponding power of ten.
            """
            date_part = expression.name

            if not date_part.startswith("EPOCH"):
                return super().extract_sql(expression)

            if date_part == "EPOCH_MILLISECOND":
                scale = 10**3
            elif date_part == "EPOCH_MICROSECOND":
                scale = 10**6
            elif date_part == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            value = expression.expression

            ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = exp.TimeToUnix(this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return self.sql(to_unix)

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            """Ensure JSON_FORMAT's argument is JSON, casting it when the inferred type isn't."""
            this = expression.this
            is_json = expression.args.get("is_json")

            # Annotate types lazily so we only pay the cost when needed
            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            """Generate LOWER(TO_HEX(MD5(...))), UTF8-encoding text inputs first,
            since Presto's MD5 operates on VARBINARY and returns a binary digest."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Generate a null-safe subscript as ELEMENT_AT, adjusting for Presto's 1-based indexing."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Generate CAST(ROW(...) AS ROW(name type, ...)) when every field has a
            known name and type; otherwise fall back to a plain (untyped) ROW(...)."""
            if not expression.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                annotate_types(expression, dialect=self.dialect)

            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Emit WEEK intervals as a multiple of INTERVAL '7' DAY."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Generate START TRANSACTION (with optional modes) instead of BEGIN."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # OFFSET is generated before LIMIT/FETCH
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            """Generate GROUP_CONCAT as ARRAY_JOIN(ARRAY_AGG(x), separator)."""
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division if both a and b are integers.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether LEAST/GREATEST functions ignore NULL values (False means a NULL argument makes the result NULL), e.g:
- False (NULL propagates) — BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- True (NULLs are skipped) — Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Specifies the strategy according to which identifiers should be normalized.
287 class Tokenizer(tokens.Tokenizer): 288 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 289 UNICODE_STRINGS = [ 290 (prefix + q, q) 291 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 292 for prefix in ("U&", "u&") 293 ] 294 295 NESTED_COMMENTS = False 296 297 KEYWORDS = { 298 **tokens.Tokenizer.KEYWORDS, 299 "DEALLOCATE PREPARE": TokenType.COMMAND, 300 "DESCRIBE INPUT": TokenType.COMMAND, 301 "DESCRIBE OUTPUT": TokenType.COMMAND, 302 "RESET SESSION": TokenType.COMMAND, 303 "START": TokenType.BEGIN, 304 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 305 "ROW": TokenType.STRUCT, 306 "IPADDRESS": TokenType.IPADDRESS, 307 "IPPREFIX": TokenType.IPPREFIX, 308 "TDIGEST": TokenType.TDIGEST, 309 "HYPERLOGLOG": TokenType.HLLSKETCH, 310 } 311 KEYWORDS.pop("/*+") 312 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
314 class Parser(parser.Parser): 315 VALUES_FOLLOWED_BY_PAREN = False 316 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "ARBITRARY": exp.AnyValue.from_arg_list, 321 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 322 "APPROX_PERCENTILE": _build_approx_percentile, 323 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 324 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 325 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 326 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 327 "CARDINALITY": exp.ArraySize.from_arg_list, 328 "CONTAINS": exp.ArrayContains.from_arg_list, 329 "DATE_ADD": lambda args: exp.DateAdd( 330 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 331 ), 332 "DATE_DIFF": lambda args: exp.DateDiff( 333 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 334 ), 335 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 336 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 337 "DATE_TRUNC": date_trunc_to_time, 338 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 339 "DOW": exp.DayOfWeekIso.from_arg_list, 340 "DOY": exp.DayOfYear.from_arg_list, 341 "ELEMENT_AT": lambda args: exp.Bracket( 342 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 343 ), 344 "FROM_HEX": exp.Unhex.from_arg_list, 345 "FROM_UNIXTIME": _build_from_unixtime, 346 "FROM_UTF8": lambda args: exp.Decode( 347 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 348 ), 349 "JSON_FORMAT": lambda args: exp.JSONFormat( 350 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 351 ), 352 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 353 "NOW": exp.CurrentTimestamp.from_arg_list, 354 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 355 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 356 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 357 
this=seq_get(args, 0), 358 expression=seq_get(args, 1), 359 replacement=seq_get(args, 2) or exp.Literal.string(""), 360 ), 361 "REPLACE": build_replace_with_optional_replacement, 362 "ROW": exp.Struct.from_arg_list, 363 "SEQUENCE": exp.GenerateSeries.from_arg_list, 364 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 365 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 366 "STRPOS": lambda args: exp.StrPosition( 367 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 368 ), 369 "SLICE": exp.ArraySlice.from_arg_list, 370 "TO_CHAR": _build_to_char, 371 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 372 "TO_UTF8": lambda args: exp.Encode( 373 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 374 ), 375 "MD5": exp.MD5Digest.from_arg_list, 376 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 377 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 378 "WEEK": exp.WeekOfYear.from_arg_list, 379 } 380 381 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 382 FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
384 class Generator(generator.Generator): 385 INTERVAL_ALLOWS_PLURAL_FORM = False 386 JOIN_HINTS = False 387 TABLE_HINTS = False 388 QUERY_HINTS = False 389 IS_BOOL_ALLOWED = False 390 TZ_TO_WITH_TIME_ZONE = True 391 NVL2_SUPPORTED = False 392 STRUCT_DELIMITER = ("(", ")") 393 LIMIT_ONLY_LITERALS = True 394 SUPPORTS_SINGLE_ARG_CONCAT = False 395 LIKE_PROPERTY_INSIDE_SCHEMA = True 396 MULTI_ARG_DISTINCT = False 397 SUPPORTS_TO_NUMBER = False 398 HEX_FUNC = "TO_HEX" 399 PARSE_JSON_NAME = "JSON_PARSE" 400 PAD_FILL_PATTERN_IS_REQUIRED = True 401 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 402 SUPPORTS_MEDIAN = False 403 ARRAY_SIZE_NAME = "CARDINALITY" 404 405 PROPERTIES_LOCATION = { 406 **generator.Generator.PROPERTIES_LOCATION, 407 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 408 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 409 } 410 411 TYPE_MAPPING = { 412 **generator.Generator.TYPE_MAPPING, 413 exp.DataType.Type.BINARY: "VARBINARY", 414 exp.DataType.Type.BIT: "BOOLEAN", 415 exp.DataType.Type.DATETIME: "TIMESTAMP", 416 exp.DataType.Type.DATETIME64: "TIMESTAMP", 417 exp.DataType.Type.FLOAT: "REAL", 418 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 419 exp.DataType.Type.INT: "INTEGER", 420 exp.DataType.Type.STRUCT: "ROW", 421 exp.DataType.Type.TEXT: "VARCHAR", 422 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 423 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 424 exp.DataType.Type.TIMETZ: "TIME", 425 } 426 427 TRANSFORMS = { 428 **generator.Generator.TRANSFORMS, 429 exp.AnyValue: rename_func("ARBITRARY"), 430 exp.ApproxQuantile: lambda self, e: self.func( 431 "APPROX_PERCENTILE", 432 e.this, 433 e.args.get("weight"), 434 e.args.get("quantile"), 435 e.args.get("accuracy"), 436 ), 437 exp.ArgMax: rename_func("MAX_BY"), 438 exp.ArgMin: rename_func("MIN_BY"), 439 exp.Array: transforms.preprocess( 440 [transforms.inherit_struct_field_names], 441 generator=lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 442 ), 443 exp.ArrayAny: 
rename_func("ANY_MATCH"), 444 exp.ArrayConcat: rename_func("CONCAT"), 445 exp.ArrayContains: rename_func("CONTAINS"), 446 exp.ArrayToString: rename_func("ARRAY_JOIN"), 447 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 448 exp.ArraySlice: rename_func("SLICE"), 449 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 450 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 451 exp.BitwiseLeftShift: lambda self, e: self.func( 452 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 453 ), 454 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 455 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 456 exp.BitwiseRightShift: lambda self, e: self.func( 457 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 458 ), 459 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 460 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 461 exp.CurrentTime: lambda *_: "CURRENT_TIME", 462 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 463 exp.CurrentUser: lambda *_: "CURRENT_USER", 464 exp.DateAdd: _date_delta_sql("DATE_ADD"), 465 exp.DateDiff: lambda self, e: self.func( 466 "DATE_DIFF", unit_to_str(e), e.expression, e.this 467 ), 468 exp.DateStrToDate: datestrtodate_sql, 469 exp.DateToDi: lambda self, 470 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 471 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 472 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 473 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 474 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 475 exp.DiToDate: lambda self, 476 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 477 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 478 exp.FileFormatProperty: lambda self, 479 e: f"format={self.sql(exp.Literal.string(e.name))}", 480 exp.First: 
_first_last_sql, 481 exp.FromTimeZone: lambda self, 482 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 483 exp.GenerateSeries: sequence_sql, 484 exp.GenerateDateArray: sequence_sql, 485 exp.If: if_sql(), 486 exp.ILike: no_ilike_sql, 487 exp.Initcap: _initcap_sql, 488 exp.Last: _first_last_sql, 489 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 490 exp.Lateral: _explode_to_unnest_sql, 491 exp.Left: left_to_substring_sql, 492 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 493 rename_func("LEVENSHTEIN_DISTANCE") 494 ), 495 exp.LogicalAnd: rename_func("BOOL_AND"), 496 exp.LogicalOr: rename_func("BOOL_OR"), 497 exp.Pivot: no_pivot_sql, 498 exp.Quantile: _quantile_sql, 499 exp.RegexpExtract: regexp_extract_sql, 500 exp.RegexpExtractAll: regexp_extract_sql, 501 exp.Right: right_to_substring_sql, 502 exp.Schema: _schema_sql, 503 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 504 exp.Select: transforms.preprocess( 505 [ 506 transforms.eliminate_window_clause, 507 transforms.eliminate_qualify, 508 transforms.eliminate_distinct_on, 509 transforms.explode_projection_to_unnest(1), 510 transforms.eliminate_semi_and_anti_joins, 511 amend_exploded_column_table, 512 ] 513 ), 514 exp.SortArray: _no_sort_array, 515 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 516 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 517 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 518 exp.StrToTime: _str_to_time_sql, 519 exp.StructExtract: struct_extract_sql, 520 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 521 exp.Timestamp: no_timestamp_sql, 522 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 523 exp.TimestampTrunc: timestamptrunc_sql(), 524 exp.TimeStrToDate: timestrtotime_sql, 525 exp.TimeStrToTime: timestrtotime_sql, 526 exp.TimeStrToUnix: lambda self, e: self.func( 527 "TO_UNIXTIME", 
self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 528 ), 529 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 530 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 531 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 532 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 533 exp.TsOrDiToDi: lambda self, 534 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 535 exp.TsOrDsAdd: _ts_or_ds_add_sql, 536 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 537 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 538 exp.Unhex: rename_func("FROM_HEX"), 539 exp.UnixToStr: lambda self, 540 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 541 exp.UnixToTime: _unix_to_time_sql, 542 exp.UnixToTimeStr: lambda self, 543 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 544 exp.VariancePop: rename_func("VAR_POP"), 545 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 546 exp.WithinGroup: transforms.preprocess( 547 [transforms.remove_within_group_for_percentiles] 548 ), 549 exp.Xor: bool_xor_sql, 550 exp.MD5Digest: rename_func("MD5"), 551 exp.SHA: rename_func("SHA1"), 552 exp.SHA1Digest: rename_func("SHA1"), 553 exp.SHA2: sha256_sql, 554 exp.SHA2Digest: sha2_digest_sql, 555 } 556 557 RESERVED_KEYWORDS = { 558 "alter", 559 "and", 560 "as", 561 "between", 562 "by", 563 "case", 564 "cast", 565 "constraint", 566 "create", 567 "cross", 568 "current_time", 569 "current_timestamp", 570 "deallocate", 571 "delete", 572 "describe", 573 "distinct", 574 "drop", 575 "else", 576 "end", 577 "escape", 578 "except", 579 "execute", 580 "exists", 581 "extract", 582 "false", 583 "for", 584 "from", 585 "full", 586 "group", 587 "having", 588 "in", 589 "inner", 590 "insert", 591 "intersect", 592 "into", 593 "is", 594 "join", 595 "left", 596 "like", 597 "natural", 598 "not", 599 "null", 600 "on", 601 "or", 602 "order", 603 "outer", 604 
"prepare", 605 "right", 606 "select", 607 "table", 608 "then", 609 "true", 610 "union", 611 "using", 612 "values", 613 "when", 614 "where", 615 "with", 616 } 617 618 def extract_sql(self, expression: exp.Extract) -> str: 619 date_part = expression.name 620 621 if not date_part.startswith("EPOCH"): 622 return super().extract_sql(expression) 623 624 if date_part == "EPOCH_MILLISECOND": 625 scale = 10**3 626 elif date_part == "EPOCH_MICROSECOND": 627 scale = 10**6 628 elif date_part == "EPOCH_NANOSECOND": 629 scale = 10**9 630 else: 631 scale = None 632 633 value = expression.expression 634 635 ts = exp.cast(value, to=exp.DataType.build("TIMESTAMP")) 636 to_unix: exp.Expression = exp.TimeToUnix(this=ts) 637 638 if scale: 639 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 640 641 return self.sql(to_unix) 642 643 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 644 this = expression.this 645 is_json = expression.args.get("is_json") 646 647 if this and not (is_json or this.type): 648 from sqlglot.optimizer.annotate_types import annotate_types 649 650 this = annotate_types(this, dialect=self.dialect) 651 652 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 653 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 654 655 return self.function_fallback_sql(expression) 656 657 def md5_sql(self, expression: exp.MD5) -> str: 658 this = expression.this 659 660 if not this.type: 661 from sqlglot.optimizer.annotate_types import annotate_types 662 663 this = annotate_types(this, dialect=self.dialect) 664 665 if this.is_type(*exp.DataType.TEXT_TYPES): 666 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 667 668 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 669 670 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 671 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
672 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 673 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 674 # which seems to be using the same time mapping as Hive, as per: 675 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 676 this = expression.this 677 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 678 value_as_timestamp = ( 679 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 680 ) 681 682 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 683 684 formatted_value = self.func( 685 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 686 ) 687 parse_with_tz = self.func( 688 "PARSE_DATETIME", 689 formatted_value, 690 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 691 ) 692 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 693 return self.func("TO_UNIXTIME", coalesced) 694 695 def bracket_sql(self, expression: exp.Bracket) -> str: 696 if expression.args.get("safe"): 697 return self.func( 698 "ELEMENT_AT", 699 expression.this, 700 seq_get( 701 apply_index_offset( 702 expression.this, 703 expression.expressions, 704 1 - expression.args.get("offset", 0), 705 dialect=self.dialect, 706 ), 707 0, 708 ), 709 ) 710 return super().bracket_sql(expression) 711 712 def struct_sql(self, expression: exp.Struct) -> str: 713 if not expression.type: 714 from sqlglot.optimizer.annotate_types import annotate_types 715 716 annotate_types(expression, dialect=self.dialect) 717 718 values: t.List[str] = [] 719 schema: t.List[str] = [] 720 unknown_type = False 721 722 for e in expression.expressions: 723 if isinstance(e, exp.PropertyEQ): 724 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 725 unknown_type = True 726 else: 727 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 728 values.append(self.sql(e, 
"expression")) 729 else: 730 values.append(self.sql(e)) 731 732 size = len(expression.expressions) 733 734 if not size or len(schema) != size: 735 if unknown_type: 736 self.unsupported( 737 "Cannot convert untyped key-value definitions (try annotate_types)." 738 ) 739 return self.func("ROW", *values) 740 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 741 742 def interval_sql(self, expression: exp.Interval) -> str: 743 if expression.this and expression.text("unit").upper().startswith("WEEK"): 744 return f"({expression.this.name} * INTERVAL '7' DAY)" 745 return super().interval_sql(expression) 746 747 def transaction_sql(self, expression: exp.Transaction) -> str: 748 modes = expression.args.get("modes") 749 modes = f" {', '.join(modes)}" if modes else "" 750 return f"START TRANSACTION{modes}" 751 752 def offset_limit_modifiers( 753 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 754 ) -> t.List[str]: 755 return [ 756 self.sql(expression, "offset"), 757 self.sql(limit), 758 ] 759 760 def create_sql(self, expression: exp.Create) -> str: 761 """ 762 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 763 so we need to remove them 764 """ 765 kind = expression.args["kind"] 766 schema = expression.this 767 if kind == "VIEW" and schema.expressions: 768 expression.this.set("expressions", None) 769 return super().create_sql(expression) 770 771 def delete_sql(self, expression: exp.Delete) -> str: 772 """ 773 Presto only supports DELETE FROM for a single table without an alias, so we need 774 to remove the unnecessary parts. If the original DELETE statement contains more 775 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 
776 """ 777 tables = expression.args.get("tables") or [expression.this] 778 if len(tables) > 1: 779 return super().delete_sql(expression) 780 781 table = tables[0] 782 expression.set("this", table) 783 expression.set("tables", None) 784 785 if isinstance(table, exp.Table): 786 table_alias = table.args.get("alias") 787 if table_alias: 788 table_alias.pop() 789 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 790 791 return super().delete_sql(expression) 792 793 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 794 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 795 796 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 797 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 798 if not expression.args.get("variant_extract") or is_json_extract: 799 return self.func( 800 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 801 ) 802 803 this = self.sql(expression, "this") 804 805 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 806 segments = [] 807 for path_key in expression.expression.expressions[1:]: 808 if not isinstance(path_key, exp.JSONPathKey): 809 # Cannot transpile subscripts, wildcards etc to dot notation 810 self.unsupported( 811 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 812 ) 813 continue 814 key = path_key.this 815 if not exp.SAFE_IDENTIFIER_RE.match(key): 816 key = f'"{key}"' 817 segments.append(f".{key}") 818 819 expr = "".join(segments) 820 821 return f"{this}{expr}" 822 823 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 824 return self.func( 825 "ARRAY_JOIN", 826 self.func("ARRAY_AGG", expression.this), 827 expression.args.get("separator"), 828 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def extract_sql(self, expression: exp.Extract) -> str:
    """Generate EXTRACT, lowering EPOCH* parts to TO_UNIXTIME arithmetic.

    Non-EPOCH parts are delegated to the base generator. EPOCH_MILLISECOND /
    EPOCH_MICROSECOND / EPOCH_NANOSECOND multiply the unix time by the matching
    power of ten; plain EPOCH gets no multiplier.
    """
    date_part = expression.name

    if not date_part.startswith("EPOCH"):
        return super().extract_sql(expression)

    # Sub-second EPOCH variants scale the unix timestamp; plain EPOCH maps to None.
    scale = {
        "EPOCH_MILLISECOND": 10**3,
        "EPOCH_MICROSECOND": 10**6,
        "EPOCH_NANOSECOND": 10**9,
    }.get(date_part)

    timestamp = exp.cast(expression.expression, to=exp.DataType.build("TIMESTAMP"))
    unix: exp.Expression = exp.TimeToUnix(this=timestamp)

    if scale:
        unix = exp.Mul(this=unix, expression=exp.Literal.number(scale))

    return self.sql(unix)
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
    """Generate JSON_FORMAT, casting the argument to JSON when its type is unknown.

    If the argument is not already flagged/typed as JSON, it is annotated and, when
    still not JSON-typed, wrapped in a CAST to JSON in place before falling back to
    the generic function generator.
    """
    arg = expression.this
    flagged_json = expression.args.get("is_json")

    if arg and not (flagged_json or arg.type):
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

        if not (flagged_json or arg.is_type(exp.DataType.Type.JSON)):
            arg.replace(exp.cast(arg, exp.DataType.Type.JSON))

    return self.function_fallback_sql(expression)
def md5_sql(self, expression: exp.MD5) -> str:
    """Generate MD5 as a lowercase hex digest: LOWER(TO_HEX(MD5(...))).

    Text-typed arguments are first wrapped in a UTF-8 Encode, since Presto's
    MD5 operates on varbinary.
    """
    arg = expression.this

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    # `TO_UNIXTIME` needs a `TIMESTAMP` argument, so the input must be parsed first.
    # `DATE_PARSE` is attempted first, but it can fail on timezone-bearing values,
    # so it is wrapped in `TRY` with a `PARSE_DATETIME` fallback, which appears to
    # share Hive's time mapping, per:
    # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP) if arg.is_string else arg

    fmt = self.format_time(expression)
    naive_parse = self.func("DATE_PARSE", as_text, fmt)

    reformatted = self.func("DATE_FORMAT", as_timestamp, fmt)
    tz_parse = self.func(
        "PARSE_DATETIME",
        reformatted,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    best_effort = self.func("COALESCE", self.func("TRY", naive_parse), tz_parse)
    return self.func("TO_UNIXTIME", best_effort)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Generate subscript access, using ELEMENT_AT for "safe" (null on miss) brackets."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # Presto arrays are 1-based; shift the index accordingly before emitting.
    adjusted = apply_index_offset(
        expression.this,
        expression.expressions,
        1 - expression.args.get("offset", 0),
        dialect=self.dialect,
    )
    return self.func("ELEMENT_AT", expression.this, seq_get(adjusted, 0))
def struct_sql(self, expression: exp.Struct) -> str:
    """Generate a struct literal as CAST(ROW(...) AS ROW(...)) when fully typed.

    Key-value (PropertyEQ) entries contribute both a value and a schema field;
    if any entry's type is missing or unknown, the schema is incomplete and the
    output degrades to a bare ROW(...) call (with a warning for unknown types).
    """
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)

    values: t.List[str] = []
    schema: t.List[str] = []
    has_unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            has_unknown_type = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    total = len(expression.expressions)

    if total and len(schema) == total:
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    if has_unknown_type:
        self.unsupported(
            "Cannot convert untyped key-value definitions (try annotate_types)."
        )
    return self.func("ROW", *values)
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't accept column expressions on CREATE VIEW (e.g. the `(cola)` in
    `CREATE VIEW x (cola)`), so any such expressions are dropped before generating.
    """
    if expression.args["kind"] == "VIEW" and expression.this.expressions:
        expression.this.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: CREATE VIEW x (cola) then (cola) is the expression),
so we need to remove them
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM against a single, unaliased table, so the
    extra pieces are stripped when there is exactly one target. With multiple
    targets there is no safe 1-1 mapping, so generation falls through unchanged.
    """
    targets = expression.args.get("tables") or [expression.this]
    if len(targets) > 1:
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
            # Dropping the alias orphans qualified column references; unqualify them.
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Generate either JSON_EXTRACT or ROW dot-access for a JSON/VARIANT extraction."""
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Emit JSON_EXTRACT unless the user opted into mapping a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) to dot notation (i.e ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    base = self.sql(expression, "this")

    # Rewrite the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` as ROW access col.x.y
    parts = []
    for segment in expression.expression.expressions[1:]:
        if not isinstance(segment, exp.JSONPathKey):
            # Subscripts, wildcards etc. have no dot-notation equivalent
            self.unsupported(
                f"Cannot transpile JSONPath segment '{segment}' to ROW access"
            )
            continue

        name = segment.this
        if not exp.SAFE_IDENTIFIER_RE.match(name):
            name = f'"{name}"'
        parts.append(f".{name}")

    return f"{base}{''.join(parts)}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- UPDATE_STATEMENT_SUPPORTS_FROM
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql