sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of the same type
    # as the input, e.g.: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter", "and", "as", "between", "by", "case", "cast", "constraint",
            "create", "cross", "current_time", "current_timestamp", "deallocate",
            "delete", "describe", "distinct", "drop", "else", "end", "escape",
            "except", "execute", "exists", "extract", "false", "for", "from",
            "full", "group", "having", "in", "inner", "insert", "intersect",
            "into", "is", "join", "left", "like", "natural", "not", "null",
            "on", "or", "order", "outer", "prepare", "right", "select", "table",
            "then", "true", "union", "using", "values", "when", "where", "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
            `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
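A quick usage sketch (not part of the module source above; the exact SQL strings sqlglot emits can vary across versions): the dialect is exercised through sqlglot's top-level API by passing read="presto" or write="presto".

import sqlglot

# STRPOS and ELEMENT_AT parse into generic nodes (exp.StrPosition, exp.Bracket),
# which other dialects can then render with their own syntax.
print(sqlglot.transpile("SELECT ELEMENT_AT(arr, 1)", read="presto", write="spark")[0])

# Writing to Presto: ILIKE does not exist there, so no_ilike_sql lowers both operands.
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", read="duckdb", write="presto")[0])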
class Presto(Dialect)

NULL_ORDERING = "nulls_are_last"
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
TYPED_DIVISION = True
Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division when both a and b are integers.
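A small illustration (a sketch, not library-guaranteed output): Presto's 3 / 2 evaluates to 1, so when transpiling to a dialect whose / always produces a float, sqlglot has to reconcile the two semantics rather than copy the operator verbatim.

import sqlglot

# Presto truncates integer division (TYPED_DIVISION = True); inspect what sqlglot
# emits for a dialect with untyped (float) division to preserve that result.
print(sqlglot.transpile("SELECT 3 / 2", read="presto", write="duckdb")[0])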
LOG_BASE_FIRST = None
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
TIME_MAPPING = MySQL.TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
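For instance (a sketch; Presto reuses MySQL's percent-style format tokens, and output may vary by sqlglot version), format strings are rewritten token by token when crossing dialects:

import sqlglot

# Presto's '%Y-%m-%d' should come out in Hive's Java-style notation, e.g. 'yyyy-MM-dd'.
print(sqlglot.transpile("SELECT DATE_FORMAT(ts, '%Y-%m-%d')", read="presto", write="hive")[0])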
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
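A sketch of how the case-insensitive strategy surfaces through sqlglot's identifier-normalization helper (output shape may vary by version):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers in a case-insensitive dialect are normalized (lowercased);
# quoted identifiers keep their exact casing.
ast = sqlglot.parse_one('SELECT MyCol FROM "MyTable"', read="presto")
print(normalize_identifiers(ast, dialect="presto").sql(dialect="presto"))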
class Presto.Parser(parser.Parser)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE.
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3.
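A short sketch of driving this parser through the public API (the error_level values are those listed above):

import sqlglot
from sqlglot.errors import ErrorLevel

# FUNCTIONS above maps Presto's STRPOS to the generic exp.StrPosition node.
ast = sqlglot.parse_one("SELECT STRPOS(name, 'x') FROM t", read="presto")
print(ast.find(sqlglot.exp.StrPosition))

# Parser options such as error_level are forwarded from the top-level API;
# WARN logs parse errors instead of raising.
sqlglot.parse("SELECT 1", read="presto", error_level=ErrorLevel.WARN)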
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.Last: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
                amend_exploded_column_table,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
        this = expression.this
        is_json = expression.args.get("is_json")

        if this and not (is_json or this.type):
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not (is_json or this.is_type(exp.DataType.Type.JSON)):
            this.replace(exp.cast(this, exp.DataType.Type.JSON))

        return self.function_fallback_sql(expression)

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                        dialect=self.dialect,
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression, dialect=self.dialect)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
        `(cola)` is the expression), so we need to remove them.
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc. to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
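As a hedged, minimal sketch of two of the rewrites defined above (interval_sql's WEEK handling and _quantile_sql's approximate-percentile fallback); the inputs are illustrative and the comments describe the expected shape of the output, not verified results:

import sqlglot

# interval_sql: Presto's INTERVAL has no WEEK unit, so a WEEK interval is
# rewritten in days, e.g. INTERVAL '2' WEEK -> (2 * INTERVAL '7' DAY).
print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="presto")[0])

# _quantile_sql: exact quantiles are unsupported, so exp.Quantile is emitted
# as APPROX_PERCENTILE (a warning is raised at the default unsupported_level).
print(sqlglot.transpile("SELECT QUANTILE(x, 0.5) FROM t", write="presto")[0])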
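A sketch of the variant_extract_is_json_extract setting consulted by jsonextract_sql; the Snowflake input and the way the setting is passed (as a dialect keyword argument, which ends up in Dialect.settings) are illustrative assumptions:

import sqlglot
from sqlglot.dialects.presto import Presto

# Default: a Snowflake VARIANT extract (col:x.y) is generated as JSON_EXTRACT.
print(sqlglot.transpile("SELECT col:x.y FROM t", read="snowflake", write="presto")[0])

# With the setting disabled, the same extract becomes ROW (dot-notation) access.
row_dialect = Presto(variant_extract_is_json_extract=False)  # assumed settings kwarg
print(sqlglot.transpile("SELECT col:x.y FROM t", read="snowflake", write=row_dialect)[0])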