sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be
            # qualified with the struct column, resulting in invalid Presto references that
            # need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of the type
    # equal to the input type, e.g.: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter", "and", "as", "between", "by", "case", "cast",
            "constraint", "create", "cross", "current_time", "current_timestamp",
            "deallocate", "delete", "describe", "distinct", "drop", "else", "end",
            "escape", "except", "execute", "exists", "extract", "false", "for",
            "from", "full", "group", "having", "in", "inner", "insert",
            "intersect", "into", "is", "join", "left", "like", "natural",
            "not", "null", "on", "or", "order", "outer", "prepare",
            "right", "select", "table", "then", "true", "union", "using",
            "values", "when", "where", "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
            `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in
            # Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to the ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
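In practice this module is driven through sqlglot's top-level API rather than by instantiating these classes directly. A minimal sketch of transpiling into Presto; the table and column names (`t`, `x`, `arr`) are illustrative, and the exact output strings may vary slightly between sqlglot versions:

import sqlglot

# MySQL's DATE_ADD(x, INTERVAL 1 DAY) should come out in Presto's unit-first
# form, DATE_ADD('DAY', 1, x), per the exp.DateAdd transform above.
print(sqlglot.transpile("SELECT DATE_ADD(x, INTERVAL 1 DAY) FROM t", read="mysql", write="presto")[0])

# DuckDB array syntax maps to ARRAY[...] literals, and array lengths are
# rendered with CARDINALITY (the Generator's ARRAY_SIZE_NAME).
print(sqlglot.transpile("SELECT ARRAY_LENGTH([1, 2, 3])", read="duckdb", write="presto")[0])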
class Presto(Dialect):

Full definition in the module source above.
NULL_ORDERING = "nulls_are_last"
    Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

TYPED_DIVISION = True
    Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division if both a and b are integers.

LOG_BASE_FIRST = None
    Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).

TIME_MAPPING = MySQL.TIME_MAPPING
    Associates this dialect's time formats with their equivalent Python strftime formats.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
    Specifies the strategy according to which identifiers should be normalized.
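These settings are plain class attributes, so they can be inspected directly. A quick sketch (values are the ones defined in the source above):

from sqlglot.dialects.presto import Presto

print(Presto.INDEX_OFFSET)            # 1: Presto arrays are 1-indexed
print(Presto.NULL_ORDERING)           # "nulls_are_last"
print(Presto.TYPED_DIVISION)          # True: a / b keeps integer semantics for integer operands
print(Presto.NORMALIZATION_STRATEGY)  # NormalizationStrategy.CASE_INSENSITIVE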
class Presto.Tokenizer(sqlglot.tokens.Tokenizer):

Full definition in the module source above.
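A small sketch of the Tokenizer at work, using sqlglot's top-level tokenize helper (the SQL snippet is illustrative):

import sqlglot

# Per the KEYWORDS override above, "ROW" is tokenized as a STRUCT type token,
# which is why CAST(... AS ROW(...)) parses as a struct type in this dialect.
for token in sqlglot.tokenize("CAST(x AS ROW(a INTEGER))", read="presto"):
    print(token.token_type, token.text)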
class Presto.Parser(sqlglot.parser.Parser):

Full definition in the module source above.
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
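As a sketch of the FUNCTIONS overrides above (the column name `arr` is illustrative, and the exact target-dialect rendering depends on the installed sqlglot version):

import sqlglot
from sqlglot import exp

# ELEMENT_AT(arr, 1) parses into a safe, 1-indexed exp.Bracket, which lets
# other dialects render it with their own subscript syntax.
ast = sqlglot.parse_one("SELECT ELEMENT_AT(arr, 1)", read="presto")
bracket = ast.find(exp.Bracket)
print(bracket.args.get("safe"), bracket.args.get("offset"))  # True 1
print(ast.sql("duckdb"))  # rendering depends on the target dialect's bracket support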
383 class Generator(generator.Generator): 384 INTERVAL_ALLOWS_PLURAL_FORM = False 385 JOIN_HINTS = False 386 TABLE_HINTS = False 387 QUERY_HINTS = False 388 IS_BOOL_ALLOWED = False 389 TZ_TO_WITH_TIME_ZONE = True 390 NVL2_SUPPORTED = False 391 STRUCT_DELIMITER = ("(", ")") 392 LIMIT_ONLY_LITERALS = True 393 SUPPORTS_SINGLE_ARG_CONCAT = False 394 LIKE_PROPERTY_INSIDE_SCHEMA = True 395 MULTI_ARG_DISTINCT = False 396 SUPPORTS_TO_NUMBER = False 397 HEX_FUNC = "TO_HEX" 398 PARSE_JSON_NAME = "JSON_PARSE" 399 PAD_FILL_PATTERN_IS_REQUIRED = True 400 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 401 SUPPORTS_MEDIAN = False 402 ARRAY_SIZE_NAME = "CARDINALITY" 403 404 PROPERTIES_LOCATION = { 405 **generator.Generator.PROPERTIES_LOCATION, 406 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 407 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 408 } 409 410 TYPE_MAPPING = { 411 **generator.Generator.TYPE_MAPPING, 412 exp.DataType.Type.BINARY: "VARBINARY", 413 exp.DataType.Type.BIT: "BOOLEAN", 414 exp.DataType.Type.DATETIME: "TIMESTAMP", 415 exp.DataType.Type.DATETIME64: "TIMESTAMP", 416 exp.DataType.Type.FLOAT: "REAL", 417 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 418 exp.DataType.Type.INT: "INTEGER", 419 exp.DataType.Type.STRUCT: "ROW", 420 exp.DataType.Type.TEXT: "VARCHAR", 421 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 422 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 423 exp.DataType.Type.TIMETZ: "TIME", 424 } 425 426 TRANSFORMS = { 427 **generator.Generator.TRANSFORMS, 428 exp.AnyValue: rename_func("ARBITRARY"), 429 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 430 exp.ArgMax: rename_func("MAX_BY"), 431 exp.ArgMin: rename_func("MIN_BY"), 432 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 433 exp.ArrayAny: rename_func("ANY_MATCH"), 434 exp.ArrayConcat: rename_func("CONCAT"), 435 exp.ArrayContains: rename_func("CONTAINS"), 436 exp.ArrayToString: rename_func("ARRAY_JOIN"), 437 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 438 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 439 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 440 exp.BitwiseLeftShift: lambda self, e: self.func( 441 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 442 ), 443 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 444 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 445 exp.BitwiseRightShift: lambda self, e: self.func( 446 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 447 ), 448 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 449 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 450 exp.CurrentTime: lambda *_: "CURRENT_TIME", 451 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 452 exp.CurrentUser: lambda *_: "CURRENT_USER", 453 exp.DateAdd: _date_delta_sql("DATE_ADD"), 454 exp.DateDiff: lambda self, e: self.func( 455 "DATE_DIFF", unit_to_str(e), e.expression, e.this 456 ), 457 exp.DateStrToDate: datestrtodate_sql, 458 exp.DateToDi: lambda self, 459 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 460 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 461 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 462 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 463 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 464 exp.DiToDate: lambda self, 465 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 466 
exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 467 exp.FileFormatProperty: lambda self, 468 e: f"format={self.sql(exp.Literal.string(e.name))}", 469 exp.First: _first_last_sql, 470 exp.FromTimeZone: lambda self, 471 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 472 exp.GenerateSeries: sequence_sql, 473 exp.GenerateDateArray: sequence_sql, 474 exp.Group: transforms.preprocess([transforms.unalias_group]), 475 exp.If: if_sql(), 476 exp.ILike: no_ilike_sql, 477 exp.Initcap: _initcap_sql, 478 exp.Last: _first_last_sql, 479 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 480 exp.Lateral: _explode_to_unnest_sql, 481 exp.Left: left_to_substring_sql, 482 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 483 rename_func("LEVENSHTEIN_DISTANCE") 484 ), 485 exp.LogicalAnd: rename_func("BOOL_AND"), 486 exp.LogicalOr: rename_func("BOOL_OR"), 487 exp.Pivot: no_pivot_sql, 488 exp.Quantile: _quantile_sql, 489 exp.RegexpExtract: regexp_extract_sql, 490 exp.RegexpExtractAll: regexp_extract_sql, 491 exp.Right: right_to_substring_sql, 492 exp.Schema: _schema_sql, 493 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 494 exp.Select: transforms.preprocess( 495 [ 496 transforms.eliminate_window_clause, 497 transforms.eliminate_qualify, 498 transforms.eliminate_distinct_on, 499 transforms.explode_projection_to_unnest(1), 500 transforms.eliminate_semi_and_anti_joins, 501 amend_exploded_column_table, 502 ] 503 ), 504 exp.SortArray: _no_sort_array, 505 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 506 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 507 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 508 exp.StrToTime: _str_to_time_sql, 509 exp.StructExtract: struct_extract_sql, 510 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 511 exp.Timestamp: no_timestamp_sql, 512 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 513 exp.TimestampTrunc: timestamptrunc_sql(), 514 exp.TimeStrToDate: timestrtotime_sql, 515 exp.TimeStrToTime: timestrtotime_sql, 516 exp.TimeStrToUnix: lambda self, e: self.func( 517 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 518 ), 519 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 520 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 521 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 522 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 523 exp.TsOrDiToDi: lambda self, 524 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 525 exp.TsOrDsAdd: _ts_or_ds_add_sql, 526 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 527 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 528 exp.Unhex: rename_func("FROM_HEX"), 529 exp.UnixToStr: lambda self, 530 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 531 exp.UnixToTime: _unix_to_time_sql, 532 exp.UnixToTimeStr: lambda self, 533 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 534 exp.VariancePop: rename_func("VAR_POP"), 535 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 536 exp.WithinGroup: transforms.preprocess( 537 [transforms.remove_within_group_for_percentiles] 538 ), 539 exp.Xor: bool_xor_sql, 540 exp.MD5Digest: rename_func("MD5"), 541 exp.SHA: rename_func("SHA1"), 542 exp.SHA2: sha256_sql, 543 } 544 545 RESERVED_KEYWORDS = { 546 "alter", 547 "and", 
548 "as", 549 "between", 550 "by", 551 "case", 552 "cast", 553 "constraint", 554 "create", 555 "cross", 556 "current_time", 557 "current_timestamp", 558 "deallocate", 559 "delete", 560 "describe", 561 "distinct", 562 "drop", 563 "else", 564 "end", 565 "escape", 566 "except", 567 "execute", 568 "exists", 569 "extract", 570 "false", 571 "for", 572 "from", 573 "full", 574 "group", 575 "having", 576 "in", 577 "inner", 578 "insert", 579 "intersect", 580 "into", 581 "is", 582 "join", 583 "left", 584 "like", 585 "natural", 586 "not", 587 "null", 588 "on", 589 "or", 590 "order", 591 "outer", 592 "prepare", 593 "right", 594 "select", 595 "table", 596 "then", 597 "true", 598 "union", 599 "using", 600 "values", 601 "when", 602 "where", 603 "with", 604 } 605 606 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 607 this = expression.this 608 is_json = expression.args.get("is_json") 609 610 if this and not (is_json or this.type): 611 from sqlglot.optimizer.annotate_types import annotate_types 612 613 this = annotate_types(this, dialect=self.dialect) 614 615 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 616 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 617 618 return self.function_fallback_sql(expression) 619 620 def md5_sql(self, expression: exp.MD5) -> str: 621 this = expression.this 622 623 if not this.type: 624 from sqlglot.optimizer.annotate_types import annotate_types 625 626 this = annotate_types(this, dialect=self.dialect) 627 628 if this.is_type(*exp.DataType.TEXT_TYPES): 629 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 630 631 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 632 633 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 634 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
635 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 636 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 637 # which seems to be using the same time mapping as Hive, as per: 638 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 639 this = expression.this 640 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 641 value_as_timestamp = ( 642 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 643 ) 644 645 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 646 647 formatted_value = self.func( 648 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 649 ) 650 parse_with_tz = self.func( 651 "PARSE_DATETIME", 652 formatted_value, 653 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 654 ) 655 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 656 return self.func("TO_UNIXTIME", coalesced) 657 658 def bracket_sql(self, expression: exp.Bracket) -> str: 659 if expression.args.get("safe"): 660 return self.func( 661 "ELEMENT_AT", 662 expression.this, 663 seq_get( 664 apply_index_offset( 665 expression.this, 666 expression.expressions, 667 1 - expression.args.get("offset", 0), 668 dialect=self.dialect, 669 ), 670 0, 671 ), 672 ) 673 return super().bracket_sql(expression) 674 675 def struct_sql(self, expression: exp.Struct) -> str: 676 from sqlglot.optimizer.annotate_types import annotate_types 677 678 expression = annotate_types(expression, dialect=self.dialect) 679 values: t.List[str] = [] 680 schema: t.List[str] = [] 681 unknown_type = False 682 683 for e in expression.expressions: 684 if isinstance(e, exp.PropertyEQ): 685 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 686 unknown_type = True 687 else: 688 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 689 values.append(self.sql(e, "expression")) 690 else: 691 values.append(self.sql(e)) 692 693 size = len(expression.expressions) 694 695 if not size or len(schema) != size: 696 if unknown_type: 697 self.unsupported( 698 "Cannot convert untyped key-value definitions (try annotate_types)." 
699 ) 700 return self.func("ROW", *values) 701 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 702 703 def interval_sql(self, expression: exp.Interval) -> str: 704 if expression.this and expression.text("unit").upper().startswith("WEEK"): 705 return f"({expression.this.name} * INTERVAL '7' DAY)" 706 return super().interval_sql(expression) 707 708 def transaction_sql(self, expression: exp.Transaction) -> str: 709 modes = expression.args.get("modes") 710 modes = f" {', '.join(modes)}" if modes else "" 711 return f"START TRANSACTION{modes}" 712 713 def offset_limit_modifiers( 714 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 715 ) -> t.List[str]: 716 return [ 717 self.sql(expression, "offset"), 718 self.sql(limit), 719 ] 720 721 def create_sql(self, expression: exp.Create) -> str: 722 """ 723 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 724 so we need to remove them 725 """ 726 kind = expression.args["kind"] 727 schema = expression.this 728 if kind == "VIEW" and schema.expressions: 729 expression.this.set("expressions", None) 730 return super().create_sql(expression) 731 732 def delete_sql(self, expression: exp.Delete) -> str: 733 """ 734 Presto only supports DELETE FROM for a single table without an alias, so we need 735 to remove the unnecessary parts. If the original DELETE statement contains more 736 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 737 """ 738 tables = expression.args.get("tables") or [expression.this] 739 if len(tables) > 1: 740 return super().delete_sql(expression) 741 742 table = tables[0] 743 expression.set("this", table) 744 expression.set("tables", None) 745 746 if isinstance(table, exp.Table): 747 table_alias = table.args.get("alias") 748 if table_alias: 749 table_alias.pop() 750 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 751 752 return super().delete_sql(expression) 753 754 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 755 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 756 757 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 758 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 759 if not expression.args.get("variant_extract") or is_json_extract: 760 return self.func( 761 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 762 ) 763 764 this = self.sql(expression, "this") 765 766 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 767 segments = [] 768 for path_key in expression.expression.expressions[1:]: 769 if not isinstance(path_key, exp.JSONPathKey): 770 # Cannot transpile subscripts, wildcards etc to dot notation 771 self.unsupported( 772 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 773 ) 774 continue 775 key = path_key.this 776 if not exp.SAFE_IDENTIFIER_RE.match(key): 777 key = f'"{key}"' 778 segments.append(f".{key}") 779 780 expr = "".join(segments) 781 782 return f"{this}{expr}" 783 784 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 785 return self.func( 786 "ARRAY_JOIN", 787 self.func("ARRAY_AGG", expression.this), 788 expression.args.get("separator"), 789 )
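The TRANSFORMS table above is what drives cross-dialect generation: each entry renders one canonical expression in Presto's vocabulary. A minimal sketch, assuming a standard sqlglot install (exact output may vary slightly by version):

import sqlglot

# exp.ILike has no Presto counterpart, so no_ilike_sql lowers the column
# side and falls back to a plain LIKE.
print(sqlglot.transpile("SELECT x ILIKE '%a%'", read="duckdb", write="presto")[0])
# Expected, approximately: SELECT LOWER(x) LIKE '%a%'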
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except where the dialect makes it mandatory. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
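All of these are plain keyword options on the top-level helpers, e.g.:

import sqlglot

sql = "SELECT a, SUM(b) FROM t GROUP BY a HAVING SUM(b) > 10"
# Pretty-print the Presto output and force-quote every identifier.
print(sqlglot.transpile(sql, write="presto", pretty=True, identify=True)[0])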
606 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 607 this = expression.this 608 is_json = expression.args.get("is_json") 609 610 if this and not (is_json or this.type): 611 from sqlglot.optimizer.annotate_types import annotate_types 612 613 this = annotate_types(this, dialect=self.dialect) 614 615 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 616 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 617 618 return self.function_fallback_sql(expression)
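In effect, an operand of unknown type gets an explicit CAST(... AS JSON), while an operand the Presto parser already flagged via is_json stays bare. A small round-trip sketch (output is approximate):

import sqlglot

# The Presto parser sets is_json=True on JSON_FORMAT's operand, so the
# round trip does not introduce a redundant cast.
print(sqlglot.transpile("SELECT JSON_FORMAT(col)", read="presto")[0])
# Expected, approximately: SELECT JSON_FORMAT(col)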
620 def md5_sql(self, expression: exp.MD5) -> str: 621 this = expression.this 622 623 if not this.type: 624 from sqlglot.optimizer.annotate_types import annotate_types 625 626 this = annotate_types(this, dialect=self.dialect) 627 628 if this.is_type(*exp.DataType.TEXT_TYPES): 629 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 630 631 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))
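Presto's MD5 operates on VARBINARY, hence the TO_UTF8 wrapping for text inputs and the LOWER(TO_HEX(...)) to produce the conventional hex digest. A sketch:

import sqlglot

print(sqlglot.transpile("SELECT MD5('abc')", read="duckdb", write="presto")[0])
# Expected, approximately: SELECT LOWER(TO_HEX(MD5(TO_UTF8('abc'))))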
633 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 634 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 635 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 636 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 637 # which seems to be using the same time mapping as Hive, as per: 638 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 639 this = expression.this 640 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 641 value_as_timestamp = ( 642 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 643 ) 644 645 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 646 647 formatted_value = self.func( 648 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 649 ) 650 parse_with_tz = self.func( 651 "PARSE_DATETIME", 652 formatted_value, 653 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 654 ) 655 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 656 return self.func("TO_UNIXTIME", coalesced)
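For example, Hive's UNIX_TIMESTAMP(col, fmt) parses into exp.StrToUnix, which this method then renders as TO_UNIXTIME over the TRY(DATE_PARSE(...))/PARSE_DATETIME fallback described in the comment. A sketch; the exact output is version-dependent:

import sqlglot

print(sqlglot.transpile("SELECT UNIX_TIMESTAMP(ds, 'yyyy-MM-dd')", read="hive", write="presto")[0])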
658 def bracket_sql(self, expression: exp.Bracket) -> str: 659 if expression.args.get("safe"): 660 return self.func( 661 "ELEMENT_AT", 662 expression.this, 663 seq_get( 664 apply_index_offset( 665 expression.this, 666 expression.expressions, 667 1 - expression.args.get("offset", 0), 668 dialect=self.dialect, 669 ), 670 0, 671 ), 672 ) 673 return super().bracket_sql(expression)
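A "safe" bracket, such as the one the Presto parser builds for ELEMENT_AT, is re-emitted as ELEMENT_AT rather than subscript syntax, with apply_index_offset keeping the one-based index intact:

import sqlglot

print(sqlglot.transpile("SELECT ELEMENT_AT(arr, 1)", read="presto")[0])
# Expected, approximately: SELECT ELEMENT_AT(arr, 1)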
675 def struct_sql(self, expression: exp.Struct) -> str: 676 from sqlglot.optimizer.annotate_types import annotate_types 677 678 expression = annotate_types(expression, dialect=self.dialect) 679 values: t.List[str] = [] 680 schema: t.List[str] = [] 681 unknown_type = False 682 683 for e in expression.expressions: 684 if isinstance(e, exp.PropertyEQ): 685 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 686 unknown_type = True 687 else: 688 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 689 values.append(self.sql(e, "expression")) 690 else: 691 values.append(self.sql(e)) 692 693 size = len(expression.expressions) 694 695 if not size or len(schema) != size: 696 if unknown_type: 697 self.unsupported( 698 "Cannot convert untyped key-value definitions (try annotate_types)." 699 ) 700 return self.func("ROW", *values) 701 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
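When every named field's value type can be inferred, the struct becomes a typed ROW cast; otherwise the generator degrades to a bare ROW(...) call and warns. A sketch (output is approximate):

import sqlglot

print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="spark", write="presto")[0])
# Expected, approximately: SELECT CAST(ROW(1, 'x') AS ROW(a INTEGER, b VARCHAR))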
721 def create_sql(self, expression: exp.Create) -> str: 722 """ 723 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 724 so we need to remove them 725 """ 726 kind = expression.args["kind"] 727 schema = expression.this 728 if kind == "VIEW" and schema.expressions: 729 expression.this.set("expressions", None) 730 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g. in CREATE VIEW x (cola), (cola) is the expression), so we need to remove them.
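A sketch of the effect (output is approximate):

import sqlglot

# The (cola) column list is dropped on the way to Presto.
print(sqlglot.transpile("CREATE VIEW x (cola) AS SELECT 1 AS cola", read="postgres", write="presto")[0])
# Expected, approximately: CREATE VIEW x AS SELECT 1 AS cola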
732 def delete_sql(self, expression: exp.Delete) -> str: 733 """ 734 Presto only supports DELETE FROM for a single table without an alias, so we need 735 to remove the unnecessary parts. If the original DELETE statement contains more 736 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 737 """ 738 tables = expression.args.get("tables") or [expression.this] 739 if len(tables) > 1: 740 return super().delete_sql(expression) 741 742 table = tables[0] 743 expression.set("this", table) 744 expression.set("tables", None) 745 746 if isinstance(table, exp.Table): 747 table_alias = table.args.get("alias") 748 if table_alias: 749 table_alias.pop() 750 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 751 752 return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
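A sketch of the single-table case (output is approximate):

import sqlglot

# The alias is popped and its column qualifiers removed.
print(sqlglot.transpile("DELETE FROM t AS tt WHERE tt.x = 1", read="postgres", write="presto")[0])
# Expected, approximately: DELETE FROM t WHERE x = 1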
754 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 755 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 756 757 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 758 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 759 if not expression.args.get("variant_extract") or is_json_extract: 760 return self.func( 761 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 762 ) 763 764 this = self.sql(expression, "this") 765 766 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 767 segments = [] 768 for path_key in expression.expression.expressions[1:]: 769 if not isinstance(path_key, exp.JSONPathKey): 770 # Cannot transpile subscripts, wildcards etc to dot notation 771 self.unsupported( 772 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 773 ) 774 continue 775 key = path_key.this 776 if not exp.SAFE_IDENTIFIER_RE.match(key): 777 key = f'"{key}"' 778 segments.append(f".{key}") 779 780 expr = "".join(segments) 781 782 return f"{this}{expr}"
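A sketch of both paths. The comma-separated dialect-settings syntax and the boolean coercion of "false" are assumptions here; check your sqlglot version:

import sqlglot

# Default: a Snowflake VARIANT path maps to JSON_EXTRACT.
print(sqlglot.transpile("SELECT col:x.y FROM t", read="snowflake", write="presto")[0])
# Expected, approximately: SELECT JSON_EXTRACT(col, '$.x.y') FROM t

# With the setting disabled, the same path becomes ROW dot access.
print(
    sqlglot.transpile(
        "SELECT col:x.y FROM t",
        read="snowflake",
        write="presto, variant_extract_is_json_extract=false",
    )[0]
)
# Expected, approximately: SELECT col.x.y FROM t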
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql