sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    build_replace_with_optional_replacement,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    space_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    SUPPORTED_SETTINGS = {
        *Dialect.SUPPORTED_SETTINGS,
        "variant_extract_is_json_extract",
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False
        ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "DOW": exp.DayOfWeekIso.from_arg_list,
            "DOY": exp.DayOfYear.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "REPLACE": build_replace_with_optional_replacement,
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "SLICE": exp.ArraySlice.from_arg_list,
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.ArraySlice: rename_func("SLICE"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self,
            e: f"format={self.sql(exp.Literal.string(e.name))}",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    amend_exploded_column_table,
                ]
            ),
            exp.Space: space_sql,
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
            `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
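A minimal usage sketch (not part of the module source): the dialect registers itself under the name "presto", so it is reachable through sqlglot's top-level API. The exact generated strings can vary slightly between sqlglot versions.

import sqlglot

# APPROX_PERCENTILE(x, 0.5) is parsed into exp.ApproxQuantile by _build_approx_percentile.
ast = sqlglot.parse_one("SELECT APPROX_PERCENTILE(x, 0.5) FROM t", read="presto")

# Generate the equivalent SQL for another dialect.
print(ast.sql(dialect="spark"))

# Going the other way, exp.AnyValue is rendered as ARBITRARY per TRANSFORMS above.
print(sqlglot.transpile("SELECT ANY_VALUE(x) FROM t", write="presto"))
# e.g. ['SELECT ARBITRARY(x) FROM t']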
class Presto(Dialect)
Presto-relevant dialect settings (docstrings inherited from Dialect):

NULL_ORDERING: Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last". Presto sets this to "nulls_are_last".

TYPED_DIVISION: Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division when both a and b are integers. Presto sets this to True.

LOG_BASE_FIRST: Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG). Presto sets this to None.

TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats. Presto reuses MySQL.TIME_MAPPING.

NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized. Presto uses NormalizationStrategy.CASE_INSENSITIVE (see the Trino/Presto issues linked in the source).
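Since these settings are plain class attributes, they can be inspected directly; a small sketch:

from sqlglot.dialects.presto import Presto

print(Presto.INDEX_OFFSET)    # 1: Presto arrays are 1-indexed
print(Presto.TYPED_DIVISION)  # True: a / b keeps integer semantics for integer operands
print(Presto.NULL_ORDERING)   # 'nulls_are_last'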
class Tokenizer(tokens.Tokenizer)
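A short sketch of the tokenizer overrides in action, via the public Dialect.tokenize helper (the sample SQL is hypothetical): per the KEYWORDS mapping in the module source above, ROW tokenizes as a STRUCT token.

from sqlglot.dialects.presto import Presto

toks = Presto().tokenize("CAST(ROW(1, 2) AS ROW(x INTEGER, y INTEGER))")
print([tok.token_type.name for tok in toks][:4])
# e.g. ['CAST', 'L_PAREN', 'STRUCT', 'L_PAREN']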
class Parser(parser.Parser)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
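A sketch of how these arguments are typically supplied: the top-level parsing helpers forward extra options to the Parser constructor.

import sqlglot
from sqlglot.errors import ErrorLevel

# Raise a ParseError only after parsing completes, with at most 3 collected messages.
ast = sqlglot.parse_one(
    "SELECT 1",
    read="presto",
    error_level=ErrorLevel.RAISE,
    max_errors=3,
)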
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.ArraySlice: rename_func("SLICE"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self,
        e: f"format={self.sql(exp.Literal.string(e.name))}",
        exp.First: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.Last: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
                amend_exploded_column_table,
            ]
        ),
        exp.Space: space_sql,
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter", "and", "as", "between", "by", "case", "cast", "constraint",
        "create", "cross", "current_time", "current_timestamp", "deallocate",
        "delete", "describe", "distinct", "drop", "else", "end", "escape",
        "except", "execute", "exists", "extract", "false", "for", "from",
        "full", "group", "having", "in", "inner", "insert", "intersect",
        "into", "is", "join", "left", "like", "natural", "not", "null",
        "on", "or", "order", "outer", "prepare", "right", "select", "table",
        "then", "true", "union", "using", "values", "when", "where", "with",
    }

    def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
        this = expression.this
        is_json = expression.args.get("is_json")

        if this and not (is_json or this.type):
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

            if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                this.replace(exp.cast(this, exp.DataType.Type.JSON))

        return self.function_fallback_sql(expression)

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                        dialect=self.dialect,
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression, dialect=self.dialect)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
        `(cola)` is the expression), so we need to remove them.
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc. to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
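To make these Generator settings concrete, a short hedged example follows; the printed strings are approximate and can vary slightly across sqlglot versions.

import sqlglot

# TYPE_MAPPING: TEXT renders as VARCHAR (and INT as INTEGER, STRUCT as ROW).
print(sqlglot.transpile("CAST(x AS TEXT)", write="presto")[0])
# e.g. CAST(x AS VARCHAR)

# TRANSFORMS: ILIKE has no Presto equivalent, so no_ilike_sql rewrites it
# as a LOWER(...) LIKE comparison.
print(sqlglot.transpile("SELECT a FROM t WHERE a ILIKE '%x%'", write="presto")[0])
# e.g. SELECT a FROM t WHERE LOWER(a) LIKE '%x%'

# struct_sql: typed key-value structs become a CAST over ROW.
print(sqlglot.transpile("SELECT STRUCT(1 AS a, 'x' AS b)", read="bigquery", write="presto")[0])
# e.g. SELECT CAST(ROW(1, 'x') AS ROW(a INTEGER, b VARCHAR))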
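One behavior worth calling out separately is the variant_extract branch of jsonextract_sql in the class source above, which is driven by the variant_extract_is_json_extract dialect setting. A minimal sketch, assuming Snowflake input and that the setting is passed as a dialect keyword argument; exact output may differ by sqlglot version.

import sqlglot
from sqlglot.dialects.presto import Presto

snow = "SELECT col:x.y FROM t"

# Default: Snowflake VARIANT paths become JSON_EXTRACT calls.
print(sqlglot.transpile(snow, read="snowflake", write="presto")[0])
# e.g. SELECT JSON_EXTRACT(col, '$.x.y') FROM t

# With variant_extract_is_json_extract disabled, the path maps to ROW (dot) access.
print(sqlglot.transpile(snow, read="snowflake", write=Presto(variant_extract_is_json_extract=False))[0])
# e.g. SELECT col.x.y FROM t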
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql