sqlglot.dialects.presto
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 bool_xor_sql, 11 build_replace_with_optional_replacement, 12 date_trunc_to_time, 13 datestrtodate_sql, 14 encode_decode_sql, 15 build_formatted_time, 16 if_sql, 17 left_to_substring_sql, 18 no_ilike_sql, 19 no_pivot_sql, 20 no_timestamp_sql, 21 regexp_extract_sql, 22 rename_func, 23 right_to_substring_sql, 24 sha256_sql, 25 strposition_sql, 26 struct_extract_sql, 27 timestamptrunc_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_str, 31 sequence_sql, 32 build_regexp_extract, 33 explode_to_unnest_sql, 34) 35from sqlglot.dialects.hive import Hive 36from sqlglot.dialects.mysql import MySQL 37from sqlglot.helper import apply_index_offset, seq_get 38from sqlglot.optimizer.scope import find_all_in_scope 39from sqlglot.tokens import TokenType 40from sqlglot.transforms import unqualify_columns 41from sqlglot.generator import unsupported_args 42 43DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub] 44 45 46def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str: 47 regex = r"(\w)(\w*)" 48 return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))" 49 50 51def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str: 52 if expression.args.get("asc") == exp.false(): 53 comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" 54 else: 55 comparator = None 56 return self.func("ARRAY_SORT", expression.this, comparator) 57 58 59def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str: 60 if isinstance(expression.parent, exp.PartitionedByProperty): 61 # Any columns in the ARRAY[] string literals should not be quoted 62 expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False) 63 64 partition_exprs = [ 65 self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this") 66 for c in expression.expressions 67 ] 68 return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs])) 69 70 if expression.parent: 71 for schema in expression.parent.find_all(exp.Schema): 72 if schema is expression: 73 continue 74 75 column_defs = schema.find_all(exp.ColumnDef) 76 if column_defs and isinstance(schema.parent, exp.Property): 77 expression.expressions.extend(column_defs) 78 79 return self.schema_sql(expression) 80 81 82def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str: 83 self.unsupported("Presto does not support exact quantiles") 84 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 85 86 87def _str_to_time_sql( 88 self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate 89) -> str: 90 return self.func("DATE_PARSE", expression.this, self.format_time(expression)) 91 92 93def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str: 94 time_format = self.format_time(expression) 95 if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT): 96 return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE)) 97 return self.sql( 98 exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE) 99 ) 100 101 102def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str: 103 expression = ts_or_ds_add_cast(expression) 104 unit = unit_to_str(expression) 105 return self.func("DATE_ADD", unit, expression.expression, expression.this) 106 107 108def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str: 109 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP) 110 expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP) 111 unit = unit_to_str(expression) 112 return self.func("DATE_DIFF", unit, expr, this) 113 114 115def _build_approx_percentile(args: t.List) -> exp.Expression: 116 if len(args) == 4: 117 return exp.ApproxQuantile( 118 this=seq_get(args, 0), 119 weight=seq_get(args, 1), 120 quantile=seq_get(args, 2), 121 accuracy=seq_get(args, 3), 122 ) 123 if len(args) == 3: 124 return exp.ApproxQuantile( 125 this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2) 126 ) 127 return exp.ApproxQuantile.from_arg_list(args) 128 129 130def _build_from_unixtime(args: t.List) -> exp.Expression: 131 if len(args) == 3: 132 return exp.UnixToTime( 133 this=seq_get(args, 0), 134 hours=seq_get(args, 1), 135 minutes=seq_get(args, 2), 136 ) 137 if len(args) == 2: 138 return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1)) 139 140 return exp.UnixToTime.from_arg_list(args) 141 142 143def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: 144 """ 145 Trino doesn't support FIRST / LAST as functions, but they're valid in the context 146 of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases 147 they're converted into an ARBITRARY call. 148 149 Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions 150 """ 151 if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize): 152 return self.function_fallback_sql(expression) 153 154 return rename_func("ARBITRARY")(self, expression) 155 156 157def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str: 158 scale = expression.args.get("scale") 159 timestamp = self.sql(expression, "this") 160 if scale in (None, exp.UnixToTime.SECONDS): 161 return rename_func("FROM_UNIXTIME")(self, expression) 162 163 return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))" 164 165 166def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression: 167 if not expression.type: 168 from sqlglot.optimizer.annotate_types import annotate_types 169 170 annotate_types(expression, dialect=self.dialect) 171 if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: 172 return exp.cast(expression, to=exp.DataType.Type.BIGINT) 173 return expression 174 175 176def _build_to_char(args: t.List) -> exp.TimeToStr: 177 fmt = seq_get(args, 1) 178 if isinstance(fmt, exp.Literal): 179 # We uppercase this to match Teradata's format mapping keys 180 fmt.set("this", fmt.this.upper()) 181 182 # We use "teradata" on purpose here, because the time formats are different in Presto. 183 # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char 184 return build_formatted_time(exp.TimeToStr, "teradata")(args) 185 186 187def _date_delta_sql( 188 name: str, negate_interval: bool = False 189) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]: 190 def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str: 191 interval = _to_int(self, expression.expression) 192 return self.func( 193 name, 194 unit_to_str(expression), 195 interval * (-1) if negate_interval else interval, 196 expression.this, 197 ) 198 199 return _delta_sql 200 201 202def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str: 203 explode = expression.this 204 if isinstance(explode, exp.Explode): 205 exploded_type = explode.this.type 206 alias = expression.args.get("alias") 207 208 # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array 209 if ( 210 isinstance(alias, exp.TableAlias) 211 and isinstance(exploded_type, exp.DataType) 212 and exploded_type.is_type(exp.DataType.Type.ARRAY) 213 and exploded_type.expressions 214 and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT) 215 ): 216 # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias 217 alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions]) 218 elif isinstance(explode, exp.Inline): 219 explode.replace(exp.Explode(this=explode.this.copy())) 220 221 return explode_to_unnest_sql(self, expression) 222 223 224def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression: 225 # We check for expression.type because the columns can be amended only if types were inferred 226 if isinstance(expression, exp.Select) and expression.type: 227 for lateral in expression.args.get("laterals") or []: 228 alias = lateral.args.get("alias") 229 if ( 230 not isinstance(lateral.this, exp.Explode) 231 or not isinstance(alias, exp.TableAlias) 232 or len(alias.columns) != 1 233 ): 234 continue 235 236 new_table = alias.this 237 old_table = alias.columns[0].name.lower() 238 239 # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified 240 # with the struct column, resulting in invalid Presto references that need to be amended 241 for column in find_all_in_scope(expression, exp.Column): 242 if column.db.lower() == old_table: 243 column.set("table", column.args["db"].pop()) 244 elif column.table.lower() == old_table: 245 column.set("table", new_table.copy()) 246 elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot): 247 column.parent.replace(exp.column(column.parent.expression, table=new_table)) 248 249 return expression 250 251 252class Presto(Dialect): 253 INDEX_OFFSET = 1 254 NULL_ORDERING = "nulls_are_last" 255 TIME_FORMAT = MySQL.TIME_FORMAT 256 STRICT_STRING_CONCAT = True 257 SUPPORTS_SEMI_ANTI_JOIN = False 258 TYPED_DIVISION = True 259 TABLESAMPLE_SIZE_IS_PERCENT = True 260 LOG_BASE_FIRST: t.Optional[bool] = None 261 SUPPORTS_VALUES_DEFAULT = False 262 263 TIME_MAPPING = MySQL.TIME_MAPPING 264 265 # https://github.com/trinodb/trino/issues/17 266 # https://github.com/trinodb/trino/issues/12289 267 # https://github.com/prestodb/presto/issues/2863 268 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 269 270 # The result of certain math functions in Presto/Trino is of type 271 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 272 ANNOTATORS = { 273 **Dialect.ANNOTATORS, 274 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 275 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 276 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 277 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 278 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 279 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 280 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 281 if e.this 282 else self._set_type(e, exp.DataType.Type.DOUBLE), 283 } 284 285 SUPPORTED_SETTINGS = { 286 *Dialect.SUPPORTED_SETTINGS, 287 "variant_extract_is_json_extract", 288 } 289 290 class Tokenizer(tokens.Tokenizer): 291 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 292 UNICODE_STRINGS = [ 293 (prefix + q, q) 294 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 295 for prefix in ("U&", "u&") 296 ] 297 298 NESTED_COMMENTS = False 299 300 KEYWORDS = { 301 **tokens.Tokenizer.KEYWORDS, 302 "DEALLOCATE PREPARE": TokenType.COMMAND, 303 "DESCRIBE INPUT": TokenType.COMMAND, 304 "DESCRIBE OUTPUT": TokenType.COMMAND, 305 "RESET SESSION": TokenType.COMMAND, 306 "START": TokenType.BEGIN, 307 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 308 "ROW": TokenType.STRUCT, 309 "IPADDRESS": TokenType.IPADDRESS, 310 "IPPREFIX": TokenType.IPPREFIX, 311 "TDIGEST": TokenType.TDIGEST, 312 "HYPERLOGLOG": TokenType.HLLSKETCH, 313 } 314 KEYWORDS.pop("/*+") 315 KEYWORDS.pop("QUALIFY") 316 317 class Parser(parser.Parser): 318 VALUES_FOLLOWED_BY_PAREN = False 319 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 320 321 FUNCTIONS = { 322 **parser.Parser.FUNCTIONS, 323 "ARBITRARY": exp.AnyValue.from_arg_list, 324 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 325 "APPROX_PERCENTILE": _build_approx_percentile, 326 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 327 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 328 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 329 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 330 "CARDINALITY": exp.ArraySize.from_arg_list, 331 "CONTAINS": exp.ArrayContains.from_arg_list, 332 "DATE_ADD": lambda args: exp.DateAdd( 333 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 334 ), 335 "DATE_DIFF": lambda args: exp.DateDiff( 336 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 337 ), 338 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 339 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 340 "DATE_TRUNC": date_trunc_to_time, 341 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 342 "DOW": exp.DayOfWeekIso.from_arg_list, 343 "DOY": exp.DayOfYear.from_arg_list, 344 "ELEMENT_AT": lambda args: exp.Bracket( 345 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 346 ), 347 "FROM_HEX": exp.Unhex.from_arg_list, 348 "FROM_UNIXTIME": _build_from_unixtime, 349 "FROM_UTF8": lambda args: exp.Decode( 350 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 351 ), 352 "JSON_FORMAT": lambda args: exp.JSONFormat( 353 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 354 ), 355 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 356 "NOW": exp.CurrentTimestamp.from_arg_list, 357 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 358 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 359 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 360 this=seq_get(args, 0), 361 expression=seq_get(args, 1), 362 replacement=seq_get(args, 2) or exp.Literal.string(""), 363 ), 364 "REPLACE": build_replace_with_optional_replacement, 365 "ROW": exp.Struct.from_arg_list, 366 "SEQUENCE": exp.GenerateSeries.from_arg_list, 367 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 368 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 369 "STRPOS": lambda args: exp.StrPosition( 370 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 371 ), 372 "SLICE": exp.ArraySlice.from_arg_list, 373 "TO_CHAR": _build_to_char, 374 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 375 "TO_UTF8": lambda args: exp.Encode( 376 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 377 ), 378 "MD5": exp.MD5Digest.from_arg_list, 379 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 380 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 381 } 382 383 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 384 FUNCTION_PARSERS.pop("TRIM") 385 386 class Generator(generator.Generator): 387 INTERVAL_ALLOWS_PLURAL_FORM = False 388 JOIN_HINTS = False 389 TABLE_HINTS = False 390 QUERY_HINTS = False 391 IS_BOOL_ALLOWED = False 392 TZ_TO_WITH_TIME_ZONE = True 393 NVL2_SUPPORTED = False 394 STRUCT_DELIMITER = ("(", ")") 395 LIMIT_ONLY_LITERALS = True 396 SUPPORTS_SINGLE_ARG_CONCAT = False 397 LIKE_PROPERTY_INSIDE_SCHEMA = True 398 MULTI_ARG_DISTINCT = False 399 SUPPORTS_TO_NUMBER = False 400 HEX_FUNC = "TO_HEX" 401 PARSE_JSON_NAME = "JSON_PARSE" 402 PAD_FILL_PATTERN_IS_REQUIRED = True 403 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 404 SUPPORTS_MEDIAN = False 405 ARRAY_SIZE_NAME = "CARDINALITY" 406 407 PROPERTIES_LOCATION = { 408 **generator.Generator.PROPERTIES_LOCATION, 409 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 410 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 411 } 412 413 TYPE_MAPPING = { 414 **generator.Generator.TYPE_MAPPING, 415 exp.DataType.Type.BINARY: "VARBINARY", 416 exp.DataType.Type.BIT: "BOOLEAN", 417 exp.DataType.Type.DATETIME: "TIMESTAMP", 418 exp.DataType.Type.DATETIME64: "TIMESTAMP", 419 exp.DataType.Type.FLOAT: "REAL", 420 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 421 exp.DataType.Type.INT: "INTEGER", 422 exp.DataType.Type.STRUCT: "ROW", 423 exp.DataType.Type.TEXT: "VARCHAR", 424 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 425 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 426 exp.DataType.Type.TIMETZ: "TIME", 427 } 428 429 TRANSFORMS = { 430 **generator.Generator.TRANSFORMS, 431 exp.AnyValue: rename_func("ARBITRARY"), 432 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 433 exp.ArgMax: rename_func("MAX_BY"), 434 exp.ArgMin: rename_func("MIN_BY"), 435 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 436 exp.ArrayAny: rename_func("ANY_MATCH"), 437 exp.ArrayConcat: rename_func("CONCAT"), 438 exp.ArrayContains: rename_func("CONTAINS"), 439 exp.ArrayToString: rename_func("ARRAY_JOIN"), 440 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 441 exp.ArraySlice: rename_func("SLICE"), 442 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 443 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 444 exp.BitwiseLeftShift: lambda self, e: self.func( 445 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 446 ), 447 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 448 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 449 exp.BitwiseRightShift: lambda self, e: self.func( 450 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 451 ), 452 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 453 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 454 exp.CurrentTime: lambda *_: "CURRENT_TIME", 455 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 456 exp.CurrentUser: lambda *_: "CURRENT_USER", 457 exp.DateAdd: _date_delta_sql("DATE_ADD"), 458 exp.DateDiff: lambda self, e: self.func( 459 "DATE_DIFF", unit_to_str(e), e.expression, e.this 460 ), 461 exp.DateStrToDate: datestrtodate_sql, 462 exp.DateToDi: lambda self, 463 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 464 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 465 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 466 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 467 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 468 exp.DiToDate: lambda self, 469 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 470 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 471 exp.FileFormatProperty: lambda self, 472 e: f"format={self.sql(exp.Literal.string(e.name))}", 473 exp.First: _first_last_sql, 474 exp.FromTimeZone: lambda self, 475 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 476 exp.GenerateSeries: sequence_sql, 477 exp.GenerateDateArray: sequence_sql, 478 exp.Group: transforms.preprocess([transforms.unalias_group]), 479 exp.If: if_sql(), 480 exp.ILike: no_ilike_sql, 481 exp.Initcap: _initcap_sql, 482 exp.Last: _first_last_sql, 483 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 484 exp.Lateral: _explode_to_unnest_sql, 485 exp.Left: left_to_substring_sql, 486 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 487 rename_func("LEVENSHTEIN_DISTANCE") 488 ), 489 exp.LogicalAnd: rename_func("BOOL_AND"), 490 exp.LogicalOr: rename_func("BOOL_OR"), 491 exp.Pivot: no_pivot_sql, 492 exp.Quantile: _quantile_sql, 493 exp.RegexpExtract: regexp_extract_sql, 494 exp.RegexpExtractAll: regexp_extract_sql, 495 exp.Right: right_to_substring_sql, 496 exp.Schema: _schema_sql, 497 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 498 exp.Select: transforms.preprocess( 499 [ 500 transforms.eliminate_window_clause, 501 transforms.eliminate_qualify, 502 transforms.eliminate_distinct_on, 503 transforms.explode_projection_to_unnest(1), 504 transforms.eliminate_semi_and_anti_joins, 505 amend_exploded_column_table, 506 ] 507 ), 508 exp.SortArray: _no_sort_array, 509 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 510 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 511 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 512 exp.StrToTime: _str_to_time_sql, 513 exp.StructExtract: struct_extract_sql, 514 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 515 exp.Timestamp: no_timestamp_sql, 516 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 517 exp.TimestampTrunc: timestamptrunc_sql(), 518 exp.TimeStrToDate: timestrtotime_sql, 519 exp.TimeStrToTime: timestrtotime_sql, 520 exp.TimeStrToUnix: lambda self, e: self.func( 521 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 522 ), 523 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 524 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 525 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 526 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 527 exp.TsOrDiToDi: lambda self, 528 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 529 exp.TsOrDsAdd: _ts_or_ds_add_sql, 530 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 531 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 532 exp.Unhex: rename_func("FROM_HEX"), 533 exp.UnixToStr: lambda self, 534 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 535 exp.UnixToTime: _unix_to_time_sql, 536 exp.UnixToTimeStr: lambda self, 537 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 538 exp.VariancePop: rename_func("VAR_POP"), 539 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 540 exp.WithinGroup: transforms.preprocess( 541 [transforms.remove_within_group_for_percentiles] 542 ), 543 exp.Xor: bool_xor_sql, 544 exp.MD5Digest: rename_func("MD5"), 545 exp.SHA: rename_func("SHA1"), 546 exp.SHA2: sha256_sql, 547 } 548 549 RESERVED_KEYWORDS = { 550 "alter", 551 "and", 552 "as", 553 "between", 554 "by", 555 "case", 556 "cast", 557 "constraint", 558 "create", 559 "cross", 560 "current_time", 561 "current_timestamp", 562 "deallocate", 563 "delete", 564 "describe", 565 "distinct", 566 "drop", 567 "else", 568 "end", 569 "escape", 570 "except", 571 "execute", 572 "exists", 573 "extract", 574 "false", 575 "for", 576 "from", 577 "full", 578 "group", 579 "having", 580 "in", 581 "inner", 582 "insert", 583 "intersect", 584 "into", 585 "is", 586 "join", 587 "left", 588 "like", 589 "natural", 590 "not", 591 "null", 592 "on", 593 "or", 594 "order", 595 "outer", 596 "prepare", 597 "right", 598 "select", 599 "table", 600 "then", 601 "true", 602 "union", 603 "using", 604 "values", 605 "when", 606 "where", 607 "with", 608 } 609 610 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 611 this = expression.this 612 is_json = expression.args.get("is_json") 613 614 if this and not (is_json or this.type): 615 from sqlglot.optimizer.annotate_types import annotate_types 616 617 this = annotate_types(this, dialect=self.dialect) 618 619 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 620 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 621 622 return self.function_fallback_sql(expression) 623 624 def md5_sql(self, expression: exp.MD5) -> str: 625 this = expression.this 626 627 if not this.type: 628 from sqlglot.optimizer.annotate_types import annotate_types 629 630 this = annotate_types(this, dialect=self.dialect) 631 632 if this.is_type(*exp.DataType.TEXT_TYPES): 633 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 634 635 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 636 637 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 638 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 639 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 640 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 641 # which seems to be using the same time mapping as Hive, as per: 642 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 643 this = expression.this 644 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 645 value_as_timestamp = ( 646 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 647 ) 648 649 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 650 651 formatted_value = self.func( 652 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 653 ) 654 parse_with_tz = self.func( 655 "PARSE_DATETIME", 656 formatted_value, 657 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 658 ) 659 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 660 return self.func("TO_UNIXTIME", coalesced) 661 662 def bracket_sql(self, expression: exp.Bracket) -> str: 663 if expression.args.get("safe"): 664 return self.func( 665 "ELEMENT_AT", 666 expression.this, 667 seq_get( 668 apply_index_offset( 669 expression.this, 670 expression.expressions, 671 1 - expression.args.get("offset", 0), 672 dialect=self.dialect, 673 ), 674 0, 675 ), 676 ) 677 return super().bracket_sql(expression) 678 679 def struct_sql(self, expression: exp.Struct) -> str: 680 from sqlglot.optimizer.annotate_types import annotate_types 681 682 expression = annotate_types(expression, dialect=self.dialect) 683 values: t.List[str] = [] 684 schema: t.List[str] = [] 685 unknown_type = False 686 687 for e in expression.expressions: 688 if isinstance(e, exp.PropertyEQ): 689 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 690 unknown_type = True 691 else: 692 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 693 values.append(self.sql(e, "expression")) 694 else: 695 values.append(self.sql(e)) 696 697 size = len(expression.expressions) 698 699 if not size or len(schema) != size: 700 if unknown_type: 701 self.unsupported( 702 "Cannot convert untyped key-value definitions (try annotate_types)." 703 ) 704 return self.func("ROW", *values) 705 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 706 707 def interval_sql(self, expression: exp.Interval) -> str: 708 if expression.this and expression.text("unit").upper().startswith("WEEK"): 709 return f"({expression.this.name} * INTERVAL '7' DAY)" 710 return super().interval_sql(expression) 711 712 def transaction_sql(self, expression: exp.Transaction) -> str: 713 modes = expression.args.get("modes") 714 modes = f" {', '.join(modes)}" if modes else "" 715 return f"START TRANSACTION{modes}" 716 717 def offset_limit_modifiers( 718 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 719 ) -> t.List[str]: 720 return [ 721 self.sql(expression, "offset"), 722 self.sql(limit), 723 ] 724 725 def create_sql(self, expression: exp.Create) -> str: 726 """ 727 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 728 so we need to remove them 729 """ 730 kind = expression.args["kind"] 731 schema = expression.this 732 if kind == "VIEW" and schema.expressions: 733 expression.this.set("expressions", None) 734 return super().create_sql(expression) 735 736 def delete_sql(self, expression: exp.Delete) -> str: 737 """ 738 Presto only supports DELETE FROM for a single table without an alias, so we need 739 to remove the unnecessary parts. If the original DELETE statement contains more 740 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 741 """ 742 tables = expression.args.get("tables") or [expression.this] 743 if len(tables) > 1: 744 return super().delete_sql(expression) 745 746 table = tables[0] 747 expression.set("this", table) 748 expression.set("tables", None) 749 750 if isinstance(table, exp.Table): 751 table_alias = table.args.get("alias") 752 if table_alias: 753 table_alias.pop() 754 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 755 756 return super().delete_sql(expression) 757 758 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 759 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 760 761 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 762 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 763 if not expression.args.get("variant_extract") or is_json_extract: 764 return self.func( 765 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 766 ) 767 768 this = self.sql(expression, "this") 769 770 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 771 segments = [] 772 for path_key in expression.expression.expressions[1:]: 773 if not isinstance(path_key, exp.JSONPathKey): 774 # Cannot transpile subscripts, wildcards etc to dot notation 775 self.unsupported( 776 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 777 ) 778 continue 779 key = path_key.this 780 if not exp.SAFE_IDENTIFIER_RE.match(key): 781 key = f'"{key}"' 782 segments.append(f".{key}") 783 784 expr = "".join(segments) 785 786 return f"{this}{expr}" 787 788 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 789 return self.func( 790 "ARRAY_JOIN", 791 self.func("ARRAY_AGG", expression.this), 792 expression.args.get("separator"), 793 )
225def amend_exploded_column_table(expression: exp.Expression) -> exp.Expression: 226 # We check for expression.type because the columns can be amended only if types were inferred 227 if isinstance(expression, exp.Select) and expression.type: 228 for lateral in expression.args.get("laterals") or []: 229 alias = lateral.args.get("alias") 230 if ( 231 not isinstance(lateral.this, exp.Explode) 232 or not isinstance(alias, exp.TableAlias) 233 or len(alias.columns) != 1 234 ): 235 continue 236 237 new_table = alias.this 238 old_table = alias.columns[0].name.lower() 239 240 # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified 241 # with the struct column, resulting in invalid Presto references that need to be amended 242 for column in find_all_in_scope(expression, exp.Column): 243 if column.db.lower() == old_table: 244 column.set("table", column.args["db"].pop()) 245 elif column.table.lower() == old_table: 246 column.set("table", new_table.copy()) 247 elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot): 248 column.parent.replace(exp.column(column.parent.expression, table=new_table)) 249 250 return expression
253class Presto(Dialect): 254 INDEX_OFFSET = 1 255 NULL_ORDERING = "nulls_are_last" 256 TIME_FORMAT = MySQL.TIME_FORMAT 257 STRICT_STRING_CONCAT = True 258 SUPPORTS_SEMI_ANTI_JOIN = False 259 TYPED_DIVISION = True 260 TABLESAMPLE_SIZE_IS_PERCENT = True 261 LOG_BASE_FIRST: t.Optional[bool] = None 262 SUPPORTS_VALUES_DEFAULT = False 263 264 TIME_MAPPING = MySQL.TIME_MAPPING 265 266 # https://github.com/trinodb/trino/issues/17 267 # https://github.com/trinodb/trino/issues/12289 268 # https://github.com/prestodb/presto/issues/2863 269 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 270 271 # The result of certain math functions in Presto/Trino is of type 272 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 273 ANNOTATORS = { 274 **Dialect.ANNOTATORS, 275 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 276 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 277 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 278 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 279 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 280 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 281 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 282 if e.this 283 else self._set_type(e, exp.DataType.Type.DOUBLE), 284 } 285 286 SUPPORTED_SETTINGS = { 287 *Dialect.SUPPORTED_SETTINGS, 288 "variant_extract_is_json_extract", 289 } 290 291 class Tokenizer(tokens.Tokenizer): 292 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 293 UNICODE_STRINGS = [ 294 (prefix + q, q) 295 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 296 for prefix in ("U&", "u&") 297 ] 298 299 NESTED_COMMENTS = False 300 301 KEYWORDS = { 302 **tokens.Tokenizer.KEYWORDS, 303 "DEALLOCATE PREPARE": TokenType.COMMAND, 304 "DESCRIBE INPUT": TokenType.COMMAND, 305 "DESCRIBE OUTPUT": TokenType.COMMAND, 306 "RESET SESSION": TokenType.COMMAND, 307 "START": TokenType.BEGIN, 308 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 309 "ROW": TokenType.STRUCT, 310 "IPADDRESS": TokenType.IPADDRESS, 311 "IPPREFIX": TokenType.IPPREFIX, 312 "TDIGEST": TokenType.TDIGEST, 313 "HYPERLOGLOG": TokenType.HLLSKETCH, 314 } 315 KEYWORDS.pop("/*+") 316 KEYWORDS.pop("QUALIFY") 317 318 class Parser(parser.Parser): 319 VALUES_FOLLOWED_BY_PAREN = False 320 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 321 322 FUNCTIONS = { 323 **parser.Parser.FUNCTIONS, 324 "ARBITRARY": exp.AnyValue.from_arg_list, 325 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 326 "APPROX_PERCENTILE": _build_approx_percentile, 327 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 328 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 329 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 330 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 331 "CARDINALITY": exp.ArraySize.from_arg_list, 332 "CONTAINS": exp.ArrayContains.from_arg_list, 333 "DATE_ADD": lambda args: exp.DateAdd( 334 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 335 ), 336 "DATE_DIFF": lambda args: exp.DateDiff( 337 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 338 ), 339 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 340 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 341 "DATE_TRUNC": date_trunc_to_time, 342 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 343 "DOW": exp.DayOfWeekIso.from_arg_list, 344 "DOY": exp.DayOfYear.from_arg_list, 345 "ELEMENT_AT": lambda args: exp.Bracket( 346 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 347 ), 348 "FROM_HEX": exp.Unhex.from_arg_list, 349 "FROM_UNIXTIME": _build_from_unixtime, 350 "FROM_UTF8": lambda args: exp.Decode( 351 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 352 ), 353 "JSON_FORMAT": lambda args: exp.JSONFormat( 354 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 355 ), 356 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 357 "NOW": exp.CurrentTimestamp.from_arg_list, 358 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 359 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 360 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 361 this=seq_get(args, 0), 362 expression=seq_get(args, 1), 363 replacement=seq_get(args, 2) or exp.Literal.string(""), 364 ), 365 "REPLACE": build_replace_with_optional_replacement, 366 "ROW": exp.Struct.from_arg_list, 367 "SEQUENCE": exp.GenerateSeries.from_arg_list, 368 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 369 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 370 "STRPOS": lambda args: exp.StrPosition( 371 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 372 ), 373 "SLICE": exp.ArraySlice.from_arg_list, 374 "TO_CHAR": _build_to_char, 375 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 376 "TO_UTF8": lambda args: exp.Encode( 377 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 378 ), 379 "MD5": exp.MD5Digest.from_arg_list, 380 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 381 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 382 } 383 384 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 385 FUNCTION_PARSERS.pop("TRIM") 386 387 class Generator(generator.Generator): 388 INTERVAL_ALLOWS_PLURAL_FORM = False 389 JOIN_HINTS = False 390 TABLE_HINTS = False 391 QUERY_HINTS = False 392 IS_BOOL_ALLOWED = False 393 TZ_TO_WITH_TIME_ZONE = True 394 NVL2_SUPPORTED = False 395 STRUCT_DELIMITER = ("(", ")") 396 LIMIT_ONLY_LITERALS = True 397 SUPPORTS_SINGLE_ARG_CONCAT = False 398 LIKE_PROPERTY_INSIDE_SCHEMA = True 399 MULTI_ARG_DISTINCT = False 400 SUPPORTS_TO_NUMBER = False 401 HEX_FUNC = "TO_HEX" 402 PARSE_JSON_NAME = "JSON_PARSE" 403 PAD_FILL_PATTERN_IS_REQUIRED = True 404 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 405 SUPPORTS_MEDIAN = False 406 ARRAY_SIZE_NAME = "CARDINALITY" 407 408 PROPERTIES_LOCATION = { 409 **generator.Generator.PROPERTIES_LOCATION, 410 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 411 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 412 } 413 414 TYPE_MAPPING = { 415 **generator.Generator.TYPE_MAPPING, 416 exp.DataType.Type.BINARY: "VARBINARY", 417 exp.DataType.Type.BIT: "BOOLEAN", 418 exp.DataType.Type.DATETIME: "TIMESTAMP", 419 exp.DataType.Type.DATETIME64: "TIMESTAMP", 420 exp.DataType.Type.FLOAT: "REAL", 421 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 422 exp.DataType.Type.INT: "INTEGER", 423 exp.DataType.Type.STRUCT: "ROW", 424 exp.DataType.Type.TEXT: "VARCHAR", 425 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 426 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 427 exp.DataType.Type.TIMETZ: "TIME", 428 } 429 430 TRANSFORMS = { 431 **generator.Generator.TRANSFORMS, 432 exp.AnyValue: rename_func("ARBITRARY"), 433 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 434 exp.ArgMax: rename_func("MAX_BY"), 435 exp.ArgMin: rename_func("MIN_BY"), 436 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 437 exp.ArrayAny: rename_func("ANY_MATCH"), 438 exp.ArrayConcat: rename_func("CONCAT"), 439 exp.ArrayContains: rename_func("CONTAINS"), 440 exp.ArrayToString: rename_func("ARRAY_JOIN"), 441 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 442 exp.ArraySlice: rename_func("SLICE"), 443 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 444 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 445 exp.BitwiseLeftShift: lambda self, e: self.func( 446 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 447 ), 448 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 449 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 450 exp.BitwiseRightShift: lambda self, e: self.func( 451 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 452 ), 453 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 454 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 455 exp.CurrentTime: lambda *_: "CURRENT_TIME", 456 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 457 exp.CurrentUser: lambda *_: "CURRENT_USER", 458 exp.DateAdd: _date_delta_sql("DATE_ADD"), 459 exp.DateDiff: lambda self, e: self.func( 460 "DATE_DIFF", unit_to_str(e), e.expression, e.this 461 ), 462 exp.DateStrToDate: datestrtodate_sql, 463 exp.DateToDi: lambda self, 464 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 465 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 466 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 467 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 468 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 469 exp.DiToDate: lambda self, 470 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 471 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 472 exp.FileFormatProperty: lambda self, 473 e: f"format={self.sql(exp.Literal.string(e.name))}", 474 exp.First: _first_last_sql, 475 exp.FromTimeZone: lambda self, 476 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 477 exp.GenerateSeries: sequence_sql, 478 exp.GenerateDateArray: sequence_sql, 479 exp.Group: transforms.preprocess([transforms.unalias_group]), 480 exp.If: if_sql(), 481 exp.ILike: no_ilike_sql, 482 exp.Initcap: _initcap_sql, 483 exp.Last: _first_last_sql, 484 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 485 exp.Lateral: _explode_to_unnest_sql, 486 exp.Left: left_to_substring_sql, 487 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 488 rename_func("LEVENSHTEIN_DISTANCE") 489 ), 490 exp.LogicalAnd: rename_func("BOOL_AND"), 491 exp.LogicalOr: rename_func("BOOL_OR"), 492 exp.Pivot: no_pivot_sql, 493 exp.Quantile: _quantile_sql, 494 exp.RegexpExtract: regexp_extract_sql, 495 exp.RegexpExtractAll: regexp_extract_sql, 496 exp.Right: right_to_substring_sql, 497 exp.Schema: _schema_sql, 498 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 499 exp.Select: transforms.preprocess( 500 [ 501 transforms.eliminate_window_clause, 502 transforms.eliminate_qualify, 503 transforms.eliminate_distinct_on, 504 transforms.explode_projection_to_unnest(1), 505 transforms.eliminate_semi_and_anti_joins, 506 amend_exploded_column_table, 507 ] 508 ), 509 exp.SortArray: _no_sort_array, 510 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 511 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 512 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 513 exp.StrToTime: _str_to_time_sql, 514 exp.StructExtract: struct_extract_sql, 515 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 516 exp.Timestamp: no_timestamp_sql, 517 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 518 exp.TimestampTrunc: timestamptrunc_sql(), 519 exp.TimeStrToDate: timestrtotime_sql, 520 exp.TimeStrToTime: timestrtotime_sql, 521 exp.TimeStrToUnix: lambda self, e: self.func( 522 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 523 ), 524 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 525 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 526 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 527 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 528 exp.TsOrDiToDi: lambda self, 529 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 530 exp.TsOrDsAdd: _ts_or_ds_add_sql, 531 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 532 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 533 exp.Unhex: rename_func("FROM_HEX"), 534 exp.UnixToStr: lambda self, 535 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 536 exp.UnixToTime: _unix_to_time_sql, 537 exp.UnixToTimeStr: lambda self, 538 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 539 exp.VariancePop: rename_func("VAR_POP"), 540 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 541 exp.WithinGroup: transforms.preprocess( 542 [transforms.remove_within_group_for_percentiles] 543 ), 544 exp.Xor: bool_xor_sql, 545 exp.MD5Digest: rename_func("MD5"), 546 exp.SHA: rename_func("SHA1"), 547 exp.SHA2: sha256_sql, 548 } 549 550 RESERVED_KEYWORDS = { 551 "alter", 552 "and", 553 "as", 554 "between", 555 "by", 556 "case", 557 "cast", 558 "constraint", 559 "create", 560 "cross", 561 "current_time", 562 "current_timestamp", 563 "deallocate", 564 "delete", 565 "describe", 566 "distinct", 567 "drop", 568 "else", 569 "end", 570 "escape", 571 "except", 572 "execute", 573 "exists", 574 "extract", 575 "false", 576 "for", 577 "from", 578 "full", 579 "group", 580 "having", 581 "in", 582 "inner", 583 "insert", 584 "intersect", 585 "into", 586 "is", 587 "join", 588 "left", 589 "like", 590 "natural", 591 "not", 592 "null", 593 "on", 594 "or", 595 "order", 596 "outer", 597 "prepare", 598 "right", 599 "select", 600 "table", 601 "then", 602 "true", 603 "union", 604 "using", 605 "values", 606 "when", 607 "where", 608 "with", 609 } 610 611 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 612 this = expression.this 613 is_json = expression.args.get("is_json") 614 615 if this and not (is_json or this.type): 616 from sqlglot.optimizer.annotate_types import annotate_types 617 618 this = annotate_types(this, dialect=self.dialect) 619 620 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 621 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 622 623 return self.function_fallback_sql(expression) 624 625 def md5_sql(self, expression: exp.MD5) -> str: 626 this = expression.this 627 628 if not this.type: 629 from sqlglot.optimizer.annotate_types import annotate_types 630 631 this = annotate_types(this, dialect=self.dialect) 632 633 if this.is_type(*exp.DataType.TEXT_TYPES): 634 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 635 636 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 637 638 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 639 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 640 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 641 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 642 # which seems to be using the same time mapping as Hive, as per: 643 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 644 this = expression.this 645 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 646 value_as_timestamp = ( 647 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 648 ) 649 650 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 651 652 formatted_value = self.func( 653 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 654 ) 655 parse_with_tz = self.func( 656 "PARSE_DATETIME", 657 formatted_value, 658 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 659 ) 660 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 661 return self.func("TO_UNIXTIME", coalesced) 662 663 def bracket_sql(self, expression: exp.Bracket) -> str: 664 if expression.args.get("safe"): 665 return self.func( 666 "ELEMENT_AT", 667 expression.this, 668 seq_get( 669 apply_index_offset( 670 expression.this, 671 expression.expressions, 672 1 - expression.args.get("offset", 0), 673 dialect=self.dialect, 674 ), 675 0, 676 ), 677 ) 678 return super().bracket_sql(expression) 679 680 def struct_sql(self, expression: exp.Struct) -> str: 681 from sqlglot.optimizer.annotate_types import annotate_types 682 683 expression = annotate_types(expression, dialect=self.dialect) 684 values: t.List[str] = [] 685 schema: t.List[str] = [] 686 unknown_type = False 687 688 for e in expression.expressions: 689 if isinstance(e, exp.PropertyEQ): 690 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 691 unknown_type = True 692 else: 693 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 694 values.append(self.sql(e, "expression")) 695 else: 696 values.append(self.sql(e)) 697 698 size = len(expression.expressions) 699 700 if not size or len(schema) != size: 701 if unknown_type: 702 self.unsupported( 703 "Cannot convert untyped key-value definitions (try annotate_types)." 704 ) 705 return self.func("ROW", *values) 706 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 707 708 def interval_sql(self, expression: exp.Interval) -> str: 709 if expression.this and expression.text("unit").upper().startswith("WEEK"): 710 return f"({expression.this.name} * INTERVAL '7' DAY)" 711 return super().interval_sql(expression) 712 713 def transaction_sql(self, expression: exp.Transaction) -> str: 714 modes = expression.args.get("modes") 715 modes = f" {', '.join(modes)}" if modes else "" 716 return f"START TRANSACTION{modes}" 717 718 def offset_limit_modifiers( 719 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 720 ) -> t.List[str]: 721 return [ 722 self.sql(expression, "offset"), 723 self.sql(limit), 724 ] 725 726 def create_sql(self, expression: exp.Create) -> str: 727 """ 728 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 729 so we need to remove them 730 """ 731 kind = expression.args["kind"] 732 schema = expression.this 733 if kind == "VIEW" and schema.expressions: 734 expression.this.set("expressions", None) 735 return super().create_sql(expression) 736 737 def delete_sql(self, expression: exp.Delete) -> str: 738 """ 739 Presto only supports DELETE FROM for a single table without an alias, so we need 740 to remove the unnecessary parts. If the original DELETE statement contains more 741 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 742 """ 743 tables = expression.args.get("tables") or [expression.this] 744 if len(tables) > 1: 745 return super().delete_sql(expression) 746 747 table = tables[0] 748 expression.set("this", table) 749 expression.set("tables", None) 750 751 if isinstance(table, exp.Table): 752 table_alias = table.args.get("alias") 753 if table_alias: 754 table_alias.pop() 755 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 756 757 return super().delete_sql(expression) 758 759 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 760 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 761 762 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 763 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 764 if not expression.args.get("variant_extract") or is_json_extract: 765 return self.func( 766 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 767 ) 768 769 this = self.sql(expression, "this") 770 771 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 772 segments = [] 773 for path_key in expression.expression.expressions[1:]: 774 if not isinstance(path_key, exp.JSONPathKey): 775 # Cannot transpile subscripts, wildcards etc to dot notation 776 self.unsupported( 777 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 778 ) 779 continue 780 key = path_key.this 781 if not exp.SAFE_IDENTIFIER_RE.match(key): 782 key = f'"{key}"' 783 segments.append(f".{key}") 784 785 expr = "".join(segments) 786 787 return f"{this}{expr}" 788 789 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 790 return self.func( 791 "ARRAY_JOIN", 792 self.func("ARRAY_AGG", expression.this), 793 expression.args.get("separator"), 794 )
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the behavior of a / b
depends on the types of a
and b
.
False means a / b
is always float division.
True means a / b
is integer division if both a
and b
are integers.
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Associates this dialect's time formats with their equivalent Python strftime
formats.
Specifies the strategy according to which identifiers should be normalized.
291 class Tokenizer(tokens.Tokenizer): 292 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 293 UNICODE_STRINGS = [ 294 (prefix + q, q) 295 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 296 for prefix in ("U&", "u&") 297 ] 298 299 NESTED_COMMENTS = False 300 301 KEYWORDS = { 302 **tokens.Tokenizer.KEYWORDS, 303 "DEALLOCATE PREPARE": TokenType.COMMAND, 304 "DESCRIBE INPUT": TokenType.COMMAND, 305 "DESCRIBE OUTPUT": TokenType.COMMAND, 306 "RESET SESSION": TokenType.COMMAND, 307 "START": TokenType.BEGIN, 308 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 309 "ROW": TokenType.STRUCT, 310 "IPADDRESS": TokenType.IPADDRESS, 311 "IPPREFIX": TokenType.IPPREFIX, 312 "TDIGEST": TokenType.TDIGEST, 313 "HYPERLOGLOG": TokenType.HLLSKETCH, 314 } 315 KEYWORDS.pop("/*+") 316 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
318 class Parser(parser.Parser): 319 VALUES_FOLLOWED_BY_PAREN = False 320 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = True 321 322 FUNCTIONS = { 323 **parser.Parser.FUNCTIONS, 324 "ARBITRARY": exp.AnyValue.from_arg_list, 325 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 326 "APPROX_PERCENTILE": _build_approx_percentile, 327 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 328 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 329 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 330 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 331 "CARDINALITY": exp.ArraySize.from_arg_list, 332 "CONTAINS": exp.ArrayContains.from_arg_list, 333 "DATE_ADD": lambda args: exp.DateAdd( 334 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 335 ), 336 "DATE_DIFF": lambda args: exp.DateDiff( 337 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 338 ), 339 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 340 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 341 "DATE_TRUNC": date_trunc_to_time, 342 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 343 "DOW": exp.DayOfWeekIso.from_arg_list, 344 "DOY": exp.DayOfYear.from_arg_list, 345 "ELEMENT_AT": lambda args: exp.Bracket( 346 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 347 ), 348 "FROM_HEX": exp.Unhex.from_arg_list, 349 "FROM_UNIXTIME": _build_from_unixtime, 350 "FROM_UTF8": lambda args: exp.Decode( 351 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 352 ), 353 "JSON_FORMAT": lambda args: exp.JSONFormat( 354 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 355 ), 356 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 357 "NOW": exp.CurrentTimestamp.from_arg_list, 358 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 359 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 360 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 361 this=seq_get(args, 0), 362 expression=seq_get(args, 1), 363 replacement=seq_get(args, 2) or exp.Literal.string(""), 364 ), 365 "REPLACE": build_replace_with_optional_replacement, 366 "ROW": exp.Struct.from_arg_list, 367 "SEQUENCE": exp.GenerateSeries.from_arg_list, 368 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 369 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 370 "STRPOS": lambda args: exp.StrPosition( 371 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 372 ), 373 "SLICE": exp.ArraySlice.from_arg_list, 374 "TO_CHAR": _build_to_char, 375 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 376 "TO_UTF8": lambda args: exp.Encode( 377 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 378 ), 379 "MD5": exp.MD5Digest.from_arg_list, 380 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 381 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 382 } 383 384 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 385 FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
387 class Generator(generator.Generator): 388 INTERVAL_ALLOWS_PLURAL_FORM = False 389 JOIN_HINTS = False 390 TABLE_HINTS = False 391 QUERY_HINTS = False 392 IS_BOOL_ALLOWED = False 393 TZ_TO_WITH_TIME_ZONE = True 394 NVL2_SUPPORTED = False 395 STRUCT_DELIMITER = ("(", ")") 396 LIMIT_ONLY_LITERALS = True 397 SUPPORTS_SINGLE_ARG_CONCAT = False 398 LIKE_PROPERTY_INSIDE_SCHEMA = True 399 MULTI_ARG_DISTINCT = False 400 SUPPORTS_TO_NUMBER = False 401 HEX_FUNC = "TO_HEX" 402 PARSE_JSON_NAME = "JSON_PARSE" 403 PAD_FILL_PATTERN_IS_REQUIRED = True 404 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 405 SUPPORTS_MEDIAN = False 406 ARRAY_SIZE_NAME = "CARDINALITY" 407 408 PROPERTIES_LOCATION = { 409 **generator.Generator.PROPERTIES_LOCATION, 410 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 411 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 412 } 413 414 TYPE_MAPPING = { 415 **generator.Generator.TYPE_MAPPING, 416 exp.DataType.Type.BINARY: "VARBINARY", 417 exp.DataType.Type.BIT: "BOOLEAN", 418 exp.DataType.Type.DATETIME: "TIMESTAMP", 419 exp.DataType.Type.DATETIME64: "TIMESTAMP", 420 exp.DataType.Type.FLOAT: "REAL", 421 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 422 exp.DataType.Type.INT: "INTEGER", 423 exp.DataType.Type.STRUCT: "ROW", 424 exp.DataType.Type.TEXT: "VARCHAR", 425 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 426 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 427 exp.DataType.Type.TIMETZ: "TIME", 428 } 429 430 TRANSFORMS = { 431 **generator.Generator.TRANSFORMS, 432 exp.AnyValue: rename_func("ARBITRARY"), 433 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 434 exp.ArgMax: rename_func("MAX_BY"), 435 exp.ArgMin: rename_func("MIN_BY"), 436 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 437 exp.ArrayAny: rename_func("ANY_MATCH"), 438 exp.ArrayConcat: rename_func("CONCAT"), 439 exp.ArrayContains: rename_func("CONTAINS"), 440 exp.ArrayToString: rename_func("ARRAY_JOIN"), 441 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 442 exp.ArraySlice: rename_func("SLICE"), 443 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 444 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 445 exp.BitwiseLeftShift: lambda self, e: self.func( 446 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 447 ), 448 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 449 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 450 exp.BitwiseRightShift: lambda self, e: self.func( 451 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 452 ), 453 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 454 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 455 exp.CurrentTime: lambda *_: "CURRENT_TIME", 456 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 457 exp.CurrentUser: lambda *_: "CURRENT_USER", 458 exp.DateAdd: _date_delta_sql("DATE_ADD"), 459 exp.DateDiff: lambda self, e: self.func( 460 "DATE_DIFF", unit_to_str(e), e.expression, e.this 461 ), 462 exp.DateStrToDate: datestrtodate_sql, 463 exp.DateToDi: lambda self, 464 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 465 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 466 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 467 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 468 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 469 exp.DiToDate: lambda self, 470 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 471 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 472 exp.FileFormatProperty: lambda self, 473 e: f"format={self.sql(exp.Literal.string(e.name))}", 474 exp.First: _first_last_sql, 475 exp.FromTimeZone: lambda self, 476 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 477 exp.GenerateSeries: sequence_sql, 478 exp.GenerateDateArray: sequence_sql, 479 exp.Group: transforms.preprocess([transforms.unalias_group]), 480 exp.If: if_sql(), 481 exp.ILike: no_ilike_sql, 482 exp.Initcap: _initcap_sql, 483 exp.Last: _first_last_sql, 484 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 485 exp.Lateral: _explode_to_unnest_sql, 486 exp.Left: left_to_substring_sql, 487 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 488 rename_func("LEVENSHTEIN_DISTANCE") 489 ), 490 exp.LogicalAnd: rename_func("BOOL_AND"), 491 exp.LogicalOr: rename_func("BOOL_OR"), 492 exp.Pivot: no_pivot_sql, 493 exp.Quantile: _quantile_sql, 494 exp.RegexpExtract: regexp_extract_sql, 495 exp.RegexpExtractAll: regexp_extract_sql, 496 exp.Right: right_to_substring_sql, 497 exp.Schema: _schema_sql, 498 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 499 exp.Select: transforms.preprocess( 500 [ 501 transforms.eliminate_window_clause, 502 transforms.eliminate_qualify, 503 transforms.eliminate_distinct_on, 504 transforms.explode_projection_to_unnest(1), 505 transforms.eliminate_semi_and_anti_joins, 506 amend_exploded_column_table, 507 ] 508 ), 509 exp.SortArray: _no_sort_array, 510 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 511 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 512 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 513 exp.StrToTime: _str_to_time_sql, 514 exp.StructExtract: struct_extract_sql, 515 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 516 exp.Timestamp: no_timestamp_sql, 517 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 518 exp.TimestampTrunc: timestamptrunc_sql(), 519 exp.TimeStrToDate: timestrtotime_sql, 520 exp.TimeStrToTime: timestrtotime_sql, 521 exp.TimeStrToUnix: lambda self, e: self.func( 522 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 523 ), 524 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 525 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 526 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 527 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 528 exp.TsOrDiToDi: lambda self, 529 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 530 exp.TsOrDsAdd: _ts_or_ds_add_sql, 531 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 532 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 533 exp.Unhex: rename_func("FROM_HEX"), 534 exp.UnixToStr: lambda self, 535 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 536 exp.UnixToTime: _unix_to_time_sql, 537 exp.UnixToTimeStr: lambda self, 538 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 539 exp.VariancePop: rename_func("VAR_POP"), 540 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 541 exp.WithinGroup: transforms.preprocess( 542 [transforms.remove_within_group_for_percentiles] 543 ), 544 exp.Xor: bool_xor_sql, 545 exp.MD5Digest: rename_func("MD5"), 546 exp.SHA: rename_func("SHA1"), 547 exp.SHA2: sha256_sql, 548 } 549 550 RESERVED_KEYWORDS = { 551 "alter", 552 "and", 553 "as", 554 "between", 555 "by", 556 "case", 557 "cast", 558 "constraint", 559 "create", 560 "cross", 561 "current_time", 562 "current_timestamp", 563 "deallocate", 564 "delete", 565 "describe", 566 "distinct", 567 "drop", 568 "else", 569 "end", 570 "escape", 571 "except", 572 "execute", 573 "exists", 574 "extract", 575 "false", 576 "for", 577 "from", 578 "full", 579 "group", 580 "having", 581 "in", 582 "inner", 583 "insert", 584 "intersect", 585 "into", 586 "is", 587 "join", 588 "left", 589 "like", 590 "natural", 591 "not", 592 "null", 593 "on", 594 "or", 595 "order", 596 "outer", 597 "prepare", 598 "right", 599 "select", 600 "table", 601 "then", 602 "true", 603 "union", 604 "using", 605 "values", 606 "when", 607 "where", 608 "with", 609 } 610 611 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 612 this = expression.this 613 is_json = expression.args.get("is_json") 614 615 if this and not (is_json or this.type): 616 from sqlglot.optimizer.annotate_types import annotate_types 617 618 this = annotate_types(this, dialect=self.dialect) 619 620 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 621 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 622 623 return self.function_fallback_sql(expression) 624 625 def md5_sql(self, expression: exp.MD5) -> str: 626 this = expression.this 627 628 if not this.type: 629 from sqlglot.optimizer.annotate_types import annotate_types 630 631 this = annotate_types(this, dialect=self.dialect) 632 633 if this.is_type(*exp.DataType.TEXT_TYPES): 634 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 635 636 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 637 638 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 639 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 640 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 641 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 642 # which seems to be using the same time mapping as Hive, as per: 643 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 644 this = expression.this 645 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 646 value_as_timestamp = ( 647 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 648 ) 649 650 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 651 652 formatted_value = self.func( 653 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 654 ) 655 parse_with_tz = self.func( 656 "PARSE_DATETIME", 657 formatted_value, 658 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 659 ) 660 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 661 return self.func("TO_UNIXTIME", coalesced) 662 663 def bracket_sql(self, expression: exp.Bracket) -> str: 664 if expression.args.get("safe"): 665 return self.func( 666 "ELEMENT_AT", 667 expression.this, 668 seq_get( 669 apply_index_offset( 670 expression.this, 671 expression.expressions, 672 1 - expression.args.get("offset", 0), 673 dialect=self.dialect, 674 ), 675 0, 676 ), 677 ) 678 return super().bracket_sql(expression) 679 680 def struct_sql(self, expression: exp.Struct) -> str: 681 from sqlglot.optimizer.annotate_types import annotate_types 682 683 expression = annotate_types(expression, dialect=self.dialect) 684 values: t.List[str] = [] 685 schema: t.List[str] = [] 686 unknown_type = False 687 688 for e in expression.expressions: 689 if isinstance(e, exp.PropertyEQ): 690 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 691 unknown_type = True 692 else: 693 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 694 values.append(self.sql(e, "expression")) 695 else: 696 values.append(self.sql(e)) 697 698 size = len(expression.expressions) 699 700 if not size or len(schema) != size: 701 if unknown_type: 702 self.unsupported( 703 "Cannot convert untyped key-value definitions (try annotate_types)." 704 ) 705 return self.func("ROW", *values) 706 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 707 708 def interval_sql(self, expression: exp.Interval) -> str: 709 if expression.this and expression.text("unit").upper().startswith("WEEK"): 710 return f"({expression.this.name} * INTERVAL '7' DAY)" 711 return super().interval_sql(expression) 712 713 def transaction_sql(self, expression: exp.Transaction) -> str: 714 modes = expression.args.get("modes") 715 modes = f" {', '.join(modes)}" if modes else "" 716 return f"START TRANSACTION{modes}" 717 718 def offset_limit_modifiers( 719 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 720 ) -> t.List[str]: 721 return [ 722 self.sql(expression, "offset"), 723 self.sql(limit), 724 ] 725 726 def create_sql(self, expression: exp.Create) -> str: 727 """ 728 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 729 so we need to remove them 730 """ 731 kind = expression.args["kind"] 732 schema = expression.this 733 if kind == "VIEW" and schema.expressions: 734 expression.this.set("expressions", None) 735 return super().create_sql(expression) 736 737 def delete_sql(self, expression: exp.Delete) -> str: 738 """ 739 Presto only supports DELETE FROM for a single table without an alias, so we need 740 to remove the unnecessary parts. If the original DELETE statement contains more 741 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 742 """ 743 tables = expression.args.get("tables") or [expression.this] 744 if len(tables) > 1: 745 return super().delete_sql(expression) 746 747 table = tables[0] 748 expression.set("this", table) 749 expression.set("tables", None) 750 751 if isinstance(table, exp.Table): 752 table_alias = table.args.get("alias") 753 if table_alias: 754 table_alias.pop() 755 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 756 757 return super().delete_sql(expression) 758 759 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 760 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 761 762 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 763 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 764 if not expression.args.get("variant_extract") or is_json_extract: 765 return self.func( 766 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 767 ) 768 769 this = self.sql(expression, "this") 770 771 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 772 segments = [] 773 for path_key in expression.expression.expressions[1:]: 774 if not isinstance(path_key, exp.JSONPathKey): 775 # Cannot transpile subscripts, wildcards etc to dot notation 776 self.unsupported( 777 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 778 ) 779 continue 780 key = path_key.this 781 if not exp.SAFE_IDENTIFIER_RE.match(key): 782 key = f'"{key}"' 783 segments.append(f".{key}") 784 785 expr = "".join(segments) 786 787 return f"{this}{expr}" 788 789 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 790 return self.func( 791 "ARRAY_JOIN", 792 self.func("ARRAY_AGG", expression.this), 793 expression.args.get("separator"), 794 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
611 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 612 this = expression.this 613 is_json = expression.args.get("is_json") 614 615 if this and not (is_json or this.type): 616 from sqlglot.optimizer.annotate_types import annotate_types 617 618 this = annotate_types(this, dialect=self.dialect) 619 620 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 621 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 622 623 return self.function_fallback_sql(expression)
625 def md5_sql(self, expression: exp.MD5) -> str: 626 this = expression.this 627 628 if not this.type: 629 from sqlglot.optimizer.annotate_types import annotate_types 630 631 this = annotate_types(this, dialect=self.dialect) 632 633 if this.is_type(*exp.DataType.TEXT_TYPES): 634 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 635 636 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))
638 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 639 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 640 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 641 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 642 # which seems to be using the same time mapping as Hive, as per: 643 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 644 this = expression.this 645 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 646 value_as_timestamp = ( 647 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 648 ) 649 650 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 651 652 formatted_value = self.func( 653 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 654 ) 655 parse_with_tz = self.func( 656 "PARSE_DATETIME", 657 formatted_value, 658 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 659 ) 660 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 661 return self.func("TO_UNIXTIME", coalesced)
663 def bracket_sql(self, expression: exp.Bracket) -> str: 664 if expression.args.get("safe"): 665 return self.func( 666 "ELEMENT_AT", 667 expression.this, 668 seq_get( 669 apply_index_offset( 670 expression.this, 671 expression.expressions, 672 1 - expression.args.get("offset", 0), 673 dialect=self.dialect, 674 ), 675 0, 676 ), 677 ) 678 return super().bracket_sql(expression)
680 def struct_sql(self, expression: exp.Struct) -> str: 681 from sqlglot.optimizer.annotate_types import annotate_types 682 683 expression = annotate_types(expression, dialect=self.dialect) 684 values: t.List[str] = [] 685 schema: t.List[str] = [] 686 unknown_type = False 687 688 for e in expression.expressions: 689 if isinstance(e, exp.PropertyEQ): 690 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 691 unknown_type = True 692 else: 693 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 694 values.append(self.sql(e, "expression")) 695 else: 696 values.append(self.sql(e)) 697 698 size = len(expression.expressions) 699 700 if not size or len(schema) != size: 701 if unknown_type: 702 self.unsupported( 703 "Cannot convert untyped key-value definitions (try annotate_types)." 704 ) 705 return self.func("ROW", *values) 706 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
726 def create_sql(self, expression: exp.Create) -> str: 727 """ 728 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 729 so we need to remove them 730 """ 731 kind = expression.args["kind"] 732 schema = expression.this 733 if kind == "VIEW" and schema.expressions: 734 expression.this.set("expressions", None) 735 return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: CREATE VIEW x (cola)
then (cola)
is the expression),
so we need to remove them
737 def delete_sql(self, expression: exp.Delete) -> str: 738 """ 739 Presto only supports DELETE FROM for a single table without an alias, so we need 740 to remove the unnecessary parts. If the original DELETE statement contains more 741 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 742 """ 743 tables = expression.args.get("tables") or [expression.this] 744 if len(tables) > 1: 745 return super().delete_sql(expression) 746 747 table = tables[0] 748 expression.set("this", table) 749 expression.set("tables", None) 750 751 if isinstance(table, exp.Table): 752 table_alias = table.args.get("alias") 753 if table_alias: 754 table_alias.pop() 755 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 756 757 return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
759 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 760 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 761 762 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 763 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 764 if not expression.args.get("variant_extract") or is_json_extract: 765 return self.func( 766 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 767 ) 768 769 this = self.sql(expression, "this") 770 771 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 772 segments = [] 773 for path_key in expression.expression.expressions[1:]: 774 if not isinstance(path_key, exp.JSONPathKey): 775 # Cannot transpile subscripts, wildcards etc to dot notation 776 self.unsupported( 777 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 778 ) 779 continue 780 key = path_key.this 781 if not exp.SAFE_IDENTIFIER_RE.match(key): 782 key = f'"{key}"' 783 segments.append(f".{key}") 784 785 expr = "".join(segments) 786 787 return f"{this}{expr}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql