# Module: sqlglot.dialects.presto
"""Presto/Trino dialect for sqlglot: tokenizer, parser and generator overrides."""

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    # LATERAL EXPLODE(...) has no direct Presto equivalent; render it as a CROSS JOIN UNNEST.
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    # Presto has no INITCAP; emulate it with a per-word REGEXP_REPLACE lambda.
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    # Descending sort needs an explicit comparator lambda in ARRAY_SORT.
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    # APPROX_PERCENTILE's optional weight/accuracy args map positionally onto ApproxQuantile.
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
    # SELECT ... FROM SEQUENCE(...) must be wrapped in UNNEST to be selectable.
    if isinstance(expression, exp.Table):
        if isinstance(expression.this, exp.GenerateSeries):
            unnest = exp.Unnest(expressions=[expression.this])

            if expression.alias:
                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
            return unnest
    return expression


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str:
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y)` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue
        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            key = f'"{key}"'
        segments.append(f".{key}")

    expr = "".join(segments)

    return f"{this}{expr}"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    # Presto's DATE_ADD requires an integer interval; cast non-integer types to BIGINT.
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    # Shared generator for DATE_ADD-style functions; subtraction negates the interval.
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    TIME_MAPPING = MySQL.TIME_MAPPING
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxDistinct: lambda self, e: self.func(
                "APPROX_DISTINCT", e.this, e.args.get("accuracy")
            ),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: _jsonextract_sql,
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            # Presto's MD5 operates on VARBINARY, so text inputs are first encoded as UTF-8.
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            # Safe (NULL on out-of-bounds) bracket access maps to ELEMENT_AT with 1-based index.
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            # Presto has no WEEK interval unit; rewrite as a multiple of DAY.
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto expects OFFSET before LIMIT.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
245class Presto(Dialect): 246 INDEX_OFFSET = 1 247 NULL_ORDERING = "nulls_are_last" 248 TIME_FORMAT = MySQL.TIME_FORMAT 249 TIME_MAPPING = MySQL.TIME_MAPPING 250 STRICT_STRING_CONCAT = True 251 SUPPORTS_SEMI_ANTI_JOIN = False 252 TYPED_DIVISION = True 253 TABLESAMPLE_SIZE_IS_PERCENT = True 254 LOG_BASE_FIRST: t.Optional[bool] = None 255 256 # https://github.com/trinodb/trino/issues/17 257 # https://github.com/trinodb/trino/issues/12289 258 # https://github.com/prestodb/presto/issues/2863 259 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 260 261 # The result of certain math functions in Presto/Trino is of type 262 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 263 ANNOTATORS = { 264 **Dialect.ANNOTATORS, 265 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 266 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 267 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 268 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 269 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 270 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 271 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 272 if e.this 273 else self._set_type(e, exp.DataType.Type.DOUBLE), 274 } 275 276 class Tokenizer(tokens.Tokenizer): 277 UNICODE_STRINGS = [ 278 (prefix + q, q) 279 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 280 for prefix in ("U&", "u&") 281 ] 282 283 KEYWORDS = { 284 **tokens.Tokenizer.KEYWORDS, 285 "START": TokenType.BEGIN, 286 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 287 "ROW": TokenType.STRUCT, 288 "IPADDRESS": TokenType.IPADDRESS, 289 "IPPREFIX": TokenType.IPPREFIX, 290 "TDIGEST": TokenType.TDIGEST, 291 "HYPERLOGLOG": TokenType.HLLSKETCH, 292 } 293 KEYWORDS.pop("/*+") 294 KEYWORDS.pop("QUALIFY") 295 296 class Parser(parser.Parser): 297 VALUES_FOLLOWED_BY_PAREN = False 298 299 FUNCTIONS = { 300 **parser.Parser.FUNCTIONS, 301 "ARBITRARY": 
exp.AnyValue.from_arg_list, 302 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 303 "APPROX_PERCENTILE": _build_approx_percentile, 304 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 305 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 306 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 307 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 308 "CARDINALITY": exp.ArraySize.from_arg_list, 309 "CONTAINS": exp.ArrayContains.from_arg_list, 310 "DATE_ADD": lambda args: exp.DateAdd( 311 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 312 ), 313 "DATE_DIFF": lambda args: exp.DateDiff( 314 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 315 ), 316 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 317 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 318 "DATE_TRUNC": date_trunc_to_time, 319 "ELEMENT_AT": lambda args: exp.Bracket( 320 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 321 ), 322 "FROM_HEX": exp.Unhex.from_arg_list, 323 "FROM_UNIXTIME": _build_from_unixtime, 324 "FROM_UTF8": lambda args: exp.Decode( 325 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 326 ), 327 "NOW": exp.CurrentTimestamp.from_arg_list, 328 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 329 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 330 ), 331 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 332 this=seq_get(args, 0), 333 expression=seq_get(args, 1), 334 replacement=seq_get(args, 2) or exp.Literal.string(""), 335 ), 336 "ROW": exp.Struct.from_arg_list, 337 "SEQUENCE": exp.GenerateSeries.from_arg_list, 338 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 339 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 340 "STRPOS": lambda args: exp.StrPosition( 341 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 342 ), 343 "TO_CHAR": _build_to_char, 344 "TO_UNIXTIME": 
exp.TimeToUnix.from_arg_list, 345 "TO_UTF8": lambda args: exp.Encode( 346 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 347 ), 348 "MD5": exp.MD5Digest.from_arg_list, 349 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 350 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 351 } 352 353 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 354 FUNCTION_PARSERS.pop("TRIM") 355 356 class Generator(generator.Generator): 357 INTERVAL_ALLOWS_PLURAL_FORM = False 358 JOIN_HINTS = False 359 TABLE_HINTS = False 360 QUERY_HINTS = False 361 IS_BOOL_ALLOWED = False 362 TZ_TO_WITH_TIME_ZONE = True 363 NVL2_SUPPORTED = False 364 STRUCT_DELIMITER = ("(", ")") 365 LIMIT_ONLY_LITERALS = True 366 SUPPORTS_SINGLE_ARG_CONCAT = False 367 LIKE_PROPERTY_INSIDE_SCHEMA = True 368 MULTI_ARG_DISTINCT = False 369 SUPPORTS_TO_NUMBER = False 370 HEX_FUNC = "TO_HEX" 371 PARSE_JSON_NAME = "JSON_PARSE" 372 PAD_FILL_PATTERN_IS_REQUIRED = True 373 374 PROPERTIES_LOCATION = { 375 **generator.Generator.PROPERTIES_LOCATION, 376 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 377 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 378 } 379 380 TYPE_MAPPING = { 381 **generator.Generator.TYPE_MAPPING, 382 exp.DataType.Type.INT: "INTEGER", 383 exp.DataType.Type.FLOAT: "REAL", 384 exp.DataType.Type.BINARY: "VARBINARY", 385 exp.DataType.Type.TEXT: "VARCHAR", 386 exp.DataType.Type.TIMETZ: "TIME", 387 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 388 exp.DataType.Type.STRUCT: "ROW", 389 exp.DataType.Type.DATETIME: "TIMESTAMP", 390 exp.DataType.Type.DATETIME64: "TIMESTAMP", 391 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 392 } 393 394 TRANSFORMS = { 395 **generator.Generator.TRANSFORMS, 396 exp.AnyValue: rename_func("ARBITRARY"), 397 exp.ApproxDistinct: lambda self, e: self.func( 398 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 399 ), 400 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 401 
exp.ArgMax: rename_func("MAX_BY"), 402 exp.ArgMin: rename_func("MIN_BY"), 403 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 404 exp.ArrayAny: rename_func("ANY_MATCH"), 405 exp.ArrayConcat: rename_func("CONCAT"), 406 exp.ArrayContains: rename_func("CONTAINS"), 407 exp.ArraySize: rename_func("CARDINALITY"), 408 exp.ArrayToString: rename_func("ARRAY_JOIN"), 409 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 410 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 411 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 412 exp.BitwiseLeftShift: lambda self, e: self.func( 413 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 414 ), 415 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 416 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 417 exp.BitwiseRightShift: lambda self, e: self.func( 418 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 419 ), 420 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 421 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 422 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 423 exp.DateAdd: _date_delta_sql("DATE_ADD"), 424 exp.DateDiff: lambda self, e: self.func( 425 "DATE_DIFF", unit_to_str(e), e.expression, e.this 426 ), 427 exp.DateStrToDate: datestrtodate_sql, 428 exp.DateToDi: lambda self, 429 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 430 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 431 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 432 exp.DiToDate: lambda self, 433 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 434 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 435 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 436 exp.First: _first_last_sql, 437 exp.FirstValue: _first_last_sql, 438 exp.FromTimeZone: lambda self, 439 e: 
f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 440 exp.GenerateSeries: sequence_sql, 441 exp.Group: transforms.preprocess([transforms.unalias_group]), 442 exp.GroupConcat: lambda self, e: self.func( 443 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 444 ), 445 exp.If: if_sql(), 446 exp.ILike: no_ilike_sql, 447 exp.Initcap: _initcap_sql, 448 exp.JSONExtract: _jsonextract_sql, 449 exp.Last: _first_last_sql, 450 exp.LastValue: _first_last_sql, 451 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 452 exp.Lateral: _explode_to_unnest_sql, 453 exp.Left: left_to_substring_sql, 454 exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"), 455 exp.LogicalAnd: rename_func("BOOL_AND"), 456 exp.LogicalOr: rename_func("BOOL_OR"), 457 exp.Pivot: no_pivot_sql, 458 exp.Quantile: _quantile_sql, 459 exp.RegexpExtract: regexp_extract_sql, 460 exp.Right: right_to_substring_sql, 461 exp.SafeDivide: no_safe_divide_sql, 462 exp.Schema: _schema_sql, 463 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 464 exp.Select: transforms.preprocess( 465 [ 466 transforms.eliminate_qualify, 467 transforms.eliminate_distinct_on, 468 transforms.explode_to_unnest(1), 469 transforms.eliminate_semi_and_anti_joins, 470 ] 471 ), 472 exp.SortArray: _no_sort_array, 473 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 474 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 475 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 476 exp.StrToTime: _str_to_time_sql, 477 exp.StructExtract: struct_extract_sql, 478 exp.Table: transforms.preprocess([_unnest_sequence]), 479 exp.Timestamp: no_timestamp_sql, 480 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 481 exp.TimestampTrunc: timestamptrunc_sql(), 482 exp.TimeStrToDate: timestrtotime_sql, 483 exp.TimeStrToTime: timestrtotime_sql, 484 exp.TimeStrToUnix: lambda self, e: self.func( 485 "TO_UNIXTIME", self.func("DATE_PARSE", 
e.this, Presto.TIME_FORMAT) 486 ), 487 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 488 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 489 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 490 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 491 exp.TsOrDiToDi: lambda self, 492 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 493 exp.TsOrDsAdd: _ts_or_ds_add_sql, 494 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 495 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 496 exp.Unhex: rename_func("FROM_HEX"), 497 exp.UnixToStr: lambda self, 498 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 499 exp.UnixToTime: _unix_to_time_sql, 500 exp.UnixToTimeStr: lambda self, 501 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 502 exp.VariancePop: rename_func("VAR_POP"), 503 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 504 exp.WithinGroup: transforms.preprocess( 505 [transforms.remove_within_group_for_percentiles] 506 ), 507 exp.Xor: bool_xor_sql, 508 exp.MD5Digest: rename_func("MD5"), 509 exp.SHA: rename_func("SHA1"), 510 exp.SHA2: sha256_sql, 511 } 512 513 RESERVED_KEYWORDS = { 514 "alter", 515 "and", 516 "as", 517 "between", 518 "by", 519 "case", 520 "cast", 521 "constraint", 522 "create", 523 "cross", 524 "current_time", 525 "current_timestamp", 526 "deallocate", 527 "delete", 528 "describe", 529 "distinct", 530 "drop", 531 "else", 532 "end", 533 "escape", 534 "except", 535 "execute", 536 "exists", 537 "extract", 538 "false", 539 "for", 540 "from", 541 "full", 542 "group", 543 "having", 544 "in", 545 "inner", 546 "insert", 547 "intersect", 548 "into", 549 "is", 550 "join", 551 "left", 552 "like", 553 "natural", 554 "not", 555 "null", 556 "on", 557 "or", 558 "order", 559 "outer", 560 "prepare", 561 "right", 562 "select", 563 "table", 564 "then", 565 "true", 566 "union", 567 "using", 568 
"values", 569 "when", 570 "where", 571 "with", 572 } 573 574 def md5_sql(self, expression: exp.MD5) -> str: 575 this = expression.this 576 577 if not this.type: 578 from sqlglot.optimizer.annotate_types import annotate_types 579 580 this = annotate_types(this) 581 582 if this.is_type(*exp.DataType.TEXT_TYPES): 583 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 584 585 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 586 587 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 588 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 589 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 590 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 591 # which seems to be using the same time mapping as Hive, as per: 592 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 593 this = expression.this 594 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 595 value_as_timestamp = ( 596 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 597 ) 598 599 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 600 601 formatted_value = self.func( 602 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 603 ) 604 parse_with_tz = self.func( 605 "PARSE_DATETIME", 606 formatted_value, 607 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 608 ) 609 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 610 return self.func("TO_UNIXTIME", coalesced) 611 612 def bracket_sql(self, expression: exp.Bracket) -> str: 613 if expression.args.get("safe"): 614 return self.func( 615 "ELEMENT_AT", 616 expression.this, 617 seq_get( 618 apply_index_offset( 619 expression.this, 620 expression.expressions, 621 1 - expression.args.get("offset", 0), 622 ), 623 0, 624 ), 625 ) 626 return 
super().bracket_sql(expression) 627 628 def struct_sql(self, expression: exp.Struct) -> str: 629 from sqlglot.optimizer.annotate_types import annotate_types 630 631 expression = annotate_types(expression) 632 values: t.List[str] = [] 633 schema: t.List[str] = [] 634 unknown_type = False 635 636 for e in expression.expressions: 637 if isinstance(e, exp.PropertyEQ): 638 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 639 unknown_type = True 640 else: 641 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 642 values.append(self.sql(e, "expression")) 643 else: 644 values.append(self.sql(e)) 645 646 size = len(expression.expressions) 647 648 if not size or len(schema) != size: 649 if unknown_type: 650 self.unsupported( 651 "Cannot convert untyped key-value definitions (try annotate_types)." 652 ) 653 return self.func("ROW", *values) 654 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 655 656 def interval_sql(self, expression: exp.Interval) -> str: 657 if expression.this and expression.text("unit").upper().startswith("WEEK"): 658 return f"({expression.this.name} * INTERVAL '7' DAY)" 659 return super().interval_sql(expression) 660 661 def transaction_sql(self, expression: exp.Transaction) -> str: 662 modes = expression.args.get("modes") 663 modes = f" {', '.join(modes)}" if modes else "" 664 return f"START TRANSACTION{modes}" 665 666 def offset_limit_modifiers( 667 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 668 ) -> t.List[str]: 669 return [ 670 self.sql(expression, "offset"), 671 self.sql(limit), 672 ] 673 674 def create_sql(self, expression: exp.Create) -> str: 675 """ 676 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 677 so we need to remove them 678 """ 679 kind = expression.args["kind"] 680 schema = expression.this 681 if kind == "VIEW" and schema.expressions: 682 expression.this.set("expressions", None) 683 return 
super().create_sql(expression) 684 685 def delete_sql(self, expression: exp.Delete) -> str: 686 """ 687 Presto only supports DELETE FROM for a single table without an alias, so we need 688 to remove the unnecessary parts. If the original DELETE statement contains more 689 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 690 """ 691 tables = expression.args.get("tables") or [expression.this] 692 if len(tables) > 1: 693 return super().delete_sql(expression) 694 695 table = tables[0] 696 expression.set("this", table) 697 expression.set("tables", None) 698 699 if isinstance(table, exp.Table): 700 table_alias = table.args.get("alias") 701 if table_alias: 702 table_alias.pop() 703 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 704 705 return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime formats.
Whether the behavior of `a / b` depends on the types of `a` and `b`.
False means `a / b` is always float division.
True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function.
Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Presto tokenizer.

    Extends the base tokenizer with Presto-specific keywords and with
    unicode string literal prefixes (``U&'...'`` / ``u&'...'``).
    """

    # Pair each unicode prefix with every base quote style, so e.g. the
    # opening delimiter U&' is matched with the closing delimiter '.
    UNICODE_STRINGS = [
        (prefix + q, q)
        for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        # Presto spells the struct type as ROW(...)
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }
    # Drop tokens that are not part of Presto's syntax: hint comments
    # and the QUALIFY clause.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto parser.

    Maps Presto function names onto sqlglot expression nodes so that
    Presto SQL can be parsed into a dialect-agnostic AST.
    """

    # Presto's VALUES rows are not expected to be wrapped in parentheses.
    VALUES_FOLLOWED_BY_PAREN = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, timestamp), so
        # the positional args map back-to-front onto sqlglot's nodes.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is modeled as 1-indexed, NULL-safe bracket access.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        # The replacement argument defaults to the empty string when omitted.
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # TRIM is parsed as a plain function call, not with the special
    # TRIM(... FROM ...) syntax handled by the base parser.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Presto SQL generator.

    Renders a sqlglot AST as Presto SQL by combining generator feature
    flags, a type mapping, per-expression TRANSFORMS, and method
    overrides for constructs that Presto spells differently.
    """

    # Feature flags describing what Presto's grammar accepts.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # sqlglot data types -> Presto type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Per-expression SQL renderers: function renames, inline lambdas and
    # AST-level preprocessing transforms.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # DATE_DIFF's operands are reversed relative to the sqlglot node.
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # Subtraction is emitted as DATE_ADD with a negated interval.
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Presto reserved words (see the base Generator for how these drive
    # identifier quoting).
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), wrapping text arguments
        in TO_UTF8 (via exp.Encode) since MD5 operates on varbinary."""
        this = expression.this

        if not this.type:
            # Type info is needed to decide whether the argument is textual.
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME over a parsed timestamp."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render NULL-safe bracket access as ELEMENT_AT; otherwise defer
        to the base generator."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        # Shift the index back to ELEMENT_AT's 1-based convention.
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Generate a ROW constructor for a Struct.

        When every entry is a typed key-value pair, emits
        CAST(ROW(values...) AS ROW(name type, ...)) so the field names
        survive; otherwise falls back to a plain ROW(...) call.
        """
        from sqlglot.optimizer.annotate_types import annotate_types

        # Annotate so PropertyEQ entries carry types for the ROW schema.
        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Only use the typed CAST form when every entry contributed a schema slot.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite WEEK intervals as `(n * INTERVAL '7' DAY)`; other
        intervals use the base rendering."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Render a Transaction as `START TRANSACTION [mode, ...]`."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Emit the OFFSET clause before the LIMIT/FETCH clause.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            # Strip the alias and unqualify columns that referenced it.
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding textual arguments as UTF-8 first."""
    arg = expression.this

    if not arg.type:
        # Type information is needed to decide whether a UTF-8 encode is required.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(...)), PARSE_DATETIME(...))).

    `TO_UNIXTIME` requires a `TIMESTAMP`, so the argument must be parsed into one.
    `DATE_PARSE` is tried first, but since it can fail when a timezone is involved,
    it is wrapped in `TRY` with `PARSE_DATETIME` as a fallback, which seems to use
    the same time mapping as Hive, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    if arg.is_string:
        as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP)
    else:
        as_timestamp = arg

    naive_parse = self.func("DATE_PARSE", as_text, self.format_time(expression))

    reformatted = self.func(
        "DATE_FORMAT", as_timestamp, self.format_time(expression)
    )
    tz_aware_parse = self.func(
        "PARSE_DATETIME",
        reformatted,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    parsed = self.func("COALESCE", self.func("TRY", naive_parse), tz_aware_parse)
    return self.func("TO_UNIXTIME", parsed)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render bracket access; "safe" lookups map to Presto's ELEMENT_AT."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # ELEMENT_AT is 1-based, so shift the index relative to the
    # expression's original offset before emitting it.
    index = seq_get(
        apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
        ),
        0,
    )
    return self.func("ELEMENT_AT", expression.this, index)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a Struct as CAST(ROW(...) AS ROW(...)) when every field is typed,
    falling back to a bare ROW(...) call otherwise."""
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)
    field_values: t.List[str] = []
    field_schema: t.List[str] = []
    saw_unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            field_values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown_type = True
        else:
            field_schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        field_values.append(self.sql(field, "expression"))

    total_fields = len(expression.expressions)

    if not total_fields or len(field_schema) != total_fields:
        # Not every field has a usable type, so the CAST form can't be built.
        if saw_unknown_type:
            self.unsupported(
                "Cannot convert untyped key-value definitions (try annotate_types)."
            )
        return self.func("ROW", *field_values)
    return f"CAST(ROW({', '.join(field_values)}) AS ROW({', '.join(field_schema)}))"
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)`
    where `(cola)` is the expression), so we need to remove them.
    """
    if expression.args["kind"] == "VIEW" and expression.this.expressions:
        expression.this.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: in CREATE VIEW x (cola), the (cola) part is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    targets = expression.args.get("tables") or [expression.this]
    if len(targets) > 1:
        # Multi-table DELETE has no Presto equivalent; emit it unchanged.
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        # Strip the alias and unqualify columns that referenced it.
        alias = target.args.get("alias")
        if alias:
            alias.pop()
        expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql
- pad_sql