sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

# Expression types that share Presto's DATE_ADD(unit, amount, value) calling
# convention; consumed by the _date_delta_sql factory below.
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP with REGEXP_REPLACE + a lambda, since Presto has no INITCAP."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT, adding a reversing comparator for DESC order."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        # Ascending (the default) needs no comparator argument.
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Generate a schema.

    PARTITIONED BY schemas become an ARRAY of string literals; otherwise,
    column definitions from sibling property schemas are merged in before
    falling back to the default schema generator.
    """
    if isinstance(expression.parent, exp.PartitionedByProperty):
        # Any columns in the ARRAY[] string literals should not be quoted
        expression.transform(lambda n: n.name if isinstance(n, exp.Identifier) else n, copy=False)

        partition_exprs = [
            self.sql(c) if isinstance(c, (exp.Func, exp.Property)) else self.sql(c, "this")
            for c in expression.expressions
        ]
        return self.sql(exp.Array(expressions=[exp.Literal.string(c) for c in partition_exprs]))

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            if schema is expression:
                continue

            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Approximate QUANTILE with APPROX_PERCENTILE, emitting an unsupported warning."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time conversions via Presto's DATE_PARSE."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-datestring value to DATE, honoring a custom time format."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        # Non-default format: parse the string first, then cast the result.
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TS_OR_DS_ADD as DATE_ADD(unit, amount, value) after casting operands."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TS_OR_DS_DIFF as DATE_DIFF over TIMESTAMP-cast operands."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)
def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE, whose overloads may add a weight and/or accuracy."""
    count = len(args)
    if count == 4:
        # (value, weight, percentage, accuracy)
        return exp.ApproxQuantile(
            this=args[0], weight=args[1], quantile=args[2], accuracy=args[3]
        )
    if count == 3:
        # (value, percentage, accuracy)
        return exp.ApproxQuantile(this=args[0], quantile=args[1], accuracy=args[2])
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME overloads: (ts), (ts, zone) or (ts, hours, minutes)."""
    count = len(args)
    if count == 3:
        return exp.UnixToTime(this=args[0], hours=args[1], minutes=args[2])
    if count == 2:
        return exp.UnixToTime(this=args[0], zone=args[1])
    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    ancestor = expression.find_ancestor(exp.MatchRecognize, exp.Select)
    if isinstance(ancestor, exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UNIX_TO_TIME, dividing by a power of ten for sub-second scales."""
    scale = expression.args.get("scale")
    ts = self.sql(expression, "this")
    if scale not in (None, exp.UnixToTime.SECONDS):
        return f"FROM_UNIXTIME(CAST({ts} AS DOUBLE) / POW(10, {scale}))"

    return rename_func("FROM_UNIXTIME")(self, expression)


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Ensure *expression* is integer-typed, wrapping it in a BIGINT cast when not."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse Presto's Teradata-style TO_CHAR into a TimeToStr expression."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for DATE_ADD-style calls, optionally negating the interval."""

    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        amount = _to_int(self, expression.expression)
        if negate_interval:
            amount = amount * (-1)
        return self.func(name, unit_to_str(expression), amount, expression.this)

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Transpile LATERAL VIEW EXPLODE / INLINE into Presto's UNNEST."""
    inner = expression.this
    if isinstance(inner, exp.Explode):
        alias = expression.args.get("alias")
        array_type = inner.this.type

        # Best-effort transpilation of EXPLODE over an ARRAY(ROW(...)): unnesting
        # a ROW in Presto produces one column per field, so the alias columns
        # must be rewritten to the struct's field names.
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(array_type, exp.DataType)
            and array_type.is_type(exp.DataType.Type.ARRAY)
            and array_type.expressions
            and array_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            alias.set(
                "columns",
                [c.this.copy() for c in array_type.expressions[0].expressions],
            )
    elif isinstance(inner, exp.Inline):
        inner.replace(exp.Explode(this=inner.this.copy()))

    return explode_to_unnest_sql(self, expression)
def _amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    """Requalify columns that still reference a pre-explosion struct alias."""
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified
            # with the struct column, resulting in invalid Presto references that need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    """Dialect definition for Presto (and the base for Trino)."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause.
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's argument order is (unit, amount, value).
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            # ELEMENT_AT is a safe, 1-based subscript.
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "JSON_FORMAT": lambda args: exp.JSONFormat(
                this=seq_get(args, 0), options=seq_get(args, 1), is_json=True
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DATE_SUB is emulated by DATE_ADD with a negated interval.
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    _amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        # Identifiers matching these must be quoted when generated.
        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
            """Render JSON_FORMAT, casting the operand to JSON when it is not already."""
            this = expression.this
            is_json = expression.args.get("is_json")

            if this and not (is_json or this.type):
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

                if not (is_json or this.is_type(exp.DataType.Type.JSON)):
                    this.replace(exp.cast(this, exp.DataType.Type.JSON))

            return self.function_fallback_sql(expression)

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 over text input as LOWER(TO_HEX(MD5(TO_UTF8(...))))."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this, dialect=self.dialect)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                # Presto's MD5 takes VARBINARY, so text must be encoded first.
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render safe subscripts as ELEMENT_AT, adjusting for the index offset."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                            dialect=self.dialect,
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a struct literal, emitting CAST(ROW(...) AS ROW(...)) when all
            key-value pairs carry a known type, otherwise a plain ROW(...)."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression, dialect=self.dialect)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Rewrite WEEK intervals as DAY intervals (Presto has no WEEK interval)."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render BEGIN as START TRANSACTION with any isolation/access modes."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto expects OFFSET before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            """Render JSON_EXTRACT, or ROW dot-access for VARIANT-style extracts."""
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            """Render GROUP_CONCAT as ARRAY_JOIN(ARRAY_AGG(...), separator)."""
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
252class Presto(Dialect): 253 INDEX_OFFSET = 1 254 NULL_ORDERING = "nulls_are_last" 255 TIME_FORMAT = MySQL.TIME_FORMAT 256 STRICT_STRING_CONCAT = True 257 SUPPORTS_SEMI_ANTI_JOIN = False 258 TYPED_DIVISION = True 259 TABLESAMPLE_SIZE_IS_PERCENT = True 260 LOG_BASE_FIRST: t.Optional[bool] = None 261 SUPPORTS_VALUES_DEFAULT = False 262 263 TIME_MAPPING = MySQL.TIME_MAPPING 264 265 # https://github.com/trinodb/trino/issues/17 266 # https://github.com/trinodb/trino/issues/12289 267 # https://github.com/prestodb/presto/issues/2863 268 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 269 270 # The result of certain math functions in Presto/Trino is of type 271 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 272 ANNOTATORS = { 273 **Dialect.ANNOTATORS, 274 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 275 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 276 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 277 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 278 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 279 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 280 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 281 if e.this 282 else self._set_type(e, exp.DataType.Type.DOUBLE), 283 } 284 285 class Tokenizer(tokens.Tokenizer): 286 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 287 UNICODE_STRINGS = [ 288 (prefix + q, q) 289 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 290 for prefix in ("U&", "u&") 291 ] 292 293 KEYWORDS = { 294 **tokens.Tokenizer.KEYWORDS, 295 "DEALLOCATE PREPARE": TokenType.COMMAND, 296 "DESCRIBE INPUT": TokenType.COMMAND, 297 "DESCRIBE OUTPUT": TokenType.COMMAND, 298 "RESET SESSION": TokenType.COMMAND, 299 "START": TokenType.BEGIN, 300 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 301 "ROW": TokenType.STRUCT, 302 "IPADDRESS": TokenType.IPADDRESS, 303 "IPPREFIX": TokenType.IPPREFIX, 304 "TDIGEST": 
TokenType.TDIGEST, 305 "HYPERLOGLOG": TokenType.HLLSKETCH, 306 } 307 KEYWORDS.pop("/*+") 308 KEYWORDS.pop("QUALIFY") 309 310 class Parser(parser.Parser): 311 VALUES_FOLLOWED_BY_PAREN = False 312 313 FUNCTIONS = { 314 **parser.Parser.FUNCTIONS, 315 "ARBITRARY": exp.AnyValue.from_arg_list, 316 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 317 "APPROX_PERCENTILE": _build_approx_percentile, 318 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 319 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 320 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 321 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 322 "CARDINALITY": exp.ArraySize.from_arg_list, 323 "CONTAINS": exp.ArrayContains.from_arg_list, 324 "DATE_ADD": lambda args: exp.DateAdd( 325 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 326 ), 327 "DATE_DIFF": lambda args: exp.DateDiff( 328 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 329 ), 330 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 331 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 332 "DATE_TRUNC": date_trunc_to_time, 333 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 334 "ELEMENT_AT": lambda args: exp.Bracket( 335 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 336 ), 337 "FROM_HEX": exp.Unhex.from_arg_list, 338 "FROM_UNIXTIME": _build_from_unixtime, 339 "FROM_UTF8": lambda args: exp.Decode( 340 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 341 ), 342 "JSON_FORMAT": lambda args: exp.JSONFormat( 343 this=seq_get(args, 0), options=seq_get(args, 1), is_json=True 344 ), 345 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 346 "NOW": exp.CurrentTimestamp.from_arg_list, 347 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 348 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 349 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 350 this=seq_get(args, 
0), 351 expression=seq_get(args, 1), 352 replacement=seq_get(args, 2) or exp.Literal.string(""), 353 ), 354 "ROW": exp.Struct.from_arg_list, 355 "SEQUENCE": exp.GenerateSeries.from_arg_list, 356 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 357 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 358 "STRPOS": lambda args: exp.StrPosition( 359 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 360 ), 361 "TO_CHAR": _build_to_char, 362 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 363 "TO_UTF8": lambda args: exp.Encode( 364 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 365 ), 366 "MD5": exp.MD5Digest.from_arg_list, 367 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 368 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 369 } 370 371 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 372 FUNCTION_PARSERS.pop("TRIM") 373 374 class Generator(generator.Generator): 375 INTERVAL_ALLOWS_PLURAL_FORM = False 376 JOIN_HINTS = False 377 TABLE_HINTS = False 378 QUERY_HINTS = False 379 IS_BOOL_ALLOWED = False 380 TZ_TO_WITH_TIME_ZONE = True 381 NVL2_SUPPORTED = False 382 STRUCT_DELIMITER = ("(", ")") 383 LIMIT_ONLY_LITERALS = True 384 SUPPORTS_SINGLE_ARG_CONCAT = False 385 LIKE_PROPERTY_INSIDE_SCHEMA = True 386 MULTI_ARG_DISTINCT = False 387 SUPPORTS_TO_NUMBER = False 388 HEX_FUNC = "TO_HEX" 389 PARSE_JSON_NAME = "JSON_PARSE" 390 PAD_FILL_PATTERN_IS_REQUIRED = True 391 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 392 SUPPORTS_MEDIAN = False 393 ARRAY_SIZE_NAME = "CARDINALITY" 394 395 PROPERTIES_LOCATION = { 396 **generator.Generator.PROPERTIES_LOCATION, 397 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 398 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 399 } 400 401 TYPE_MAPPING = { 402 **generator.Generator.TYPE_MAPPING, 403 exp.DataType.Type.BINARY: "VARBINARY", 404 exp.DataType.Type.BIT: "BOOLEAN", 405 exp.DataType.Type.DATETIME: "TIMESTAMP", 
406 exp.DataType.Type.DATETIME64: "TIMESTAMP", 407 exp.DataType.Type.FLOAT: "REAL", 408 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 409 exp.DataType.Type.INT: "INTEGER", 410 exp.DataType.Type.STRUCT: "ROW", 411 exp.DataType.Type.TEXT: "VARCHAR", 412 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 413 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 414 exp.DataType.Type.TIMETZ: "TIME", 415 } 416 417 TRANSFORMS = { 418 **generator.Generator.TRANSFORMS, 419 exp.AnyValue: rename_func("ARBITRARY"), 420 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 421 exp.ArgMax: rename_func("MAX_BY"), 422 exp.ArgMin: rename_func("MIN_BY"), 423 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 424 exp.ArrayAny: rename_func("ANY_MATCH"), 425 exp.ArrayConcat: rename_func("CONCAT"), 426 exp.ArrayContains: rename_func("CONTAINS"), 427 exp.ArrayToString: rename_func("ARRAY_JOIN"), 428 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 429 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 430 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 431 exp.BitwiseLeftShift: lambda self, e: self.func( 432 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 433 ), 434 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 435 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 436 exp.BitwiseRightShift: lambda self, e: self.func( 437 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 438 ), 439 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 440 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 441 exp.CurrentTime: lambda *_: "CURRENT_TIME", 442 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 443 exp.CurrentUser: lambda *_: "CURRENT_USER", 444 exp.DateAdd: _date_delta_sql("DATE_ADD"), 445 exp.DateDiff: lambda self, e: self.func( 446 "DATE_DIFF", unit_to_str(e), e.expression, e.this 447 ), 448 exp.DateStrToDate: datestrtodate_sql, 449 exp.DateToDi: lambda self, 450 e: 
f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 451 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 452 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 453 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 454 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 455 exp.DiToDate: lambda self, 456 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 457 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 458 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 459 exp.First: _first_last_sql, 460 exp.FromTimeZone: lambda self, 461 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 462 exp.GenerateSeries: sequence_sql, 463 exp.GenerateDateArray: sequence_sql, 464 exp.Group: transforms.preprocess([transforms.unalias_group]), 465 exp.If: if_sql(), 466 exp.ILike: no_ilike_sql, 467 exp.Initcap: _initcap_sql, 468 exp.Last: _first_last_sql, 469 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 470 exp.Lateral: _explode_to_unnest_sql, 471 exp.Left: left_to_substring_sql, 472 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 473 rename_func("LEVENSHTEIN_DISTANCE") 474 ), 475 exp.LogicalAnd: rename_func("BOOL_AND"), 476 exp.LogicalOr: rename_func("BOOL_OR"), 477 exp.Pivot: no_pivot_sql, 478 exp.Quantile: _quantile_sql, 479 exp.RegexpExtract: regexp_extract_sql, 480 exp.RegexpExtractAll: regexp_extract_sql, 481 exp.Right: right_to_substring_sql, 482 exp.Schema: _schema_sql, 483 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 484 exp.Select: transforms.preprocess( 485 [ 486 transforms.eliminate_qualify, 487 transforms.eliminate_distinct_on, 488 transforms.explode_projection_to_unnest(1), 489 transforms.eliminate_semi_and_anti_joins, 490 _amend_exploded_column_table, 491 ] 492 ), 493 exp.SortArray: 
_no_sort_array, 494 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 495 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 496 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 497 exp.StrToTime: _str_to_time_sql, 498 exp.StructExtract: struct_extract_sql, 499 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 500 exp.Timestamp: no_timestamp_sql, 501 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 502 exp.TimestampTrunc: timestamptrunc_sql(), 503 exp.TimeStrToDate: timestrtotime_sql, 504 exp.TimeStrToTime: timestrtotime_sql, 505 exp.TimeStrToUnix: lambda self, e: self.func( 506 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 507 ), 508 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 509 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 510 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 511 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 512 exp.TsOrDiToDi: lambda self, 513 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 514 exp.TsOrDsAdd: _ts_or_ds_add_sql, 515 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 516 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 517 exp.Unhex: rename_func("FROM_HEX"), 518 exp.UnixToStr: lambda self, 519 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 520 exp.UnixToTime: _unix_to_time_sql, 521 exp.UnixToTimeStr: lambda self, 522 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 523 exp.VariancePop: rename_func("VAR_POP"), 524 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 525 exp.WithinGroup: transforms.preprocess( 526 [transforms.remove_within_group_for_percentiles] 527 ), 528 exp.Xor: bool_xor_sql, 529 exp.MD5Digest: rename_func("MD5"), 530 exp.SHA: rename_func("SHA1"), 531 exp.SHA2: sha256_sql, 532 } 533 534 RESERVED_KEYWORDS = { 535 "alter", 536 "and", 537 
"as", 538 "between", 539 "by", 540 "case", 541 "cast", 542 "constraint", 543 "create", 544 "cross", 545 "current_time", 546 "current_timestamp", 547 "deallocate", 548 "delete", 549 "describe", 550 "distinct", 551 "drop", 552 "else", 553 "end", 554 "escape", 555 "except", 556 "execute", 557 "exists", 558 "extract", 559 "false", 560 "for", 561 "from", 562 "full", 563 "group", 564 "having", 565 "in", 566 "inner", 567 "insert", 568 "intersect", 569 "into", 570 "is", 571 "join", 572 "left", 573 "like", 574 "natural", 575 "not", 576 "null", 577 "on", 578 "or", 579 "order", 580 "outer", 581 "prepare", 582 "right", 583 "select", 584 "table", 585 "then", 586 "true", 587 "union", 588 "using", 589 "values", 590 "when", 591 "where", 592 "with", 593 } 594 595 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 596 this = expression.this 597 is_json = expression.args.get("is_json") 598 599 if this and not (is_json or this.type): 600 from sqlglot.optimizer.annotate_types import annotate_types 601 602 this = annotate_types(this, dialect=self.dialect) 603 604 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 605 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 606 607 return self.function_fallback_sql(expression) 608 609 def md5_sql(self, expression: exp.MD5) -> str: 610 this = expression.this 611 612 if not this.type: 613 from sqlglot.optimizer.annotate_types import annotate_types 614 615 this = annotate_types(this, dialect=self.dialect) 616 617 if this.is_type(*exp.DataType.TEXT_TYPES): 618 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 619 620 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 621 622 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 623 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
624 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 625 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 626 # which seems to be using the same time mapping as Hive, as per: 627 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 628 this = expression.this 629 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 630 value_as_timestamp = ( 631 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 632 ) 633 634 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 635 636 formatted_value = self.func( 637 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 638 ) 639 parse_with_tz = self.func( 640 "PARSE_DATETIME", 641 formatted_value, 642 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 643 ) 644 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 645 return self.func("TO_UNIXTIME", coalesced) 646 647 def bracket_sql(self, expression: exp.Bracket) -> str: 648 if expression.args.get("safe"): 649 return self.func( 650 "ELEMENT_AT", 651 expression.this, 652 seq_get( 653 apply_index_offset( 654 expression.this, 655 expression.expressions, 656 1 - expression.args.get("offset", 0), 657 dialect=self.dialect, 658 ), 659 0, 660 ), 661 ) 662 return super().bracket_sql(expression) 663 664 def struct_sql(self, expression: exp.Struct) -> str: 665 from sqlglot.optimizer.annotate_types import annotate_types 666 667 expression = annotate_types(expression, dialect=self.dialect) 668 values: t.List[str] = [] 669 schema: t.List[str] = [] 670 unknown_type = False 671 672 for e in expression.expressions: 673 if isinstance(e, exp.PropertyEQ): 674 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 675 unknown_type = True 676 else: 677 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 678 values.append(self.sql(e, "expression")) 679 else: 
680 values.append(self.sql(e)) 681 682 size = len(expression.expressions) 683 684 if not size or len(schema) != size: 685 if unknown_type: 686 self.unsupported( 687 "Cannot convert untyped key-value definitions (try annotate_types)." 688 ) 689 return self.func("ROW", *values) 690 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 691 692 def interval_sql(self, expression: exp.Interval) -> str: 693 if expression.this and expression.text("unit").upper().startswith("WEEK"): 694 return f"({expression.this.name} * INTERVAL '7' DAY)" 695 return super().interval_sql(expression) 696 697 def transaction_sql(self, expression: exp.Transaction) -> str: 698 modes = expression.args.get("modes") 699 modes = f" {', '.join(modes)}" if modes else "" 700 return f"START TRANSACTION{modes}" 701 702 def offset_limit_modifiers( 703 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 704 ) -> t.List[str]: 705 return [ 706 self.sql(expression, "offset"), 707 self.sql(limit), 708 ] 709 710 def create_sql(self, expression: exp.Create) -> str: 711 """ 712 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 713 so we need to remove them 714 """ 715 kind = expression.args["kind"] 716 schema = expression.this 717 if kind == "VIEW" and schema.expressions: 718 expression.this.set("expressions", None) 719 return super().create_sql(expression) 720 721 def delete_sql(self, expression: exp.Delete) -> str: 722 """ 723 Presto only supports DELETE FROM for a single table without an alias, so we need 724 to remove the unnecessary parts. If the original DELETE statement contains more 725 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 
726 """ 727 tables = expression.args.get("tables") or [expression.this] 728 if len(tables) > 1: 729 return super().delete_sql(expression) 730 731 table = tables[0] 732 expression.set("this", table) 733 expression.set("tables", None) 734 735 if isinstance(table, exp.Table): 736 table_alias = table.args.get("alias") 737 if table_alias: 738 table_alias.pop() 739 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 740 741 return super().delete_sql(expression) 742 743 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 744 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 745 746 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 747 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 748 if not expression.args.get("variant_extract") or is_json_extract: 749 return self.func( 750 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 751 ) 752 753 this = self.sql(expression, "this") 754 755 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 756 segments = [] 757 for path_key in expression.expression.expressions[1:]: 758 if not isinstance(path_key, exp.JSONPathKey): 759 # Cannot transpile subscripts, wildcards etc to dot notation 760 self.unsupported( 761 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 762 ) 763 continue 764 key = path_key.this 765 if not exp.SAFE_IDENTIFIER_RE.match(key): 766 key = f'"{key}"' 767 segments.append(f".{key}") 768 769 expr = "".join(segments) 770 771 return f"{this}{expr}" 772 773 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 774 return self.func( 775 "ARRAY_JOIN", 776 self.func("ARRAY_AGG", expression.this), 777 expression.args.get("separator"), 778 )
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Whether the behavior of `a / b` depends on the types of `a` and `b`.
False means `a / b` is always float division.
True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function.
Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Associates this dialect's time formats with their equivalent Python `strftime` formats.
Specifies the strategy according to which identifiers should be normalized.
class Tokenizer(tokens.Tokenizer):
    """Presto-specific tokenizer settings."""

    # Hexadecimal literals are written as x'...' / X'...'.
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]

    # Unicode string literals: each regular quote prefixed by U& or u&.
    UNICODE_STRINGS = [
        (pfx + quote, quote)
        for quote in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for pfx in ("U&", "u&")
    ]

    KEYWORDS = tokens.Tokenizer.KEYWORDS.copy()
    KEYWORDS.update(
        {
            # These statements are passed through verbatim as commands.
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            # Presto's ROW type maps to the generic STRUCT token.
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
    )
    # Presto has no hint comments and no QUALIFY clause.
    del KEYWORDS["/*+"], KEYWORDS["QUALIFY"]
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto-specific parser settings."""

    VALUES_FOLLOWED_BY_PAREN = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda fn_args: exp.BitwiseNot(this=seq_get(fn_args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF argument order is (unit, value, date);
        # remap into the AST's (this, expression, unit) slots.
        "DATE_ADD": lambda fn_args: exp.DateAdd(
            this=seq_get(fn_args, 2),
            expression=seq_get(fn_args, 1),
            unit=seq_get(fn_args, 0),
        ),
        "DATE_DIFF": lambda fn_args: exp.DateDiff(
            this=seq_get(fn_args, 2),
            expression=seq_get(fn_args, 1),
            unit=seq_get(fn_args, 0),
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
        # ELEMENT_AT is a safe (NULL on miss), 1-based subscript access.
        "ELEMENT_AT": lambda fn_args: exp.Bracket(
            this=seq_get(fn_args, 0),
            expressions=[seq_get(fn_args, 1)],
            offset=1,
            safe=True,
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda fn_args: exp.Decode(
            this=seq_get(fn_args, 0),
            replace=seq_get(fn_args, 1),
            charset=exp.Literal.string("utf-8"),
        ),
        "JSON_FORMAT": lambda fn_args: exp.JSONFormat(
            this=seq_get(fn_args, 0), options=seq_get(fn_args, 1), is_json=True
        ),
        "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
        # An omitted replacement argument defaults to the empty string.
        "REGEXP_REPLACE": lambda fn_args: exp.RegexpReplace(
            this=seq_get(fn_args, 0),
            expression=seq_get(fn_args, 1),
            replacement=seq_get(fn_args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda fn_args: exp.StrPosition(
            this=seq_get(fn_args, 0),
            substr=seq_get(fn_args, 1),
            occurrence=seq_get(fn_args, 2),
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda fn_args: exp.Encode(
            this=seq_get(fn_args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        # SHA256/SHA512 both map onto the generic SHA2 node with a length arg.
        "SHA256": lambda fn_args: exp.SHA2(
            this=seq_get(fn_args, 0), length=exp.Literal.number(256)
        ),
        "SHA512": lambda fn_args: exp.SHA2(
            this=seq_get(fn_args, 0), length=exp.Literal.number(512)
        ),
    }

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    # Without the special-case parser, TRIM is parsed as a plain function call.
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
374 class Generator(generator.Generator): 375 INTERVAL_ALLOWS_PLURAL_FORM = False 376 JOIN_HINTS = False 377 TABLE_HINTS = False 378 QUERY_HINTS = False 379 IS_BOOL_ALLOWED = False 380 TZ_TO_WITH_TIME_ZONE = True 381 NVL2_SUPPORTED = False 382 STRUCT_DELIMITER = ("(", ")") 383 LIMIT_ONLY_LITERALS = True 384 SUPPORTS_SINGLE_ARG_CONCAT = False 385 LIKE_PROPERTY_INSIDE_SCHEMA = True 386 MULTI_ARG_DISTINCT = False 387 SUPPORTS_TO_NUMBER = False 388 HEX_FUNC = "TO_HEX" 389 PARSE_JSON_NAME = "JSON_PARSE" 390 PAD_FILL_PATTERN_IS_REQUIRED = True 391 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 392 SUPPORTS_MEDIAN = False 393 ARRAY_SIZE_NAME = "CARDINALITY" 394 395 PROPERTIES_LOCATION = { 396 **generator.Generator.PROPERTIES_LOCATION, 397 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 398 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 399 } 400 401 TYPE_MAPPING = { 402 **generator.Generator.TYPE_MAPPING, 403 exp.DataType.Type.BINARY: "VARBINARY", 404 exp.DataType.Type.BIT: "BOOLEAN", 405 exp.DataType.Type.DATETIME: "TIMESTAMP", 406 exp.DataType.Type.DATETIME64: "TIMESTAMP", 407 exp.DataType.Type.FLOAT: "REAL", 408 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 409 exp.DataType.Type.INT: "INTEGER", 410 exp.DataType.Type.STRUCT: "ROW", 411 exp.DataType.Type.TEXT: "VARCHAR", 412 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 413 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 414 exp.DataType.Type.TIMETZ: "TIME", 415 } 416 417 TRANSFORMS = { 418 **generator.Generator.TRANSFORMS, 419 exp.AnyValue: rename_func("ARBITRARY"), 420 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 421 exp.ArgMax: rename_func("MAX_BY"), 422 exp.ArgMin: rename_func("MIN_BY"), 423 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 424 exp.ArrayAny: rename_func("ANY_MATCH"), 425 exp.ArrayConcat: rename_func("CONCAT"), 426 exp.ArrayContains: rename_func("CONTAINS"), 427 exp.ArrayToString: rename_func("ARRAY_JOIN"), 428 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 429 
exp.AtTimeZone: rename_func("AT_TIMEZONE"), 430 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 431 exp.BitwiseLeftShift: lambda self, e: self.func( 432 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 433 ), 434 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 435 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 436 exp.BitwiseRightShift: lambda self, e: self.func( 437 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 438 ), 439 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 440 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 441 exp.CurrentTime: lambda *_: "CURRENT_TIME", 442 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 443 exp.CurrentUser: lambda *_: "CURRENT_USER", 444 exp.DateAdd: _date_delta_sql("DATE_ADD"), 445 exp.DateDiff: lambda self, e: self.func( 446 "DATE_DIFF", unit_to_str(e), e.expression, e.this 447 ), 448 exp.DateStrToDate: datestrtodate_sql, 449 exp.DateToDi: lambda self, 450 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 451 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 452 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 453 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 454 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 455 exp.DiToDate: lambda self, 456 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 457 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 458 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 459 exp.First: _first_last_sql, 460 exp.FromTimeZone: lambda self, 461 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 462 exp.GenerateSeries: sequence_sql, 463 exp.GenerateDateArray: sequence_sql, 464 exp.Group: transforms.preprocess([transforms.unalias_group]), 465 
exp.If: if_sql(), 466 exp.ILike: no_ilike_sql, 467 exp.Initcap: _initcap_sql, 468 exp.Last: _first_last_sql, 469 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 470 exp.Lateral: _explode_to_unnest_sql, 471 exp.Left: left_to_substring_sql, 472 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 473 rename_func("LEVENSHTEIN_DISTANCE") 474 ), 475 exp.LogicalAnd: rename_func("BOOL_AND"), 476 exp.LogicalOr: rename_func("BOOL_OR"), 477 exp.Pivot: no_pivot_sql, 478 exp.Quantile: _quantile_sql, 479 exp.RegexpExtract: regexp_extract_sql, 480 exp.RegexpExtractAll: regexp_extract_sql, 481 exp.Right: right_to_substring_sql, 482 exp.Schema: _schema_sql, 483 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 484 exp.Select: transforms.preprocess( 485 [ 486 transforms.eliminate_qualify, 487 transforms.eliminate_distinct_on, 488 transforms.explode_projection_to_unnest(1), 489 transforms.eliminate_semi_and_anti_joins, 490 _amend_exploded_column_table, 491 ] 492 ), 493 exp.SortArray: _no_sort_array, 494 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 495 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 496 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 497 exp.StrToTime: _str_to_time_sql, 498 exp.StructExtract: struct_extract_sql, 499 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 500 exp.Timestamp: no_timestamp_sql, 501 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 502 exp.TimestampTrunc: timestamptrunc_sql(), 503 exp.TimeStrToDate: timestrtotime_sql, 504 exp.TimeStrToTime: timestrtotime_sql, 505 exp.TimeStrToUnix: lambda self, e: self.func( 506 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 507 ), 508 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 509 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 510 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, 
self.format_time(e)), 511 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 512 exp.TsOrDiToDi: lambda self, 513 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 514 exp.TsOrDsAdd: _ts_or_ds_add_sql, 515 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 516 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 517 exp.Unhex: rename_func("FROM_HEX"), 518 exp.UnixToStr: lambda self, 519 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 520 exp.UnixToTime: _unix_to_time_sql, 521 exp.UnixToTimeStr: lambda self, 522 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 523 exp.VariancePop: rename_func("VAR_POP"), 524 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 525 exp.WithinGroup: transforms.preprocess( 526 [transforms.remove_within_group_for_percentiles] 527 ), 528 exp.Xor: bool_xor_sql, 529 exp.MD5Digest: rename_func("MD5"), 530 exp.SHA: rename_func("SHA1"), 531 exp.SHA2: sha256_sql, 532 } 533 534 RESERVED_KEYWORDS = { 535 "alter", 536 "and", 537 "as", 538 "between", 539 "by", 540 "case", 541 "cast", 542 "constraint", 543 "create", 544 "cross", 545 "current_time", 546 "current_timestamp", 547 "deallocate", 548 "delete", 549 "describe", 550 "distinct", 551 "drop", 552 "else", 553 "end", 554 "escape", 555 "except", 556 "execute", 557 "exists", 558 "extract", 559 "false", 560 "for", 561 "from", 562 "full", 563 "group", 564 "having", 565 "in", 566 "inner", 567 "insert", 568 "intersect", 569 "into", 570 "is", 571 "join", 572 "left", 573 "like", 574 "natural", 575 "not", 576 "null", 577 "on", 578 "or", 579 "order", 580 "outer", 581 "prepare", 582 "right", 583 "select", 584 "table", 585 "then", 586 "true", 587 "union", 588 "using", 589 "values", 590 "when", 591 "where", 592 "with", 593 } 594 595 def jsonformat_sql(self, expression: exp.JSONFormat) -> str: 596 this = expression.this 597 is_json = expression.args.get("is_json") 598 599 if this and not (is_json or this.type): 
600 from sqlglot.optimizer.annotate_types import annotate_types 601 602 this = annotate_types(this, dialect=self.dialect) 603 604 if not (is_json or this.is_type(exp.DataType.Type.JSON)): 605 this.replace(exp.cast(this, exp.DataType.Type.JSON)) 606 607 return self.function_fallback_sql(expression) 608 609 def md5_sql(self, expression: exp.MD5) -> str: 610 this = expression.this 611 612 if not this.type: 613 from sqlglot.optimizer.annotate_types import annotate_types 614 615 this = annotate_types(this, dialect=self.dialect) 616 617 if this.is_type(*exp.DataType.TEXT_TYPES): 618 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 619 620 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 621 622 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 623 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 624 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 625 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 626 # which seems to be using the same time mapping as Hive, as per: 627 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 628 this = expression.this 629 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 630 value_as_timestamp = ( 631 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 632 ) 633 634 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 635 636 formatted_value = self.func( 637 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 638 ) 639 parse_with_tz = self.func( 640 "PARSE_DATETIME", 641 formatted_value, 642 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 643 ) 644 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 645 return self.func("TO_UNIXTIME", coalesced) 646 647 def bracket_sql(self, expression: exp.Bracket) -> 
str: 648 if expression.args.get("safe"): 649 return self.func( 650 "ELEMENT_AT", 651 expression.this, 652 seq_get( 653 apply_index_offset( 654 expression.this, 655 expression.expressions, 656 1 - expression.args.get("offset", 0), 657 dialect=self.dialect, 658 ), 659 0, 660 ), 661 ) 662 return super().bracket_sql(expression) 663 664 def struct_sql(self, expression: exp.Struct) -> str: 665 from sqlglot.optimizer.annotate_types import annotate_types 666 667 expression = annotate_types(expression, dialect=self.dialect) 668 values: t.List[str] = [] 669 schema: t.List[str] = [] 670 unknown_type = False 671 672 for e in expression.expressions: 673 if isinstance(e, exp.PropertyEQ): 674 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 675 unknown_type = True 676 else: 677 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 678 values.append(self.sql(e, "expression")) 679 else: 680 values.append(self.sql(e)) 681 682 size = len(expression.expressions) 683 684 if not size or len(schema) != size: 685 if unknown_type: 686 self.unsupported( 687 "Cannot convert untyped key-value definitions (try annotate_types)." 
688 ) 689 return self.func("ROW", *values) 690 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 691 692 def interval_sql(self, expression: exp.Interval) -> str: 693 if expression.this and expression.text("unit").upper().startswith("WEEK"): 694 return f"({expression.this.name} * INTERVAL '7' DAY)" 695 return super().interval_sql(expression) 696 697 def transaction_sql(self, expression: exp.Transaction) -> str: 698 modes = expression.args.get("modes") 699 modes = f" {', '.join(modes)}" if modes else "" 700 return f"START TRANSACTION{modes}" 701 702 def offset_limit_modifiers( 703 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 704 ) -> t.List[str]: 705 return [ 706 self.sql(expression, "offset"), 707 self.sql(limit), 708 ] 709 710 def create_sql(self, expression: exp.Create) -> str: 711 """ 712 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 713 so we need to remove them 714 """ 715 kind = expression.args["kind"] 716 schema = expression.this 717 if kind == "VIEW" and schema.expressions: 718 expression.this.set("expressions", None) 719 return super().create_sql(expression) 720 721 def delete_sql(self, expression: exp.Delete) -> str: 722 """ 723 Presto only supports DELETE FROM for a single table without an alias, so we need 724 to remove the unnecessary parts. If the original DELETE statement contains more 725 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 
726 """ 727 tables = expression.args.get("tables") or [expression.this] 728 if len(tables) > 1: 729 return super().delete_sql(expression) 730 731 table = tables[0] 732 expression.set("this", table) 733 expression.set("tables", None) 734 735 if isinstance(table, exp.Table): 736 table_alias = table.args.get("alias") 737 if table_alias: 738 table_alias.pop() 739 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 740 741 return super().delete_sql(expression) 742 743 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 744 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 745 746 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 747 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 748 if not expression.args.get("variant_extract") or is_json_extract: 749 return self.func( 750 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 751 ) 752 753 this = self.sql(expression, "this") 754 755 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 756 segments = [] 757 for path_key in expression.expression.expressions[1:]: 758 if not isinstance(path_key, exp.JSONPathKey): 759 # Cannot transpile subscripts, wildcards etc to dot notation 760 self.unsupported( 761 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 762 ) 763 continue 764 key = path_key.this 765 if not exp.SAFE_IDENTIFIER_RE.match(key): 766 key = f'"{key}"' 767 segments.append(f".{key}") 768 769 expr = "".join(segments) 770 771 return f"{this}{expr}" 772 773 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 774 return self.func( 775 "ARRAY_JOIN", 776 self.func("ARRAY_AGG", expression.this), 777 expression.args.get("separator"), 778 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def jsonformat_sql(self, expression: exp.JSONFormat) -> str:
    """Render JSON_FORMAT, making sure the argument is JSON-typed first."""
    this = expression.this
    is_json = expression.args.get("is_json")

    needs_annotation = bool(this) and not (is_json or this.type)
    if needs_annotation:
        # Local import to avoid a circular dependency with the optimizer.
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    if not (is_json or this.is_type(exp.DataType.Type.JSON)):
        this.replace(exp.cast(this, exp.DataType.Type.JSON))

    return self.function_fallback_sql(expression)
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 over text as LOWER(TO_HEX(MD5(<utf-8 encoded argument>)))."""
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    # Presto's MD5 operates on VARBINARY, so encode textual arguments first.
    if this.is_type(*exp.DataType.TEXT_TYPES):
        this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(this))
    hex_digest = self.func("TO_HEX", digest)
    return self.func("LOWER", hex_digest)
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME over a timezone-tolerant timestamp parse.

    `TO_UNIXTIME` requires a `TIMESTAMP`, so the argument must be parsed into one.
    `DATE_PARSE` is tried first, but since it can fail when there's a timezone
    involved, it is wrapped in `TRY` with `PARSE_DATETIME` as a fallback, which
    seems to be using the same time mapping as Hive, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    this = expression.this
    fmt = self.format_time(expression)

    value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
    if this.is_string:
        value_as_timestamp = exp.cast(this, exp.DataType.Type.TIMESTAMP)
    else:
        value_as_timestamp = this

    parse_without_tz = self.func("DATE_PARSE", value_as_text, fmt)

    formatted_value = self.func("DATE_FORMAT", value_as_timestamp, fmt)
    parse_with_tz = self.func(
        "PARSE_DATETIME",
        formatted_value,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
    return self.func("TO_UNIXTIME", coalesced)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render "safe" subscript access as ELEMENT_AT; defer the rest to the base class."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # ELEMENT_AT is 1-based, so shift the index relative to the source dialect's offset.
    index = seq_get(
        apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
            dialect=self.dialect,
        ),
        0,
    )
    return self.func("ELEMENT_AT", expression.this, index)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a struct literal as CAST(ROW(...) AS ROW(...)) when all fields are typed."""
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression, dialect=self.dialect)
    values: t.List[str] = []
    schema: t.List[str] = []
    unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            unknown_type = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    size = len(expression.expressions)

    # Without a complete schema we can only emit a bare ROW(...).
    if not size or len(schema) != size:
        if unknown_type:
            self.unsupported("Cannot convert untyped key-value definitions (try annotate_types).")
        return self.func("ROW", *values)
    return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    schema = expression.this
    is_view = expression.args["kind"] == "VIEW"
    if is_view and schema.expressions:
        schema.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)`, where `(cola)` is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    tables = expression.args.get("tables") or [expression.this]
    if len(tables) > 1:
        return super().delete_sql(expression)

    target = tables[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
            # The alias is gone, so column references qualified by it must be stripped.
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Render JSONExtract as JSON_EXTRACT, or as ROW dot-access for VARIANT extracts."""
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue

        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            key = f'"{key}"'
        segments.append(f".{key}")

    return f"{this}{''.join(segments)}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql