sqlglot.dialects.presto
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 bool_xor_sql, 11 date_trunc_to_time, 12 datestrtodate_sql, 13 encode_decode_sql, 14 build_formatted_time, 15 if_sql, 16 left_to_substring_sql, 17 no_ilike_sql, 18 no_pivot_sql, 19 no_timestamp_sql, 20 regexp_extract_sql, 21 rename_func, 22 right_to_substring_sql, 23 sha256_sql, 24 strposition_sql, 25 struct_extract_sql, 26 timestamptrunc_sql, 27 timestrtotime_sql, 28 ts_or_ds_add_cast, 29 unit_to_str, 30 sequence_sql, 31 build_regexp_extract, 32 explode_to_unnest_sql, 33) 34from sqlglot.dialects.hive import Hive 35from sqlglot.dialects.mysql import MySQL 36from sqlglot.helper import apply_index_offset, seq_get 37from sqlglot.optimizer.scope import find_all_in_scope 38from sqlglot.tokens import TokenType 39from sqlglot.transforms import unqualify_columns 40from sqlglot.generator import unsupported_args 41 42DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub] 43 44 45def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str: 46 regex = r"(\w)(\w*)" 47 return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))" 48 49 50def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str: 51 if expression.args.get("asc") == exp.false(): 52 comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END" 53 else: 54 comparator = None 55 return self.func("ARRAY_SORT", expression.this, comparator) 56 57 58def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str: 59 if isinstance(expression.parent, exp.PartitionedByProperty): 60 columns = ", ".join(f"'{c.name}'" for c in expression.expressions) 61 return f"ARRAY[{columns}]" 62 63 if expression.parent: 64 for schema in expression.parent.find_all(exp.Schema): 65 if schema is expression: 66 continue 67 68 column_defs = schema.find_all(exp.ColumnDef) 69 if column_defs and isinstance(schema.parent, exp.Property): 70 expression.expressions.extend(column_defs) 71 72 return self.schema_sql(expression) 73 74 75def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str: 76 self.unsupported("Presto does not support exact quantiles") 77 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 78 79 80def _str_to_time_sql( 81 self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate 82) -> str: 83 return self.func("DATE_PARSE", expression.this, self.format_time(expression)) 84 85 86def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str: 87 time_format = self.format_time(expression) 88 if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT): 89 return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE)) 90 return self.sql( 91 exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE) 92 ) 93 94 95def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str: 96 expression = ts_or_ds_add_cast(expression) 97 unit = unit_to_str(expression) 98 return self.func("DATE_ADD", unit, expression.expression, expression.this) 99 100 101def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str: 102 this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP) 103 expr = exp.cast(expression.expression, 
exp.DataType.Type.TIMESTAMP) 104 unit = unit_to_str(expression) 105 return self.func("DATE_DIFF", unit, expr, this) 106 107 108def _build_approx_percentile(args: t.List) -> exp.Expression: 109 if len(args) == 4: 110 return exp.ApproxQuantile( 111 this=seq_get(args, 0), 112 weight=seq_get(args, 1), 113 quantile=seq_get(args, 2), 114 accuracy=seq_get(args, 3), 115 ) 116 if len(args) == 3: 117 return exp.ApproxQuantile( 118 this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2) 119 ) 120 return exp.ApproxQuantile.from_arg_list(args) 121 122 123def _build_from_unixtime(args: t.List) -> exp.Expression: 124 if len(args) == 3: 125 return exp.UnixToTime( 126 this=seq_get(args, 0), 127 hours=seq_get(args, 1), 128 minutes=seq_get(args, 2), 129 ) 130 if len(args) == 2: 131 return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1)) 132 133 return exp.UnixToTime.from_arg_list(args) 134 135 136def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str: 137 """ 138 Trino doesn't support FIRST / LAST as functions, but they're valid in the context 139 of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases 140 they're converted into an ARBITRARY call. 141 142 Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions 143 """ 144 if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize): 145 return self.function_fallback_sql(expression) 146 147 return rename_func("ARBITRARY")(self, expression) 148 149 150def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str: 151 scale = expression.args.get("scale") 152 timestamp = self.sql(expression, "this") 153 if scale in (None, exp.UnixToTime.SECONDS): 154 return rename_func("FROM_UNIXTIME")(self, expression) 155 156 return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))" 157 158 159def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression: 160 if not expression.type: 161 from sqlglot.optimizer.annotate_types import annotate_types 162 163 annotate_types(expression, dialect=self.dialect) 164 if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES: 165 return exp.cast(expression, to=exp.DataType.Type.BIGINT) 166 return expression 167 168 169def _build_to_char(args: t.List) -> exp.TimeToStr: 170 fmt = seq_get(args, 1) 171 if isinstance(fmt, exp.Literal): 172 # We uppercase this to match Teradata's format mapping keys 173 fmt.set("this", fmt.this.upper()) 174 175 # We use "teradata" on purpose here, because the time formats are different in Presto. 
176 # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char 177 return build_formatted_time(exp.TimeToStr, "teradata")(args) 178 179 180def _date_delta_sql( 181 name: str, negate_interval: bool = False 182) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]: 183 def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str: 184 interval = _to_int(self, expression.expression) 185 return self.func( 186 name, 187 unit_to_str(expression), 188 interval * (-1) if negate_interval else interval, 189 expression.this, 190 ) 191 192 return _delta_sql 193 194 195def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str: 196 explode = expression.this 197 if isinstance(explode, exp.Explode): 198 exploded_type = explode.this.type 199 alias = expression.args.get("alias") 200 201 # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array 202 if ( 203 isinstance(alias, exp.TableAlias) 204 and isinstance(exploded_type, exp.DataType) 205 and exploded_type.is_type(exp.DataType.Type.ARRAY) 206 and exploded_type.expressions 207 and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT) 208 ): 209 # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias 210 alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions]) 211 elif isinstance(explode, exp.Inline): 212 explode.replace(exp.Explode(this=explode.this.copy())) 213 214 return explode_to_unnest_sql(self, expression) 215 216 217def _amend_exploded_column_table(expression: exp.Expression) -> exp.Expression: 218 # We check for expression.type because the columns can be amended only if types were inferred 219 if isinstance(expression, exp.Select) and expression.type: 220 for lateral in expression.args.get("laterals") or []: 221 alias = lateral.args.get("alias") 222 if ( 223 not isinstance(lateral.this, exp.Explode) 224 or not isinstance(alias, exp.TableAlias) 225 or len(alias.columns) != 1 226 ): 227 continue 228 229 new_table = alias.this 230 old_table = alias.columns[0].name.lower() 231 232 # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be qualified 233 # with the struct column, resulting in invalid Presto references that need to be amended 234 for column in find_all_in_scope(expression, exp.Column): 235 if column.db.lower() == old_table: 236 column.set("table", column.args["db"].pop()) 237 elif column.table.lower() == old_table: 238 column.set("table", new_table.copy()) 239 elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot): 240 column.parent.replace(exp.column(column.parent.expression, table=new_table)) 241 242 return expression 243 244 245class Presto(Dialect): 246 INDEX_OFFSET = 1 247 NULL_ORDERING = "nulls_are_last" 248 TIME_FORMAT = MySQL.TIME_FORMAT 249 STRICT_STRING_CONCAT = True 250 SUPPORTS_SEMI_ANTI_JOIN = False 251 TYPED_DIVISION = True 252 TABLESAMPLE_SIZE_IS_PERCENT = True 253 LOG_BASE_FIRST: t.Optional[bool] = None 254 SUPPORTS_VALUES_DEFAULT = False 255 256 TIME_MAPPING = MySQL.TIME_MAPPING 257 258 # https://github.com/trinodb/trino/issues/17 259 # https://github.com/trinodb/trino/issues/12289 260 # https://github.com/prestodb/presto/issues/2863 261 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 262 263 # The result of certain math functions in Presto/Trino is of type 264 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 265 ANNOTATORS = { 266 **Dialect.ANNOTATORS, 267 exp.Floor: 
lambda self, e: self._annotate_by_args(e, "this"), 268 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 269 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 270 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 271 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 272 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 273 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 274 if e.this 275 else self._set_type(e, exp.DataType.Type.DOUBLE), 276 } 277 278 class Tokenizer(tokens.Tokenizer): 279 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 280 UNICODE_STRINGS = [ 281 (prefix + q, q) 282 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 283 for prefix in ("U&", "u&") 284 ] 285 286 KEYWORDS = { 287 **tokens.Tokenizer.KEYWORDS, 288 "DEALLOCATE PREPARE": TokenType.COMMAND, 289 "DESCRIBE INPUT": TokenType.COMMAND, 290 "DESCRIBE OUTPUT": TokenType.COMMAND, 291 "RESET SESSION": TokenType.COMMAND, 292 "START": TokenType.BEGIN, 293 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 294 "ROW": TokenType.STRUCT, 295 "IPADDRESS": TokenType.IPADDRESS, 296 "IPPREFIX": TokenType.IPPREFIX, 297 "TDIGEST": TokenType.TDIGEST, 298 "HYPERLOGLOG": TokenType.HLLSKETCH, 299 } 300 KEYWORDS.pop("/*+") 301 KEYWORDS.pop("QUALIFY") 302 303 class Parser(parser.Parser): 304 VALUES_FOLLOWED_BY_PAREN = False 305 306 FUNCTIONS = { 307 **parser.Parser.FUNCTIONS, 308 "ARBITRARY": exp.AnyValue.from_arg_list, 309 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 310 "APPROX_PERCENTILE": _build_approx_percentile, 311 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 312 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 313 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 314 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 315 "CARDINALITY": exp.ArraySize.from_arg_list, 316 "CONTAINS": exp.ArrayContains.from_arg_list, 317 "DATE_ADD": lambda args: exp.DateAdd( 318 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 319 ), 320 "DATE_DIFF": lambda args: exp.DateDiff( 321 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 322 ), 323 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 324 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 325 "DATE_TRUNC": date_trunc_to_time, 326 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 327 "ELEMENT_AT": lambda args: exp.Bracket( 328 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 329 ), 330 "FROM_HEX": exp.Unhex.from_arg_list, 331 "FROM_UNIXTIME": _build_from_unixtime, 332 "FROM_UTF8": lambda args: exp.Decode( 333 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 334 ), 335 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 336 "NOW": exp.CurrentTimestamp.from_arg_list, 337 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 338 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 339 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 340 this=seq_get(args, 0), 341 expression=seq_get(args, 1), 342 replacement=seq_get(args, 2) or exp.Literal.string(""), 343 ), 344 "ROW": exp.Struct.from_arg_list, 345 "SEQUENCE": exp.GenerateSeries.from_arg_list, 346 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 347 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 348 "STRPOS": lambda args: exp.StrPosition( 349 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 350 ), 351 "TO_CHAR": _build_to_char, 352 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 353 
"TO_UTF8": lambda args: exp.Encode( 354 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 355 ), 356 "MD5": exp.MD5Digest.from_arg_list, 357 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 358 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 359 } 360 361 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 362 FUNCTION_PARSERS.pop("TRIM") 363 364 class Generator(generator.Generator): 365 INTERVAL_ALLOWS_PLURAL_FORM = False 366 JOIN_HINTS = False 367 TABLE_HINTS = False 368 QUERY_HINTS = False 369 IS_BOOL_ALLOWED = False 370 TZ_TO_WITH_TIME_ZONE = True 371 NVL2_SUPPORTED = False 372 STRUCT_DELIMITER = ("(", ")") 373 LIMIT_ONLY_LITERALS = True 374 SUPPORTS_SINGLE_ARG_CONCAT = False 375 LIKE_PROPERTY_INSIDE_SCHEMA = True 376 MULTI_ARG_DISTINCT = False 377 SUPPORTS_TO_NUMBER = False 378 HEX_FUNC = "TO_HEX" 379 PARSE_JSON_NAME = "JSON_PARSE" 380 PAD_FILL_PATTERN_IS_REQUIRED = True 381 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 382 SUPPORTS_MEDIAN = False 383 ARRAY_SIZE_NAME = "CARDINALITY" 384 385 PROPERTIES_LOCATION = { 386 **generator.Generator.PROPERTIES_LOCATION, 387 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 388 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 389 } 390 391 TYPE_MAPPING = { 392 **generator.Generator.TYPE_MAPPING, 393 exp.DataType.Type.BINARY: "VARBINARY", 394 exp.DataType.Type.BIT: "BOOLEAN", 395 exp.DataType.Type.DATETIME: "TIMESTAMP", 396 exp.DataType.Type.DATETIME64: "TIMESTAMP", 397 exp.DataType.Type.FLOAT: "REAL", 398 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 399 exp.DataType.Type.INT: "INTEGER", 400 exp.DataType.Type.STRUCT: "ROW", 401 exp.DataType.Type.TEXT: "VARCHAR", 402 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 403 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 404 exp.DataType.Type.TIMETZ: "TIME", 405 } 406 407 TRANSFORMS = { 408 **generator.Generator.TRANSFORMS, 409 exp.AnyValue: rename_func("ARBITRARY"), 410 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 411 exp.ArgMax: rename_func("MAX_BY"), 412 exp.ArgMin: rename_func("MIN_BY"), 413 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 414 exp.ArrayAny: rename_func("ANY_MATCH"), 415 exp.ArrayConcat: rename_func("CONCAT"), 416 exp.ArrayContains: rename_func("CONTAINS"), 417 exp.ArrayToString: rename_func("ARRAY_JOIN"), 418 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 419 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 420 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 421 exp.BitwiseLeftShift: lambda self, e: self.func( 422 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 423 ), 424 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 425 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 426 exp.BitwiseRightShift: lambda self, e: self.func( 427 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 428 ), 429 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 430 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 431 exp.CurrentTime: lambda *_: "CURRENT_TIME", 432 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 433 exp.CurrentUser: lambda *_: "CURRENT_USER", 434 exp.DateAdd: _date_delta_sql("DATE_ADD"), 435 exp.DateDiff: lambda self, e: self.func( 436 "DATE_DIFF", unit_to_str(e), e.expression, e.this 437 ), 438 exp.DateStrToDate: datestrtodate_sql, 439 exp.DateToDi: lambda self, 440 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, 
{Presto.DATEINT_FORMAT}) AS INT)", 441 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 442 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 443 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 444 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 445 exp.DiToDate: lambda self, 446 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 447 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 448 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 449 exp.First: _first_last_sql, 450 exp.FromTimeZone: lambda self, 451 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 452 exp.GenerateSeries: sequence_sql, 453 exp.GenerateDateArray: sequence_sql, 454 exp.Group: transforms.preprocess([transforms.unalias_group]), 455 exp.If: if_sql(), 456 exp.ILike: no_ilike_sql, 457 exp.Initcap: _initcap_sql, 458 exp.Last: _first_last_sql, 459 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 460 exp.Lateral: _explode_to_unnest_sql, 461 exp.Left: left_to_substring_sql, 462 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 463 rename_func("LEVENSHTEIN_DISTANCE") 464 ), 465 exp.LogicalAnd: rename_func("BOOL_AND"), 466 exp.LogicalOr: rename_func("BOOL_OR"), 467 exp.Pivot: no_pivot_sql, 468 exp.Quantile: _quantile_sql, 469 exp.RegexpExtract: regexp_extract_sql, 470 exp.RegexpExtractAll: regexp_extract_sql, 471 exp.Right: right_to_substring_sql, 472 exp.Schema: _schema_sql, 473 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 474 exp.Select: transforms.preprocess( 475 [ 476 transforms.eliminate_qualify, 477 transforms.eliminate_distinct_on, 478 transforms.explode_projection_to_unnest(1), 479 transforms.eliminate_semi_and_anti_joins, 480 _amend_exploded_column_table, 481 ] 482 ), 483 exp.SortArray: _no_sort_array, 484 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 485 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 486 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 487 exp.StrToTime: _str_to_time_sql, 488 exp.StructExtract: struct_extract_sql, 489 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 490 exp.Timestamp: no_timestamp_sql, 491 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 492 exp.TimestampTrunc: timestamptrunc_sql(), 493 exp.TimeStrToDate: timestrtotime_sql, 494 exp.TimeStrToTime: timestrtotime_sql, 495 exp.TimeStrToUnix: lambda self, e: self.func( 496 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 497 ), 498 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 499 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 500 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 501 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 502 exp.TsOrDiToDi: lambda self, 503 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 504 exp.TsOrDsAdd: _ts_or_ds_add_sql, 505 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 506 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 507 exp.Unhex: rename_func("FROM_HEX"), 508 exp.UnixToStr: lambda self, 509 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 510 exp.UnixToTime: _unix_to_time_sql, 511 exp.UnixToTimeStr: lambda self, 512 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 513 exp.VariancePop: 
rename_func("VAR_POP"), 514 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 515 exp.WithinGroup: transforms.preprocess( 516 [transforms.remove_within_group_for_percentiles] 517 ), 518 exp.Xor: bool_xor_sql, 519 exp.MD5Digest: rename_func("MD5"), 520 exp.SHA: rename_func("SHA1"), 521 exp.SHA2: sha256_sql, 522 } 523 524 RESERVED_KEYWORDS = { 525 "alter", 526 "and", 527 "as", 528 "between", 529 "by", 530 "case", 531 "cast", 532 "constraint", 533 "create", 534 "cross", 535 "current_time", 536 "current_timestamp", 537 "deallocate", 538 "delete", 539 "describe", 540 "distinct", 541 "drop", 542 "else", 543 "end", 544 "escape", 545 "except", 546 "execute", 547 "exists", 548 "extract", 549 "false", 550 "for", 551 "from", 552 "full", 553 "group", 554 "having", 555 "in", 556 "inner", 557 "insert", 558 "intersect", 559 "into", 560 "is", 561 "join", 562 "left", 563 "like", 564 "natural", 565 "not", 566 "null", 567 "on", 568 "or", 569 "order", 570 "outer", 571 "prepare", 572 "right", 573 "select", 574 "table", 575 "then", 576 "true", 577 "union", 578 "using", 579 "values", 580 "when", 581 "where", 582 "with", 583 } 584 585 def md5_sql(self, expression: exp.MD5) -> str: 586 this = expression.this 587 588 if not this.type: 589 from sqlglot.optimizer.annotate_types import annotate_types 590 591 this = annotate_types(this) 592 593 if this.is_type(*exp.DataType.TEXT_TYPES): 594 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 595 596 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 597 598 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 599 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 600 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 601 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 602 # which seems to be using the same time mapping as Hive, as per: 603 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 604 this = expression.this 605 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 606 value_as_timestamp = ( 607 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 608 ) 609 610 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 611 612 formatted_value = self.func( 613 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 614 ) 615 parse_with_tz = self.func( 616 "PARSE_DATETIME", 617 formatted_value, 618 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 619 ) 620 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 621 return self.func("TO_UNIXTIME", coalesced) 622 623 def bracket_sql(self, expression: exp.Bracket) -> str: 624 if expression.args.get("safe"): 625 return self.func( 626 "ELEMENT_AT", 627 expression.this, 628 seq_get( 629 apply_index_offset( 630 expression.this, 631 expression.expressions, 632 1 - expression.args.get("offset", 0), 633 ), 634 0, 635 ), 636 ) 637 return super().bracket_sql(expression) 638 639 def struct_sql(self, expression: exp.Struct) -> str: 640 from sqlglot.optimizer.annotate_types import annotate_types 641 642 expression = annotate_types(expression) 643 values: t.List[str] = [] 644 schema: t.List[str] = [] 645 unknown_type = False 646 647 for e in expression.expressions: 648 if isinstance(e, exp.PropertyEQ): 649 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 650 unknown_type = True 651 else: 
652 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 653 values.append(self.sql(e, "expression")) 654 else: 655 values.append(self.sql(e)) 656 657 size = len(expression.expressions) 658 659 if not size or len(schema) != size: 660 if unknown_type: 661 self.unsupported( 662 "Cannot convert untyped key-value definitions (try annotate_types)." 663 ) 664 return self.func("ROW", *values) 665 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 666 667 def interval_sql(self, expression: exp.Interval) -> str: 668 if expression.this and expression.text("unit").upper().startswith("WEEK"): 669 return f"({expression.this.name} * INTERVAL '7' DAY)" 670 return super().interval_sql(expression) 671 672 def transaction_sql(self, expression: exp.Transaction) -> str: 673 modes = expression.args.get("modes") 674 modes = f" {', '.join(modes)}" if modes else "" 675 return f"START TRANSACTION{modes}" 676 677 def offset_limit_modifiers( 678 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 679 ) -> t.List[str]: 680 return [ 681 self.sql(expression, "offset"), 682 self.sql(limit), 683 ] 684 685 def create_sql(self, expression: exp.Create) -> str: 686 """ 687 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 688 so we need to remove them 689 """ 690 kind = expression.args["kind"] 691 schema = expression.this 692 if kind == "VIEW" and schema.expressions: 693 expression.this.set("expressions", None) 694 return super().create_sql(expression) 695 696 def delete_sql(self, expression: exp.Delete) -> str: 697 """ 698 Presto only supports DELETE FROM for a single table without an alias, so we need 699 to remove the unnecessary parts. If the original DELETE statement contains more 700 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 701 """ 702 tables = expression.args.get("tables") or [expression.this] 703 if len(tables) > 1: 704 return super().delete_sql(expression) 705 706 table = tables[0] 707 expression.set("this", table) 708 expression.set("tables", None) 709 710 if isinstance(table, exp.Table): 711 table_alias = table.args.get("alias") 712 if table_alias: 713 table_alias.pop() 714 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 715 716 return super().delete_sql(expression) 717 718 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 719 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 720 721 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 722 # VARIANT extract (e.g. 
col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 723 if not expression.args.get("variant_extract") or is_json_extract: 724 return self.func( 725 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 726 ) 727 728 this = self.sql(expression, "this") 729 730 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 731 segments = [] 732 for path_key in expression.expression.expressions[1:]: 733 if not isinstance(path_key, exp.JSONPathKey): 734 # Cannot transpile subscripts, wildcards etc to dot notation 735 self.unsupported( 736 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 737 ) 738 continue 739 key = path_key.this 740 if not exp.SAFE_IDENTIFIER_RE.match(key): 741 key = f'"{key}"' 742 segments.append(f".{key}") 743 744 expr = "".join(segments) 745 746 return f"{this}{expr}" 747 748 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 749 return self.func( 750 "ARRAY_JOIN", 751 self.func("ARRAY_AGG", expression.this), 752 expression.args.get("separator"), 753 )
246class Presto(Dialect): 247 INDEX_OFFSET = 1 248 NULL_ORDERING = "nulls_are_last" 249 TIME_FORMAT = MySQL.TIME_FORMAT 250 STRICT_STRING_CONCAT = True 251 SUPPORTS_SEMI_ANTI_JOIN = False 252 TYPED_DIVISION = True 253 TABLESAMPLE_SIZE_IS_PERCENT = True 254 LOG_BASE_FIRST: t.Optional[bool] = None 255 SUPPORTS_VALUES_DEFAULT = False 256 257 TIME_MAPPING = MySQL.TIME_MAPPING 258 259 # https://github.com/trinodb/trino/issues/17 260 # https://github.com/trinodb/trino/issues/12289 261 # https://github.com/prestodb/presto/issues/2863 262 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 263 264 # The result of certain math functions in Presto/Trino is of type 265 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 266 ANNOTATORS = { 267 **Dialect.ANNOTATORS, 268 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 269 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 270 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 271 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 272 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 273 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 274 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 275 if e.this 276 else self._set_type(e, exp.DataType.Type.DOUBLE), 277 } 278 279 class Tokenizer(tokens.Tokenizer): 280 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 281 UNICODE_STRINGS = [ 282 (prefix + q, q) 283 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 284 for prefix in ("U&", "u&") 285 ] 286 287 KEYWORDS = { 288 **tokens.Tokenizer.KEYWORDS, 289 "DEALLOCATE PREPARE": TokenType.COMMAND, 290 "DESCRIBE INPUT": TokenType.COMMAND, 291 "DESCRIBE OUTPUT": TokenType.COMMAND, 292 "RESET SESSION": TokenType.COMMAND, 293 "START": TokenType.BEGIN, 294 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 295 "ROW": TokenType.STRUCT, 296 "IPADDRESS": TokenType.IPADDRESS, 297 "IPPREFIX": TokenType.IPPREFIX, 298 "TDIGEST": TokenType.TDIGEST, 299 "HYPERLOGLOG": TokenType.HLLSKETCH, 300 } 301 KEYWORDS.pop("/*+") 302 KEYWORDS.pop("QUALIFY") 303 304 class Parser(parser.Parser): 305 VALUES_FOLLOWED_BY_PAREN = False 306 307 FUNCTIONS = { 308 **parser.Parser.FUNCTIONS, 309 "ARBITRARY": exp.AnyValue.from_arg_list, 310 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 311 "APPROX_PERCENTILE": _build_approx_percentile, 312 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 313 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 314 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 315 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 316 "CARDINALITY": exp.ArraySize.from_arg_list, 317 "CONTAINS": exp.ArrayContains.from_arg_list, 318 "DATE_ADD": lambda args: exp.DateAdd( 319 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 320 ), 321 "DATE_DIFF": lambda args: exp.DateDiff( 322 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 323 ), 324 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 325 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 326 "DATE_TRUNC": date_trunc_to_time, 327 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 328 "ELEMENT_AT": lambda args: exp.Bracket( 329 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 330 ), 331 "FROM_HEX": exp.Unhex.from_arg_list, 332 "FROM_UNIXTIME": _build_from_unixtime, 333 "FROM_UTF8": lambda args: exp.Decode( 334 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 335 ), 
336 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 337 "NOW": exp.CurrentTimestamp.from_arg_list, 338 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 339 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 340 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 341 this=seq_get(args, 0), 342 expression=seq_get(args, 1), 343 replacement=seq_get(args, 2) or exp.Literal.string(""), 344 ), 345 "ROW": exp.Struct.from_arg_list, 346 "SEQUENCE": exp.GenerateSeries.from_arg_list, 347 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 348 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 349 "STRPOS": lambda args: exp.StrPosition( 350 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 351 ), 352 "TO_CHAR": _build_to_char, 353 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 354 "TO_UTF8": lambda args: exp.Encode( 355 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 356 ), 357 "MD5": exp.MD5Digest.from_arg_list, 358 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 359 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 360 } 361 362 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 363 FUNCTION_PARSERS.pop("TRIM") 364 365 class Generator(generator.Generator): 366 INTERVAL_ALLOWS_PLURAL_FORM = False 367 JOIN_HINTS = False 368 TABLE_HINTS = False 369 QUERY_HINTS = False 370 IS_BOOL_ALLOWED = False 371 TZ_TO_WITH_TIME_ZONE = True 372 NVL2_SUPPORTED = False 373 STRUCT_DELIMITER = ("(", ")") 374 LIMIT_ONLY_LITERALS = True 375 SUPPORTS_SINGLE_ARG_CONCAT = False 376 LIKE_PROPERTY_INSIDE_SCHEMA = True 377 MULTI_ARG_DISTINCT = False 378 SUPPORTS_TO_NUMBER = False 379 HEX_FUNC = "TO_HEX" 380 PARSE_JSON_NAME = "JSON_PARSE" 381 PAD_FILL_PATTERN_IS_REQUIRED = True 382 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 383 SUPPORTS_MEDIAN = False 384 ARRAY_SIZE_NAME = "CARDINALITY" 385 386 PROPERTIES_LOCATION = { 387 **generator.Generator.PROPERTIES_LOCATION, 388 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 389 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 390 } 391 392 TYPE_MAPPING = { 393 **generator.Generator.TYPE_MAPPING, 394 exp.DataType.Type.BINARY: "VARBINARY", 395 exp.DataType.Type.BIT: "BOOLEAN", 396 exp.DataType.Type.DATETIME: "TIMESTAMP", 397 exp.DataType.Type.DATETIME64: "TIMESTAMP", 398 exp.DataType.Type.FLOAT: "REAL", 399 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 400 exp.DataType.Type.INT: "INTEGER", 401 exp.DataType.Type.STRUCT: "ROW", 402 exp.DataType.Type.TEXT: "VARCHAR", 403 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 404 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 405 exp.DataType.Type.TIMETZ: "TIME", 406 } 407 408 TRANSFORMS = { 409 **generator.Generator.TRANSFORMS, 410 exp.AnyValue: rename_func("ARBITRARY"), 411 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 412 exp.ArgMax: rename_func("MAX_BY"), 413 exp.ArgMin: rename_func("MIN_BY"), 414 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 415 exp.ArrayAny: rename_func("ANY_MATCH"), 416 exp.ArrayConcat: rename_func("CONCAT"), 417 exp.ArrayContains: rename_func("CONTAINS"), 418 exp.ArrayToString: rename_func("ARRAY_JOIN"), 419 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 420 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 421 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 422 exp.BitwiseLeftShift: lambda self, e: self.func( 423 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 424 ), 425 exp.BitwiseNot: lambda self, e: 
self.func("BITWISE_NOT", e.this), 426 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 427 exp.BitwiseRightShift: lambda self, e: self.func( 428 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 429 ), 430 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 431 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 432 exp.CurrentTime: lambda *_: "CURRENT_TIME", 433 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 434 exp.CurrentUser: lambda *_: "CURRENT_USER", 435 exp.DateAdd: _date_delta_sql("DATE_ADD"), 436 exp.DateDiff: lambda self, e: self.func( 437 "DATE_DIFF", unit_to_str(e), e.expression, e.this 438 ), 439 exp.DateStrToDate: datestrtodate_sql, 440 exp.DateToDi: lambda self, 441 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 442 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 443 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 444 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 445 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 446 exp.DiToDate: lambda self, 447 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 448 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 449 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 450 exp.First: _first_last_sql, 451 exp.FromTimeZone: lambda self, 452 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 453 exp.GenerateSeries: sequence_sql, 454 exp.GenerateDateArray: sequence_sql, 455 exp.Group: transforms.preprocess([transforms.unalias_group]), 456 exp.If: if_sql(), 457 exp.ILike: no_ilike_sql, 458 exp.Initcap: _initcap_sql, 459 exp.Last: _first_last_sql, 460 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 461 exp.Lateral: _explode_to_unnest_sql, 462 exp.Left: left_to_substring_sql, 463 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 464 rename_func("LEVENSHTEIN_DISTANCE") 465 ), 466 exp.LogicalAnd: rename_func("BOOL_AND"), 467 exp.LogicalOr: rename_func("BOOL_OR"), 468 exp.Pivot: no_pivot_sql, 469 exp.Quantile: _quantile_sql, 470 exp.RegexpExtract: regexp_extract_sql, 471 exp.RegexpExtractAll: regexp_extract_sql, 472 exp.Right: right_to_substring_sql, 473 exp.Schema: _schema_sql, 474 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 475 exp.Select: transforms.preprocess( 476 [ 477 transforms.eliminate_qualify, 478 transforms.eliminate_distinct_on, 479 transforms.explode_projection_to_unnest(1), 480 transforms.eliminate_semi_and_anti_joins, 481 _amend_exploded_column_table, 482 ] 483 ), 484 exp.SortArray: _no_sort_array, 485 exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True), 486 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 487 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 488 exp.StrToTime: _str_to_time_sql, 489 exp.StructExtract: struct_extract_sql, 490 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 491 exp.Timestamp: no_timestamp_sql, 492 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 493 exp.TimestampTrunc: timestamptrunc_sql(), 494 exp.TimeStrToDate: timestrtotime_sql, 495 exp.TimeStrToTime: timestrtotime_sql, 496 exp.TimeStrToUnix: lambda self, e: self.func( 497 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 498 ), 499 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", 
e.this, self.format_time(e)), 500 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 501 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 502 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 503 exp.TsOrDiToDi: lambda self, 504 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 505 exp.TsOrDsAdd: _ts_or_ds_add_sql, 506 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 507 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 508 exp.Unhex: rename_func("FROM_HEX"), 509 exp.UnixToStr: lambda self, 510 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 511 exp.UnixToTime: _unix_to_time_sql, 512 exp.UnixToTimeStr: lambda self, 513 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 514 exp.VariancePop: rename_func("VAR_POP"), 515 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 516 exp.WithinGroup: transforms.preprocess( 517 [transforms.remove_within_group_for_percentiles] 518 ), 519 exp.Xor: bool_xor_sql, 520 exp.MD5Digest: rename_func("MD5"), 521 exp.SHA: rename_func("SHA1"), 522 exp.SHA2: sha256_sql, 523 } 524 525 RESERVED_KEYWORDS = { 526 "alter", 527 "and", 528 "as", 529 "between", 530 "by", 531 "case", 532 "cast", 533 "constraint", 534 "create", 535 "cross", 536 "current_time", 537 "current_timestamp", 538 "deallocate", 539 "delete", 540 "describe", 541 "distinct", 542 "drop", 543 "else", 544 "end", 545 "escape", 546 "except", 547 "execute", 548 "exists", 549 "extract", 550 "false", 551 "for", 552 "from", 553 "full", 554 "group", 555 "having", 556 "in", 557 "inner", 558 "insert", 559 "intersect", 560 "into", 561 "is", 562 "join", 563 "left", 564 "like", 565 "natural", 566 "not", 567 "null", 568 "on", 569 "or", 570 "order", 571 "outer", 572 "prepare", 573 "right", 574 "select", 575 "table", 576 "then", 577 "true", 578 "union", 579 "using", 580 "values", 581 "when", 582 "where", 583 "with", 584 } 585 586 def md5_sql(self, expression: exp.MD5) -> str: 587 this = expression.this 588 589 if not this.type: 590 from sqlglot.optimizer.annotate_types import annotate_types 591 592 this = annotate_types(this) 593 594 if this.is_type(*exp.DataType.TEXT_TYPES): 595 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 596 597 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 598 599 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 600 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
601 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 602 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 603 # which seems to be using the same time mapping as Hive, as per: 604 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 605 this = expression.this 606 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 607 value_as_timestamp = ( 608 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 609 ) 610 611 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 612 613 formatted_value = self.func( 614 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 615 ) 616 parse_with_tz = self.func( 617 "PARSE_DATETIME", 618 formatted_value, 619 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 620 ) 621 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 622 return self.func("TO_UNIXTIME", coalesced) 623 624 def bracket_sql(self, expression: exp.Bracket) -> str: 625 if expression.args.get("safe"): 626 return self.func( 627 "ELEMENT_AT", 628 expression.this, 629 seq_get( 630 apply_index_offset( 631 expression.this, 632 expression.expressions, 633 1 - expression.args.get("offset", 0), 634 ), 635 0, 636 ), 637 ) 638 return super().bracket_sql(expression) 639 640 def struct_sql(self, expression: exp.Struct) -> str: 641 from sqlglot.optimizer.annotate_types import annotate_types 642 643 expression = annotate_types(expression) 644 values: t.List[str] = [] 645 schema: t.List[str] = [] 646 unknown_type = False 647 648 for e in expression.expressions: 649 if isinstance(e, exp.PropertyEQ): 650 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 651 unknown_type = True 652 else: 653 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 654 values.append(self.sql(e, "expression")) 655 else: 656 values.append(self.sql(e)) 657 658 size = len(expression.expressions) 659 660 if not size or len(schema) != size: 661 if unknown_type: 662 self.unsupported( 663 "Cannot convert untyped key-value definitions (try annotate_types)." 
664 ) 665 return self.func("ROW", *values) 666 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 667 668 def interval_sql(self, expression: exp.Interval) -> str: 669 if expression.this and expression.text("unit").upper().startswith("WEEK"): 670 return f"({expression.this.name} * INTERVAL '7' DAY)" 671 return super().interval_sql(expression) 672 673 def transaction_sql(self, expression: exp.Transaction) -> str: 674 modes = expression.args.get("modes") 675 modes = f" {', '.join(modes)}" if modes else "" 676 return f"START TRANSACTION{modes}" 677 678 def offset_limit_modifiers( 679 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 680 ) -> t.List[str]: 681 return [ 682 self.sql(expression, "offset"), 683 self.sql(limit), 684 ] 685 686 def create_sql(self, expression: exp.Create) -> str: 687 """ 688 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 689 so we need to remove them 690 """ 691 kind = expression.args["kind"] 692 schema = expression.this 693 if kind == "VIEW" and schema.expressions: 694 expression.this.set("expressions", None) 695 return super().create_sql(expression) 696 697 def delete_sql(self, expression: exp.Delete) -> str: 698 """ 699 Presto only supports DELETE FROM for a single table without an alias, so we need 700 to remove the unnecessary parts. If the original DELETE statement contains more 701 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 702 """ 703 tables = expression.args.get("tables") or [expression.this] 704 if len(tables) > 1: 705 return super().delete_sql(expression) 706 707 table = tables[0] 708 expression.set("this", table) 709 expression.set("tables", None) 710 711 if isinstance(table, exp.Table): 712 table_alias = table.args.get("alias") 713 if table_alias: 714 table_alias.pop() 715 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 716 717 return super().delete_sql(expression) 718 719 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 720 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 721 722 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 723 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 724 if not expression.args.get("variant_extract") or is_json_extract: 725 return self.func( 726 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 727 ) 728 729 this = self.sql(expression, "this") 730 731 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 732 segments = [] 733 for path_key in expression.expression.expressions[1:]: 734 if not isinstance(path_key, exp.JSONPathKey): 735 # Cannot transpile subscripts, wildcards etc to dot notation 736 self.unsupported( 737 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 738 ) 739 continue 740 key = path_key.this 741 if not exp.SAFE_IDENTIFIER_RE.match(key): 742 key = f'"{key}"' 743 segments.append(f".{key}") 744 745 expr = "".join(segments) 746 747 return f"{this}{expr}" 748 749 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 750 return self.func( 751 "ARRAY_JOIN", 752 self.func("ARRAY_AGG", expression.this), 753 expression.args.get("separator"), 754 )
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the behavior of a / b
depends on the types of a
and b
.
False means a / b
is always float division.
True means a / b
is integer division if both a
and b
are integers.
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Associates this dialect's time formats with their equivalent Python strftime
formats.
Specifies the strategy according to which identifiers should be normalized.
279 class Tokenizer(tokens.Tokenizer): 280 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 281 UNICODE_STRINGS = [ 282 (prefix + q, q) 283 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 284 for prefix in ("U&", "u&") 285 ] 286 287 KEYWORDS = { 288 **tokens.Tokenizer.KEYWORDS, 289 "DEALLOCATE PREPARE": TokenType.COMMAND, 290 "DESCRIBE INPUT": TokenType.COMMAND, 291 "DESCRIBE OUTPUT": TokenType.COMMAND, 292 "RESET SESSION": TokenType.COMMAND, 293 "START": TokenType.BEGIN, 294 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 295 "ROW": TokenType.STRUCT, 296 "IPADDRESS": TokenType.IPADDRESS, 297 "IPPREFIX": TokenType.IPPREFIX, 298 "TDIGEST": TokenType.TDIGEST, 299 "HYPERLOGLOG": TokenType.HLLSKETCH, 300 } 301 KEYWORDS.pop("/*+") 302 KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
304 class Parser(parser.Parser): 305 VALUES_FOLLOWED_BY_PAREN = False 306 307 FUNCTIONS = { 308 **parser.Parser.FUNCTIONS, 309 "ARBITRARY": exp.AnyValue.from_arg_list, 310 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 311 "APPROX_PERCENTILE": _build_approx_percentile, 312 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 313 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 314 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 315 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 316 "CARDINALITY": exp.ArraySize.from_arg_list, 317 "CONTAINS": exp.ArrayContains.from_arg_list, 318 "DATE_ADD": lambda args: exp.DateAdd( 319 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 320 ), 321 "DATE_DIFF": lambda args: exp.DateDiff( 322 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 323 ), 324 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 325 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 326 "DATE_TRUNC": date_trunc_to_time, 327 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 328 "ELEMENT_AT": lambda args: exp.Bracket( 329 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 330 ), 331 "FROM_HEX": exp.Unhex.from_arg_list, 332 "FROM_UNIXTIME": _build_from_unixtime, 333 "FROM_UTF8": lambda args: exp.Decode( 334 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 335 ), 336 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 337 "NOW": exp.CurrentTimestamp.from_arg_list, 338 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 339 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 340 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 341 this=seq_get(args, 0), 342 expression=seq_get(args, 1), 343 replacement=seq_get(args, 2) or exp.Literal.string(""), 344 ), 345 "ROW": exp.Struct.from_arg_list, 346 "SEQUENCE": exp.GenerateSeries.from_arg_list, 347 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 348 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 349 "STRPOS": lambda args: exp.StrPosition( 350 this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2) 351 ), 352 "TO_CHAR": _build_to_char, 353 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 354 "TO_UTF8": lambda args: exp.Encode( 355 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 356 ), 357 "MD5": exp.MD5Digest.from_arg_list, 358 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 359 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 360 } 361 362 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 363 FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.CurrentUser: lambda *_: "CURRENT_USER",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.Last: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
                _amend_exploded_column_table,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc. to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
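Taken together, the TYPE_MAPPING and TRANSFORMS tables above drive how other dialects' constructs are rewritten for Presto. As a rough illustration (a sketch; exact output can vary between sqlglot versions), TEXT becomes VARCHAR and ILIKE, which Presto lacks, is lowered to a case-normalized LIKE:

import sqlglot

# TEXT -> VARCHAR via TYPE_MAPPING; ILIKE -> LOWER(...) LIKE via no_ilike_sql.
print(
    sqlglot.transpile(
        "SELECT CAST(x AS TEXT) FROM t WHERE y ILIKE '%a%'",
        read="duckdb",
        write="presto",
    )[0]
)
# Roughly: SELECT CAST(x AS VARCHAR) FROM t WHERE LOWER(y) LIKE '%a%'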
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
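These options are passed as plain keyword arguments through the top-level APIs. A minimal sketch, not specific to Presto beyond the dialect name:

import sqlglot

sql = sqlglot.transpile(
    "SELECT a, b FROM t",
    write="presto",
    identify=True,  # always quote identifiers
    pretty=True,    # format the output
)[0]
print(sql)
# SELECT
#   "a",
#   "b"
# FROM "t"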
def md5_sql(self, expression: exp.MD5) -> str:
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this)

    if this.is_type(*exp.DataType.TEXT_TYPES):
        this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

    return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))
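In practice this means string arguments are routed through TO_UTF8 before hashing, and the binary digest is hex-encoded and lowercased. A hedged example (output shape may differ slightly across versions):

import sqlglot

print(sqlglot.transpile("SELECT MD5('abc')", write="presto")[0])
# Something like: SELECT LOWER(TO_HEX(MD5(TO_UTF8('abc'))))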
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
    # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
    # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
    # which seems to be using the same time mapping as Hive, as per:
    # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    this = expression.this
    value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
    value_as_timestamp = (
        exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
    )

    parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

    formatted_value = self.func(
        "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
    )
    parse_with_tz = self.func(
        "PARSE_DATETIME",
        formatted_value,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )
    coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
    return self.func("TO_UNIXTIME", coalesced)
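One way this surfaces (a sketch; Hive's UNIX_TIMESTAMP is one of the functions sqlglot parses into exp.StrToUnix, and the exact generated format strings depend on the time mappings):

import sqlglot

print(
    sqlglot.transpile(
        "SELECT UNIX_TIMESTAMP('2020-01-01 00:00:00')",
        read="hive",
        write="presto",
    )[0]
)
# Roughly: SELECT TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(...)), PARSE_DATETIME(...)))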
def bracket_sql(self, expression: exp.Bracket) -> str:
    if expression.args.get("safe"):
        return self.func(
            "ELEMENT_AT",
            expression.this,
            seq_get(
                apply_index_offset(
                    expression.this,
                    expression.expressions,
                    1 - expression.args.get("offset", 0),
                ),
                0,
            ),
        )
    return super().bracket_sql(expression)
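The `safe` flag is typically set when the source dialect's subscript returns NULL out of bounds rather than erroring, in which case Presto's ELEMENT_AT is used. A hand-built AST for illustration (real ASTs usually come from parsing a source dialect; offset=1 marks the index as already 1-based, so no shift is applied):

from sqlglot import exp

bracket = exp.Bracket(
    this=exp.column("arr"),
    expressions=[exp.Literal.number(1)],
    offset=1,
    safe=True,
)
print(bracket.sql(dialect="presto"))
# ELEMENT_AT(arr, 1)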
def struct_sql(self, expression: exp.Struct) -> str:
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)
    values: t.List[str] = []
    schema: t.List[str] = []
    unknown_type = False

    for e in expression.expressions:
        if isinstance(e, exp.PropertyEQ):
            if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                unknown_type = True
            else:
                schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
            values.append(self.sql(e, "expression"))
        else:
            values.append(self.sql(e))

    size = len(expression.expressions)

    if not size or len(schema) != size:
        if unknown_type:
            self.unsupported(
                "Cannot convert untyped key-value definitions (try annotate_types)."
            )
        return self.func("ROW", *values)
    return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
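So when every field can be named and typed, the struct is emitted as a ROW cast to a typed ROW schema; otherwise it falls back to a bare ROW(...). A hedged sketch using a DuckDB struct literal as the source:

import sqlglot

# DuckDB struct literals carry field names, so a typed ROW schema can be produced.
print(sqlglot.transpile("SELECT {'a': 1, 'b': 'x'}", read="duckdb", write="presto")[0])
# Roughly: SELECT CAST(ROW(1, 'x') AS ROW(a INTEGER, b VARCHAR))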
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    kind = expression.args["kind"]
    schema = expression.this
    if kind == "VIEW" and schema.expressions:
        expression.this.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: CREATE VIEW x (cola) then (cola) is the expression), so we need to remove them.
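For example (a sketch of the expected behavior), the view's column list is simply dropped on the way to Presto:

import sqlglot

print(sqlglot.transpile("CREATE VIEW v (cola) AS SELECT 1", write="presto")[0])
# CREATE VIEW v AS SELECT 1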
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    tables = expression.args.get("tables") or [expression.this]
    if len(tables) > 1:
        return super().delete_sql(expression)

    table = tables[0]
    expression.set("this", table)
    expression.set("tables", None)

    if isinstance(table, exp.Table):
        table_alias = table.args.get("alias")
        if table_alias:
            table_alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
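A hedged sketch of the effect: the alias is popped and columns qualified by it are unqualified, so the statement remains valid without it:

import sqlglot

print(sqlglot.transpile("DELETE FROM t AS a WHERE a.x = 1", write="presto")[0])
# Roughly: DELETE FROM t WHERE x = 1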
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc. to dot notation
            self.unsupported(
                f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
            )
            continue
        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            key = f'"{key}"'
        segments.append(f".{key}")

    expr = "".join(segments)

    return f"{this}{expr}"
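A hedged sketch of both behaviors, assuming sqlglot's comma-separated "dialect, key=value" settings syntax for passing the variant_extract_is_json_extract setting:

import sqlglot

snowflake_sql = "SELECT col:x.y FROM t"

# Default: a Snowflake VARIANT extract becomes JSON_EXTRACT.
print(sqlglot.transpile(snowflake_sql, read="snowflake", write="presto")[0])
# Roughly: SELECT JSON_EXTRACT(col, '$.x.y') FROM t

# With the setting disabled, it becomes ROW (dot) access instead.
print(
    sqlglot.transpile(
        snowflake_sql,
        read="snowflake",
        write="presto, variant_extract_is_json_extract=false",
    )[0]
)
# Roughly: SELECT col.x.y FROM t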
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql