sqlglot.dialects.redshift
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, transforms 6from sqlglot.dialects.dialect import ( 7 NormalizationStrategy, 8 concat_to_dpipe_sql, 9 concat_ws_to_dpipe_sql, 10 date_delta_sql, 11 generatedasidentitycolumnconstraint_sql, 12 json_extract_segments, 13 no_tablesample_sql, 14 rename_func, 15 map_date_part, 16) 17from sqlglot.dialects.postgres import Postgres 18from sqlglot.helper import seq_get 19from sqlglot.tokens import TokenType 20from sqlglot.parser import build_convert_timezone 21 22if t.TYPE_CHECKING: 23 from sqlglot._typing import E 24 25 26def _build_date_delta(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 27 def _builder(args: t.List) -> E: 28 expr = expr_type( 29 this=seq_get(args, 2), 30 expression=seq_get(args, 1), 31 unit=map_date_part(seq_get(args, 0)), 32 ) 33 if expr_type is exp.TsOrDsAdd: 34 expr.set("return_type", exp.DataType.build("TIMESTAMP")) 35 36 return expr 37 38 return _builder 39 40 41class Redshift(Postgres): 42 # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html 43 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 44 45 SUPPORTS_USER_DEFINED_TYPES = False 46 INDEX_OFFSET = 0 47 COPY_PARAMS_ARE_CSV = False 48 HEX_LOWERCASE = True 49 HAS_DISTINCT_ARRAY_CONSTRUCTORS = True 50 51 # ref: https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html 52 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 53 TIME_MAPPING = {**Postgres.TIME_MAPPING, "MON": "%b", "HH24": "%H", "HH": "%I"} 54 55 class Parser(Postgres.Parser): 56 FUNCTIONS = { 57 **Postgres.Parser.FUNCTIONS, 58 "ADD_MONTHS": lambda args: exp.TsOrDsAdd( 59 this=seq_get(args, 0), 60 expression=seq_get(args, 1), 61 unit=exp.var("month"), 62 return_type=exp.DataType.build("TIMESTAMP"), 63 ), 64 "CONVERT_TIMEZONE": lambda args: build_convert_timezone(args, "UTC"), 65 "DATEADD": _build_date_delta(exp.TsOrDsAdd), 66 "DATE_ADD": _build_date_delta(exp.TsOrDsAdd), 67 "DATEDIFF": _build_date_delta(exp.TsOrDsDiff), 68 
"DATE_DIFF": _build_date_delta(exp.TsOrDsDiff), 69 "GETDATE": exp.CurrentTimestamp.from_arg_list, 70 "LISTAGG": exp.GroupConcat.from_arg_list, 71 "SPLIT_TO_ARRAY": lambda args: exp.StringToArray( 72 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",") 73 ), 74 "STRTOL": exp.FromBase.from_arg_list, 75 } 76 77 NO_PAREN_FUNCTION_PARSERS = { 78 **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS, 79 "APPROXIMATE": lambda self: self._parse_approximate_count(), 80 "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, sysdate=True), 81 } 82 83 SUPPORTS_IMPLICIT_UNNEST = True 84 85 def _parse_table( 86 self, 87 schema: bool = False, 88 joins: bool = False, 89 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 90 parse_bracket: bool = False, 91 is_db_reference: bool = False, 92 parse_partition: bool = False, 93 consume_pipe: bool = False, 94 ) -> t.Optional[exp.Expression]: 95 # Redshift supports UNPIVOTing SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr` 96 unpivot = self._match(TokenType.UNPIVOT) 97 table = super()._parse_table( 98 schema=schema, 99 joins=joins, 100 alias_tokens=alias_tokens, 101 parse_bracket=parse_bracket, 102 is_db_reference=is_db_reference, 103 ) 104 105 return self.expression(exp.Pivot, this=table, unpivot=True) if unpivot else table 106 107 def _parse_convert( 108 self, strict: bool, safe: t.Optional[bool] = None 109 ) -> t.Optional[exp.Expression]: 110 to = self._parse_types() 111 self._match(TokenType.COMMA) 112 this = self._parse_bitwise() 113 return self.expression(exp.TryCast, this=this, to=to, safe=safe) 114 115 def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]: 116 index = self._index - 1 117 func = self._parse_function() 118 119 if isinstance(func, exp.Count) and isinstance(func.this, exp.Distinct): 120 return self.expression(exp.ApproxDistinct, this=seq_get(func.this.expressions, 0)) 121 self._retreat(index) 122 return None 123 124 class Tokenizer(Postgres.Tokenizer): 125 BIT_STRINGS 
= [] 126 HEX_STRINGS = [] 127 STRING_ESCAPES = ["\\", "'"] 128 129 KEYWORDS = { 130 **Postgres.Tokenizer.KEYWORDS, 131 "(+)": TokenType.JOIN_MARKER, 132 "HLLSKETCH": TokenType.HLLSKETCH, 133 "MINUS": TokenType.EXCEPT, 134 "SUPER": TokenType.SUPER, 135 "TOP": TokenType.TOP, 136 "UNLOAD": TokenType.COMMAND, 137 "VARBYTE": TokenType.VARBINARY, 138 "BINARY VARYING": TokenType.VARBINARY, 139 } 140 KEYWORDS.pop("VALUES") 141 142 # Redshift allows # to appear as a table identifier prefix 143 SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy() 144 SINGLE_TOKENS.pop("#") 145 146 class Generator(Postgres.Generator): 147 LOCKING_READS_SUPPORTED = False 148 QUERY_HINTS = False 149 VALUES_AS_TABLE = False 150 TZ_TO_WITH_TIME_ZONE = True 151 NVL2_SUPPORTED = True 152 LAST_DAY_SUPPORTS_DATE_PART = False 153 CAN_IMPLEMENT_ARRAY_ANY = False 154 MULTI_ARG_DISTINCT = True 155 COPY_PARAMS_ARE_WRAPPED = False 156 HEX_FUNC = "TO_HEX" 157 PARSE_JSON_NAME = "JSON_PARSE" 158 ARRAY_CONCAT_IS_VAR_LEN = False 159 SUPPORTS_CONVERT_TIMEZONE = True 160 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 161 SUPPORTS_MEDIAN = True 162 ALTER_SET_TYPE = "TYPE" 163 SUPPORTS_DECODE_CASE = True 164 SUPPORTS_BETWEEN_FLAGS = False 165 LIMIT_FETCH = "LIMIT" 166 167 # Redshift doesn't have `WITH` as part of their with_properties so we remove it 168 WITH_PROPERTIES_PREFIX = " " 169 170 TYPE_MAPPING = { 171 **Postgres.Generator.TYPE_MAPPING, 172 exp.DataType.Type.BINARY: "VARBYTE", 173 exp.DataType.Type.BLOB: "VARBYTE", 174 exp.DataType.Type.INT: "INTEGER", 175 exp.DataType.Type.TIMETZ: "TIME", 176 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 177 exp.DataType.Type.VARBINARY: "VARBYTE", 178 exp.DataType.Type.ROWVERSION: "VARBYTE", 179 } 180 181 TRANSFORMS = { 182 **Postgres.Generator.TRANSFORMS, 183 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CONCAT"), 184 exp.Concat: concat_to_dpipe_sql, 185 exp.ConcatWs: concat_ws_to_dpipe_sql, 186 exp.ApproxDistinct: lambda self, 187 e: f"APPROXIMATE 
COUNT(DISTINCT {self.sql(e, 'this')})", 188 exp.CurrentTimestamp: lambda self, e: ( 189 "SYSDATE" if e.args.get("sysdate") else "GETDATE()" 190 ), 191 exp.DateAdd: date_delta_sql("DATEADD"), 192 exp.DateDiff: date_delta_sql("DATEDIFF"), 193 exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this), 194 exp.DistStyleProperty: lambda self, e: self.naked_property(e), 195 exp.Explode: lambda self, e: self.explode_sql(e), 196 exp.FarmFingerprint: rename_func("FARMFINGERPRINT64"), 197 exp.FromBase: rename_func("STRTOL"), 198 exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql, 199 exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 200 exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"), 201 exp.GroupConcat: rename_func("LISTAGG"), 202 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 203 exp.Select: transforms.preprocess( 204 [ 205 transforms.eliminate_window_clause, 206 transforms.eliminate_distinct_on, 207 transforms.eliminate_semi_and_anti_joins, 208 transforms.unqualify_unnest, 209 transforms.unnest_generate_date_array_using_recursive_cte, 210 ] 211 ), 212 exp.SortKeyProperty: lambda self, 213 e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})", 214 exp.StartsWith: lambda self, 215 e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'", 216 exp.StringToArray: rename_func("SPLIT_TO_ARRAY"), 217 exp.TableSample: no_tablesample_sql, 218 exp.TsOrDsAdd: date_delta_sql("DATEADD"), 219 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 220 exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e), 221 } 222 223 # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots 224 TRANSFORMS.pop(exp.Pivot) 225 226 # Postgres doesn't support JSON_PARSE, but Redshift does 227 TRANSFORMS.pop(exp.ParseJSON) 228 229 # Redshift supports these functions 230 TRANSFORMS.pop(exp.AnyValue) 231 TRANSFORMS.pop(exp.LastDay) 232 
TRANSFORMS.pop(exp.SHA2) 233 234 RESERVED_KEYWORDS = { 235 "aes128", 236 "aes256", 237 "all", 238 "allowoverwrite", 239 "analyse", 240 "analyze", 241 "and", 242 "any", 243 "array", 244 "as", 245 "asc", 246 "authorization", 247 "az64", 248 "backup", 249 "between", 250 "binary", 251 "blanksasnull", 252 "both", 253 "bytedict", 254 "bzip2", 255 "case", 256 "cast", 257 "check", 258 "collate", 259 "column", 260 "constraint", 261 "create", 262 "credentials", 263 "cross", 264 "current_date", 265 "current_time", 266 "current_timestamp", 267 "current_user", 268 "current_user_id", 269 "default", 270 "deferrable", 271 "deflate", 272 "defrag", 273 "delta", 274 "delta32k", 275 "desc", 276 "disable", 277 "distinct", 278 "do", 279 "else", 280 "emptyasnull", 281 "enable", 282 "encode", 283 "encrypt ", 284 "encryption", 285 "end", 286 "except", 287 "explicit", 288 "false", 289 "for", 290 "foreign", 291 "freeze", 292 "from", 293 "full", 294 "globaldict256", 295 "globaldict64k", 296 "grant", 297 "group", 298 "gzip", 299 "having", 300 "identity", 301 "ignore", 302 "ilike", 303 "in", 304 "initially", 305 "inner", 306 "intersect", 307 "interval", 308 "into", 309 "is", 310 "isnull", 311 "join", 312 "leading", 313 "left", 314 "like", 315 "limit", 316 "localtime", 317 "localtimestamp", 318 "lun", 319 "luns", 320 "lzo", 321 "lzop", 322 "minus", 323 "mostly16", 324 "mostly32", 325 "mostly8", 326 "natural", 327 "new", 328 "not", 329 "notnull", 330 "null", 331 "nulls", 332 "off", 333 "offline", 334 "offset", 335 "oid", 336 "old", 337 "on", 338 "only", 339 "open", 340 "or", 341 "order", 342 "outer", 343 "overlaps", 344 "parallel", 345 "partition", 346 "percent", 347 "permissions", 348 "pivot", 349 "placing", 350 "primary", 351 "raw", 352 "readratio", 353 "recover", 354 "references", 355 "rejectlog", 356 "resort", 357 "respect", 358 "restore", 359 "right", 360 "select", 361 "session_user", 362 "similar", 363 "snapshot", 364 "some", 365 "sysdate", 366 "system", 367 "table", 368 "tag", 369 "tdes", 
370 "text255", 371 "text32k", 372 "then", 373 "timestamp", 374 "to", 375 "top", 376 "trailing", 377 "true", 378 "truncatecolumns", 379 "type", 380 "union", 381 "unique", 382 "unnest", 383 "unpivot", 384 "user", 385 "using", 386 "verbose", 387 "wallet", 388 "when", 389 "where", 390 "with", 391 "without", 392 } 393 394 def unnest_sql(self, expression: exp.Unnest) -> str: 395 args = expression.expressions 396 num_args = len(args) 397 398 if num_args != 1: 399 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 400 return "" 401 402 if isinstance(expression.find_ancestor(exp.From, exp.Join, exp.Select), exp.Select): 403 self.unsupported("Unsupported UNNEST when not used in FROM/JOIN clauses") 404 return "" 405 406 arg = self.sql(seq_get(args, 0)) 407 408 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 409 return f"{arg} AS {alias}" if alias else arg 410 411 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 412 if expression.is_type(exp.DataType.Type.JSON): 413 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 414 return self.sql(expression, "this") 415 416 return super().cast_sql(expression, safe_prefix=safe_prefix) 417 418 def datatype_sql(self, expression: exp.DataType) -> str: 419 """ 420 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 421 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 422 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 423 `TEXT` to `VARCHAR`. 
424 """ 425 if expression.is_type("text"): 426 expression.set("this", exp.DataType.Type.VARCHAR) 427 precision = expression.args.get("expressions") 428 429 if not precision: 430 expression.append("expressions", exp.var("MAX")) 431 432 return super().datatype_sql(expression) 433 434 def alterset_sql(self, expression: exp.AlterSet) -> str: 435 exprs = self.expressions(expression, flat=True) 436 exprs = f" TABLE PROPERTIES ({exprs})" if exprs else "" 437 location = self.sql(expression, "location") 438 location = f" LOCATION {location}" if location else "" 439 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 440 file_format = f" FILE FORMAT {file_format}" if file_format else "" 441 442 return f"SET{exprs}{location}{file_format}" 443 444 def array_sql(self, expression: exp.Array) -> str: 445 if expression.args.get("bracket_notation"): 446 return super().array_sql(expression) 447 448 return rename_func("ARRAY")(self, expression) 449 450 def explode_sql(self, expression: exp.Explode) -> str: 451 self.unsupported("Unsupported EXPLODE() function") 452 return "" 453 454 def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str: 455 scale = expression.args.get("scale") 456 this = self.sql(expression.this) 457 458 if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int: 459 this = f"({this} / POWER(10, {scale.to_py()}))" 460 461 return f"(TIMESTAMP 'epoch' + {this} * INTERVAL '1 SECOND')"
class Redshift(Postgres):
    """Amazon Redshift dialect, derived from Postgres."""

    # Identifiers are case-insensitive in Redshift:
    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    SUPPORTS_USER_DEFINED_TYPES = False
    INDEX_OFFSET = 0
    COPY_PARAMS_ARE_CSV = False
    HEX_LOWERCASE = True
    HAS_DISTINCT_ARRAY_CONSTRUCTORS = True

    # Format-string reference:
    # https://docs.aws.amazon.com/redshift/latest/dg/r_FORMAT_strings.html
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    TIME_MAPPING = {**Postgres.TIME_MAPPING, "MON": "%b", "HH24": "%H", "HH": "%I"}

    class Parser(Postgres.Parser):
        FUNCTIONS = {
            **Postgres.Parser.FUNCTIONS,
            "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                unit=exp.var("month"),
                return_type=exp.DataType.build("TIMESTAMP"),
            ),
            "CONVERT_TIMEZONE": lambda args: build_convert_timezone(args, "UTC"),
            "DATEADD": _build_date_delta(exp.TsOrDsAdd),
            "DATE_ADD": _build_date_delta(exp.TsOrDsAdd),
            "DATEDIFF": _build_date_delta(exp.TsOrDsDiff),
            "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
            "GETDATE": exp.CurrentTimestamp.from_arg_list,
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
            ),
            "STRTOL": exp.FromBase.from_arg_list,
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
            "APPROXIMATE": lambda self: self._parse_approximate_count(),
            "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, sysdate=True),
        }

        SUPPORTS_IMPLICIT_UNNEST = True

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # SUPER objects can be UNPIVOTed directly, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
            is_unpivot = self._match(TokenType.UNPIVOT)
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if not is_unpivot:
                return table
            return self.expression(exp.Pivot, this=table, unpivot=True)

        def _parse_convert(
            self, strict: bool, safe: t.Optional[bool] = None
        ) -> t.Optional[exp.Expression]:
            # CONVERT(type, value): the target type precedes the operand
            target = self._parse_types()
            self._match(TokenType.COMMA)
            value = self._parse_bitwise()
            return self.expression(exp.TryCast, this=value, to=target, safe=safe)

        def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
            # Accept only `APPROXIMATE COUNT(DISTINCT ...)`; otherwise rewind
            start = self._index - 1
            parsed = self._parse_function()

            if isinstance(parsed, exp.Count) and isinstance(parsed.this, exp.Distinct):
                return self.expression(
                    exp.ApproxDistinct, this=seq_get(parsed.this.expressions, 0)
                )

            self._retreat(start)
            return None

    class Tokenizer(Postgres.Tokenizer):
        BIT_STRINGS = []
        HEX_STRINGS = []
        STRING_ESCAPES = ["\\", "'"]

        KEYWORDS = {
            **Postgres.Tokenizer.KEYWORDS,
            "(+)": TokenType.JOIN_MARKER,
            "HLLSKETCH": TokenType.HLLSKETCH,
            "MINUS": TokenType.EXCEPT,
            "SUPER": TokenType.SUPER,
            "TOP": TokenType.TOP,
            "UNLOAD": TokenType.COMMAND,
            "VARBYTE": TokenType.VARBINARY,
            "BINARY VARYING": TokenType.VARBINARY,
        }
        KEYWORDS.pop("VALUES")

        # `#` may prefix (temp) table identifiers, so it must not be a single token
        SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("#")

    class Generator(Postgres.Generator):
        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = False
        MULTI_ARG_DISTINCT = True
        COPY_PARAMS_ARE_WRAPPED = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ALTER_SET_TYPE = "TYPE"
        SUPPORTS_DECODE_CASE = True
        SUPPORTS_BETWEEN_FLAGS = False
        LIMIT_FETCH = "LIMIT"

        # Redshift property lists are not introduced by `WITH`
        WITH_PROPERTIES_PREFIX = " "

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.BLOB: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
            exp.DataType.Type.ROWVERSION: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.ArrayConcat: lambda self, expr: self.arrayconcat_sql(expr, name="ARRAY_CONCAT"),
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            expr: f"APPROXIMATE COUNT(DISTINCT {self.sql(expr, 'this')})",
            exp.CurrentTimestamp: lambda self, expr: (
                "SYSDATE" if expr.args.get("sysdate") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, expr: self.func("DISTKEY", expr.this),
            exp.DistStyleProperty: lambda self, expr: self.naked_property(expr),
            exp.Explode: lambda self, expr: self.explode_sql(expr),
            exp.FarmFingerprint: rename_func("FARMFINGERPRINT64"),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.Hex: lambda self, expr: self.func(
                "UPPER", self.func("TO_HEX", self.sql(expr, "this"))
            ),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.eliminate_semi_and_anti_joins,
                    transforms.unqualify_unnest,
                    transforms.unnest_generate_date_array_using_recursive_cte,
                ]
            ),
            exp.SortKeyProperty: lambda self,
            expr: f"{'COMPOUND ' if expr.args['compound'] else ''}SORTKEY({self.format_args(*expr.this)})",
            exp.StartsWith: lambda self,
            expr: f"{self.sql(expr.this)} LIKE {self.sql(expr.expression)} || '%'",
            exp.StringToArray: rename_func("SPLIT_TO_ARRAY"),
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: lambda self, expr: self._unix_to_time_sql(expr),
        }

        # Pivots are supported here, unlike in Postgres
        TRANSFORMS.pop(exp.Pivot)
        # JSON_PARSE exists in Redshift, so no Postgres-style workaround is needed
        TRANSFORMS.pop(exp.ParseJSON)
        # These functions exist natively in Redshift
        TRANSFORMS.pop(exp.AnyValue)
        TRANSFORMS.pop(exp.LastDay)
        TRANSFORMS.pop(exp.SHA2)

        RESERVED_KEYWORDS = {
            "aes128", "aes256", "all", "allowoverwrite", "analyse", "analyze", "and",
            "any", "array", "as", "asc", "authorization", "az64", "backup", "between",
            "binary", "blanksasnull", "both", "bytedict", "bzip2", "case", "cast",
            "check", "collate", "column", "constraint", "create", "credentials",
            "cross", "current_date", "current_time", "current_timestamp",
            "current_user", "current_user_id", "default", "deferrable", "deflate",
            "defrag", "delta", "delta32k", "desc", "disable", "distinct", "do",
            "else", "emptyasnull", "enable", "encode",
            # NOTE(review): trailing space below looks unintentional — confirm
            "encrypt ", "encryption",
            "end", "except", "explicit", "false", "for", "foreign", "freeze", "from",
            "full", "globaldict256", "globaldict64k", "grant", "group", "gzip",
            "having", "identity", "ignore", "ilike", "in", "initially", "inner",
            "intersect", "interval", "into", "is", "isnull", "join", "leading",
            "left", "like", "limit", "localtime", "localtimestamp", "lun", "luns",
            "lzo", "lzop", "minus", "mostly16", "mostly32", "mostly8", "natural",
            "new", "not", "notnull", "null", "nulls", "off", "offline", "offset",
            "oid", "old", "on", "only", "open", "or", "order", "outer", "overlaps",
            "parallel", "partition", "percent", "permissions", "pivot", "placing",
            "primary", "raw", "readratio", "recover", "references", "rejectlog",
            "resort", "respect", "restore", "right", "select", "session_user",
            "similar", "snapshot", "some", "sysdate", "system", "table", "tag",
            "tdes", "text255", "text32k", "then", "timestamp", "to", "top",
            "trailing", "true", "truncatecolumns", "type", "union", "unique",
            "unnest", "unpivot", "user", "using", "verbose", "wallet", "when",
            "where", "with", "without",
        }

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render a single-argument UNNEST used in a FROM/JOIN clause."""
            exprs = expression.expressions
            if len(exprs) != 1:
                self.unsupported(f"Unsupported number of arguments in UNNEST: {len(exprs)}")
                return ""

            ancestor = expression.find_ancestor(exp.From, exp.Join, exp.Select)
            if isinstance(ancestor, exp.Select):
                self.unsupported("Unsupported UNNEST when not used in FROM/JOIN clauses")
                return ""

            rendered = self.sql(seq_get(exprs, 0))
            column_alias = self.expressions(expression.args.get("alias"), key="columns", flat=True)

            if column_alias:
                return f"{rendered} AS {column_alias}"
            return rendered

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            # A cast to JSON is a no-op since Redshift has no JSON type
            if expression.is_type(exp.DataType.Type.JSON):
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Render TEXT as VARCHAR. Redshift would otherwise default bare TEXT to
            VARCHAR(255), while callers usually intend the maximum width, so an
            unparameterized TEXT becomes VARCHAR(MAX); a parameterized one keeps
            its precision and only the type name changes.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                if not expression.args.get("expressions"):
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render ALTER ... SET with optional TABLE PROPERTIES, LOCATION and FILE FORMAT parts."""
            props = self.expressions(expression, flat=True)
            props_part = f" TABLE PROPERTIES ({props})" if props else ""

            loc = self.sql(expression, "location")
            loc_part = f" LOCATION {loc}" if loc else ""

            fmt = self.expressions(expression, key="file_format", flat=True, sep=" ")
            fmt_part = f" FILE FORMAT {fmt}" if fmt else ""

            return f"SET{props_part}{loc_part}{fmt_part}"

        def array_sql(self, expression: exp.Array) -> str:
            # ARRAY(...) (SUPER) and ARRAY[...] (typed array) are not interchangeable
            if not expression.args.get("bracket_notation"):
                return rename_func("ARRAY")(self, expression)

            return super().array_sql(expression)

        def explode_sql(self, expression: exp.Explode) -> str:
            self.unsupported("Unsupported EXPLODE() function")
            return ""

        def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
            """Render a unix epoch value as epoch-plus-interval arithmetic."""
            scale = expression.args.get("scale")
            value = self.sql(expression.this)

            # Divide sub-second epochs (ms/us/ns) down to seconds first
            if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
                value = f"({value} / POWER(10, {scale.to_py()}))"

            return f"(TIMESTAMP 'epoch' + {value} * INTERVAL '1 SECOND')"
Specifies the strategy according to which identifiers should be normalized.
Whether the ARRAY constructor is context-sensitive, i.e. in Redshift ARRAY[1, 2, 3] != ARRAY(1, 2, 3), as the former is of type INT[] while the latter is of type SUPER
Associates this dialect's time formats with their equivalent Python strftime
formats.
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (e.g. a literal newline character).
class Parser(Postgres.Parser):
    """Redshift-specific parsing on top of the Postgres parser."""

    FUNCTIONS = {
        **Postgres.Parser.FUNCTIONS,
        "ADD_MONTHS": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            unit=exp.var("month"),
            return_type=exp.DataType.build("TIMESTAMP"),
        ),
        # A two-argument CONVERT_TIMEZONE implies a UTC source zone
        "CONVERT_TIMEZONE": lambda args: build_convert_timezone(args, "UTC"),
        "DATEADD": _build_date_delta(exp.TsOrDsAdd),
        "DATE_ADD": _build_date_delta(exp.TsOrDsAdd),
        "DATEDIFF": _build_date_delta(exp.TsOrDsDiff),
        "DATE_DIFF": _build_date_delta(exp.TsOrDsDiff),
        "GETDATE": exp.CurrentTimestamp.from_arg_list,
        "LISTAGG": exp.GroupConcat.from_arg_list,
        # The separator defaults to a comma when omitted
        "SPLIT_TO_ARRAY": lambda args: exp.StringToArray(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string(",")
        ),
        "STRTOL": exp.FromBase.from_arg_list,
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **Postgres.Parser.NO_PAREN_FUNCTION_PARSERS,
        "APPROXIMATE": lambda self: self._parse_approximate_count(),
        "SYSDATE": lambda self: self.expression(exp.CurrentTimestamp, sysdate=True),
    }

    SUPPORTS_IMPLICIT_UNNEST = True

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        # Redshift can UNPIVOT SUPER objects, e.g. `UNPIVOT foo.obj[0] AS val AT attr`
        is_unpivot = self._match(TokenType.UNPIVOT)
        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if not is_unpivot:
            return table
        return self.expression(exp.Pivot, this=table, unpivot=True)

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        # CONVERT(type, value): parse the target type before the operand
        target = self._parse_types()
        self._match(TokenType.COMMA)
        value = self._parse_bitwise()
        return self.expression(exp.TryCast, this=value, to=target, safe=safe)

    def _parse_approximate_count(self) -> t.Optional[exp.ApproxDistinct]:
        # Only `APPROXIMATE COUNT(DISTINCT ...)` is recognized; anything else rewinds
        start = self._index - 1
        parsed = self._parse_function()

        if isinstance(parsed, exp.Count) and isinstance(parsed.this, exp.Distinct):
            return self.expression(exp.ApproxDistinct, this=seq_get(parsed.this.expressions, 0))

        self._retreat(start)
        return None
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
class Tokenizer(Postgres.Tokenizer):
    """Redshift tokenizer: tweaks Postgres tokenization for Redshift lexical rules."""

    BIT_STRINGS = []
    HEX_STRINGS = []
    STRING_ESCAPES = ["\\", "'"]

    KEYWORDS = {
        **Postgres.Tokenizer.KEYWORDS,
        "(+)": TokenType.JOIN_MARKER,
        "HLLSKETCH": TokenType.HLLSKETCH,
        "MINUS": TokenType.EXCEPT,
        "SUPER": TokenType.SUPER,
        "TOP": TokenType.TOP,
        "UNLOAD": TokenType.COMMAND,
        "VARBYTE": TokenType.VARBINARY,
        "BINARY VARYING": TokenType.VARBINARY,
    }
    # VALUES is not a keyword here, unlike in Postgres
    KEYWORDS.pop("VALUES")

    # `#` can prefix a table identifier, so it must not tokenize on its own
    SINGLE_TOKENS = Postgres.Tokenizer.SINGLE_TOKENS.copy()
    SINGLE_TOKENS.pop("#")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Generator(Postgres.Generator):
        """Redshift SQL generator: Postgres-based, overriding feature flags, type
        mappings, expression transforms and reserved keywords for Redshift."""

        LOCKING_READS_SUPPORTED = False
        QUERY_HINTS = False
        VALUES_AS_TABLE = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = True
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = False
        MULTI_ARG_DISTINCT = True
        COPY_PARAMS_ARE_WRAPPED = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ALTER_SET_TYPE = "TYPE"
        SUPPORTS_DECODE_CASE = True
        SUPPORTS_BETWEEN_FLAGS = False
        LIMIT_FETCH = "LIMIT"

        # Redshift doesn't have `WITH` as part of their with_properties so we remove it
        WITH_PROPERTIES_PREFIX = " "

        TYPE_MAPPING = {
            **Postgres.Generator.TYPE_MAPPING,
            # Binary-like types all render as Redshift's VARBYTE.
            exp.DataType.Type.BINARY: "VARBYTE",
            exp.DataType.Type.BLOB: "VARBYTE",
            exp.DataType.Type.INT: "INTEGER",
            # Redshift has no TIMETZ/TIMESTAMPTZ spellings; the tz suffix is
            # emitted separately via TZ_TO_WITH_TIME_ZONE above.
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "VARBYTE",
            exp.DataType.Type.ROWVERSION: "VARBYTE",
        }

        TRANSFORMS = {
            **Postgres.Generator.TRANSFORMS,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CONCAT"),
            exp.Concat: concat_to_dpipe_sql,
            exp.ConcatWs: concat_ws_to_dpipe_sql,
            exp.ApproxDistinct: lambda self,
            e: f"APPROXIMATE COUNT(DISTINCT {self.sql(e, 'this')})",
            # SYSDATE vs GETDATE() depends on how the timestamp was originally spelled.
            exp.CurrentTimestamp: lambda self, e: (
                "SYSDATE" if e.args.get("sysdate") else "GETDATE()"
            ),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DistKeyProperty: lambda self, e: self.func("DISTKEY", e.this),
            exp.DistStyleProperty: lambda self, e: self.naked_property(e),
            exp.Explode: lambda self, e: self.explode_sql(e),
            exp.FarmFingerprint: rename_func("FARMFINGERPRINT64"),
            exp.FromBase: rename_func("STRTOL"),
            exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
            exp.JSONExtract: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.JSONExtractScalar: json_extract_segments("JSON_EXTRACT_PATH_TEXT"),
            exp.GroupConcat: rename_func("LISTAGG"),
            # TO_HEX returns lowercase; wrap in UPPER to match exp.Hex semantics.
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.eliminate_semi_and_anti_joins,
                    transforms.unqualify_unnest,
                    transforms.unnest_generate_date_array_using_recursive_cte,
                ]
            ),
            exp.SortKeyProperty: lambda self,
            e: f"{'COMPOUND ' if e.args['compound'] else ''}SORTKEY({self.format_args(*e.this)})",
            exp.StartsWith: lambda self,
            e: f"{self.sql(e.this)} LIKE {self.sql(e.expression)} || '%'",
            exp.StringToArray: rename_func("SPLIT_TO_ARRAY"),
            exp.TableSample: no_tablesample_sql,
            exp.TsOrDsAdd: date_delta_sql("DATEADD"),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
        }

        # Postgres maps exp.Pivot to no_pivot_sql, but Redshift supports pivots
        TRANSFORMS.pop(exp.Pivot)

        # Postgres doesn't support JSON_PARSE, but Redshift does
        TRANSFORMS.pop(exp.ParseJSON)

        # Redshift supports these functions
        TRANSFORMS.pop(exp.AnyValue)
        TRANSFORMS.pop(exp.LastDay)
        TRANSFORMS.pop(exp.SHA2)

        # Words that must be quoted when used as identifiers.
        # See https://docs.aws.amazon.com/redshift/latest/dg/r_pg_keywords.html
        RESERVED_KEYWORDS = {
            "aes128",
            "aes256",
            "all",
            "allowoverwrite",
            "analyse",
            "analyze",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "authorization",
            "az64",
            "backup",
            "between",
            "binary",
            "blanksasnull",
            "both",
            "bytedict",
            "bzip2",
            "case",
            "cast",
            "check",
            "collate",
            "column",
            "constraint",
            "create",
            "credentials",
            "cross",
            "current_date",
            "current_time",
            "current_timestamp",
            "current_user",
            "current_user_id",
            "default",
            "deferrable",
            "deflate",
            "defrag",
            "delta",
            "delta32k",
            "desc",
            "disable",
            "distinct",
            "do",
            "else",
            "emptyasnull",
            "enable",
            "encode",
            "encrypt ",  # NOTE(review): trailing space looks unintentional — verify against upstream
            "encryption",
            "end",
            "except",
            "explicit",
            "false",
            "for",
            "foreign",
            "freeze",
            "from",
            "full",
            "globaldict256",
            "globaldict64k",
            "grant",
            "group",
            "gzip",
            "having",
            "identity",
            "ignore",
            "ilike",
            "in",
            "initially",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "isnull",
            "join",
            "leading",
            "left",
            "like",
            "limit",
            "localtime",
            "localtimestamp",
            "lun",
            "luns",
            "lzo",
            "lzop",
            "minus",
            "mostly16",
            "mostly32",
            "mostly8",
            "natural",
            "new",
            "not",
            "notnull",
            "null",
            "nulls",
            "off",
            "offline",
            "offset",
            "oid",
            "old",
            "on",
            "only",
            "open",
            "or",
            "order",
            "outer",
            "overlaps",
            "parallel",
            "partition",
            "percent",
            "permissions",
            "pivot",
            "placing",
            "primary",
            "raw",
            "readratio",
            "recover",
            "references",
            "rejectlog",
            "resort",
            "respect",
            "restore",
            "right",
            "select",
            "session_user",
            "similar",
            "snapshot",
            "some",
            "sysdate",
            "system",
            "table",
            "tag",
            "tdes",
            "text255",
            "text32k",
            "then",
            "timestamp",
            "to",
            "top",
            "trailing",
            "true",
            "truncatecolumns",
            "type",
            "union",
            "unique",
            "unnest",
            "unpivot",
            "user",
            "using",
            "verbose",
            "wallet",
            "when",
            "where",
            "with",
            "without",
        }

        def unnest_sql(self, expression: exp.Unnest) -> str:
            """Render UNNEST as a bare table expression; Redshift only supports
            single-argument UNNEST in FROM/JOIN position."""
            args = expression.expressions
            num_args = len(args)

            if num_args != 1:
                self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}")
                return ""

            # If the nearest From/Join/Select ancestor is a Select, the UNNEST is
            # in a projection (not a FROM/JOIN source), which Redshift can't do.
            if isinstance(expression.find_ancestor(exp.From, exp.Join, exp.Select), exp.Select):
                self.unsupported("Unsupported UNNEST when not used in FROM/JOIN clauses")
                return ""

            arg = self.sql(seq_get(args, 0))

            alias = self.expressions(expression.args.get("alias"), key="columns", flat=True)
            return f"{arg} AS {alias}" if alias else arg

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.JSON):
                # Redshift doesn't support a JSON type, so casting to it is treated as a noop
                return self.sql(expression, "this")

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """
            Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean
            VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type
            without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert
            `TEXT` to `VARCHAR`.
            """
            if expression.is_type("text"):
                expression.set("this", exp.DataType.Type.VARCHAR)
                precision = expression.args.get("expressions")

                if not precision:
                    expression.append("expressions", exp.var("MAX"))

            return super().datatype_sql(expression)

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            """Render the SET clause of ALTER TABLE: table properties, location
            and file format, each emitted only when present."""
            exprs = self.expressions(expression, flat=True)
            exprs = f" TABLE PROPERTIES ({exprs})" if exprs else ""
            location = self.sql(expression, "location")
            location = f" LOCATION {location}" if location else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" FILE FORMAT {file_format}" if file_format else ""

            return f"SET{exprs}{location}{file_format}"

        def array_sql(self, expression: exp.Array) -> str:
            # Preserve bracket-literal arrays; otherwise emit the ARRAY(...) function form.
            if expression.args.get("bracket_notation"):
                return super().array_sql(expression)

            return rename_func("ARRAY")(self, expression)

        def explode_sql(self, expression: exp.Explode) -> str:
            # Redshift has no EXPLODE equivalent; report and drop it.
            self.unsupported("Unsupported EXPLODE() function")
            return ""

        def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
            """Convert a unix epoch value to a timestamp via epoch arithmetic,
            dividing away any sub-second scale first."""
            scale = expression.args.get("scale")
            this = self.sql(expression.this)

            if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
                this = f"({this} / POWER(10, {scale.to_py()}))"

            return f"(TIMESTAMP 'epoch' + {this} * INTERVAL '1 SECOND')"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
395 def unnest_sql(self, expression: exp.Unnest) -> str: 396 args = expression.expressions 397 num_args = len(args) 398 399 if num_args != 1: 400 self.unsupported(f"Unsupported number of arguments in UNNEST: {num_args}") 401 return "" 402 403 if isinstance(expression.find_ancestor(exp.From, exp.Join, exp.Select), exp.Select): 404 self.unsupported("Unsupported UNNEST when not used in FROM/JOIN clauses") 405 return "" 406 407 arg = self.sql(seq_get(args, 0)) 408 409 alias = self.expressions(expression.args.get("alias"), key="columns", flat=True) 410 return f"{arg} AS {alias}" if alias else arg
412 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 413 if expression.is_type(exp.DataType.Type.JSON): 414 # Redshift doesn't support a JSON type, so casting to it is treated as a noop 415 return self.sql(expression, "this") 416 417 return super().cast_sql(expression, safe_prefix=safe_prefix)
419 def datatype_sql(self, expression: exp.DataType) -> str: 420 """ 421 Redshift converts the `TEXT` data type to `VARCHAR(255)` by default when people more generally mean 422 VARCHAR of max length which is `VARCHAR(max)` in Redshift. Therefore if we get a `TEXT` data type 423 without precision we convert it to `VARCHAR(max)` and if it does have precision then we just convert 424 `TEXT` to `VARCHAR`. 425 """ 426 if expression.is_type("text"): 427 expression.set("this", exp.DataType.Type.VARCHAR) 428 precision = expression.args.get("expressions") 429 430 if not precision: 431 expression.append("expressions", exp.var("MAX")) 432 433 return super().datatype_sql(expression)
Redshift converts the TEXT data type to VARCHAR(255) by default when people more generally mean VARCHAR of max length, which is VARCHAR(max) in Redshift. Therefore, if we get a TEXT data type without precision we convert it to VARCHAR(max), and if it does have precision then we just convert TEXT to VARCHAR.
435 def alterset_sql(self, expression: exp.AlterSet) -> str: 436 exprs = self.expressions(expression, flat=True) 437 exprs = f" TABLE PROPERTIES ({exprs})" if exprs else "" 438 location = self.sql(expression, "location") 439 location = f" LOCATION {location}" if location else "" 440 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 441 file_format = f" FILE FORMAT {file_format}" if file_format else "" 442 443 return f"SET{exprs}{location}{file_format}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_PARAMS_EQ_REQUIRED
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ARRAY_SIZE_NAME
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- sqlglot.dialects.postgres.Postgres.Generator
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- JOIN_HINTS
- TABLE_HINTS
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_SEED_KEYWORD
- SUPPORTS_SELECT_INTO
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- SUPPORTS_WINDOW_EXCLUDE
- COPY_HAS_INTO_KEYWORD
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTED_JSON_PATH_PARTS
- PROPERTIES_LOCATION
- round_sql
- schemacommentproperty_sql
- commentcolumnconstraint_sql
- bracket_sql
- matchagainst_sql
- computedcolumnconstraint_sql
- isascii_sql
- currentschema_sql
- interval_sql
- placeholder_sql
- arraycontains_sql