sqlglot.parsers.athena
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp 6from sqlglot.errors import ErrorLevel 7from sqlglot.parser import Parser 8from sqlglot.parsers.trino import TrinoParser 9from sqlglot.tokens import TokenType, Token 10 11if t.TYPE_CHECKING: 12 from sqlglot.dialects.dialect import DialectType 13 from sqlglot.dialects.hive import Hive 14 from sqlglot.dialects.trino import Trino 15 16 17class AthenaTrinoParser(TrinoParser): 18 STATEMENT_PARSERS = { 19 **TrinoParser.STATEMENT_PARSERS, 20 TokenType.USING: lambda self: self._parse_as_command(self._prev), 21 } 22 23 24class AthenaParser(Parser): 25 def __init__( 26 self, 27 error_level: ErrorLevel | None = None, 28 error_message_context: int = 100, 29 max_errors: int = 3, 30 dialect: DialectType = None, 31 hive: Hive | None = None, 32 trino: Trino | None = None, 33 ) -> None: 34 from sqlglot.dialects.hive import Hive 35 from sqlglot.dialects.trino import Trino 36 37 hive = hive or Hive() 38 trino = trino or Trino() 39 40 super().__init__( 41 error_level=error_level, 42 error_message_context=error_message_context, 43 max_errors=max_errors, 44 dialect=dialect, 45 ) 46 47 self._hive_parser = hive.parser( 48 error_level=error_level, 49 error_message_context=error_message_context, 50 max_errors=max_errors, 51 ) 52 self._trino_parser = AthenaTrinoParser( 53 error_level=error_level, 54 error_message_context=error_message_context, 55 max_errors=max_errors, 56 dialect=trino, 57 ) 58 59 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 60 if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM: 61 return self._hive_parser.parse(raw_tokens[1:], sql) 62 63 return self._trino_parser.parse(raw_tokens, sql) 64 65 def parse_into( 66 self, 67 expression_types: exp.IntoType, 68 raw_tokens: list[Token], 69 sql: str | None = None, 70 ) -> list[exp.Expr | None]: 71 if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM: 72 return 
self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql) 73 74 return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
class AthenaTrinoParser(TrinoParser):
    """Trino parser variant whose statement table also covers the USING token."""

    # Same entries as TrinoParser, plus a USING entry that passes the
    # previously consumed token (`self._prev`) to `_parse_as_command`.
    STATEMENT_PARSERS = TrinoParser.STATEMENT_PARSERS | {
        TokenType.USING: lambda self: self._parse_as_command(self._prev),
    }
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes allowed, used to guard against memory exhaustion. Set to -1 (default) to disable the check.
STATEMENT_PARSERS =
{<TokenType.ALTER: 217>: <function Parser.<lambda>>, <TokenType.ANALYZE: 435>: <function Parser.<lambda>>, <TokenType.BEGIN: 227>: <function Parser.<lambda>>, <TokenType.CACHE: 230>: <function Parser.<lambda>>, <TokenType.COMMENT: 236>: <function Parser.<lambda>>, <TokenType.COMMIT: 237>: <function Parser.<lambda>>, <TokenType.COPY: 240>: <function Parser.<lambda>>, <TokenType.CREATE: 241>: <function Parser.<lambda>>, <TokenType.DELETE: 254>: <function Parser.<lambda>>, <TokenType.DESC: 255>: <function Parser.<lambda>>, <TokenType.DESCRIBE: 256>: <function Parser.<lambda>>, <TokenType.DROP: 262>: <function Parser.<lambda>>, <TokenType.GRANT: 286>: <function Parser.<lambda>>, <TokenType.REVOKE: 373>: <function Parser.<lambda>>, <TokenType.INSERT: 297>: <function Parser.<lambda>>, <TokenType.KILL: 311>: <function Parser.<lambda>>, <TokenType.LOAD: 318>: <function Parser.<lambda>>, <TokenType.MERGE: 325>: <function Parser.<lambda>>, <TokenType.PIVOT: 352>: <function Parser.<lambda>>, <TokenType.PRAGMA: 357>: <function Parser.<lambda>>, <TokenType.REFRESH: 369>: <function Parser.<lambda>>, <TokenType.ROLLBACK: 378>: <function Parser.<lambda>>, <TokenType.SET: 388>: <function Parser.<lambda>>, <TokenType.TRUNCATE: 407>: <function Parser.<lambda>>, <TokenType.UNCACHE: 409>: <function Parser.<lambda>>, <TokenType.UNPIVOT: 412>: <function Parser.<lambda>>, <TokenType.UPDATE: 413>: <function Parser.<lambda>>, <TokenType.USE: 414>: <function Parser.<lambda>>, <TokenType.SEMICOLON: 19>: <function Parser.<lambda>>, <TokenType.USING: 415>: <function AthenaTrinoParser.<lambda>>}
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- SUBQUERY_TOKENS
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- ID_VAR_TOKENS
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- IDENTIFIER_TOKENS
- BRACKETS
- COLUMN_POSTFIX_TOKENS
- TABLE_POSTFIX_TOKENS
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- TABLE_TERMINATORS
- LAMBDAS
- TYPED_LAMBDA_ARGS
- LAMBDA_ARG_TERMINATORS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- SECURITY_PROPERTY_KEYWORDS
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- SHOW_TRIE
- SET_TRIE
- error_level
- error_message_context
- max_errors
- max_nodes
- dialect
- sql
- errors
- reset
- raise_error
- validate_expression
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
class AthenaParser(Parser):
    """Parser facade that delegates to a Hive parser or an `AthenaTrinoParser`.

    The delegate is chosen per call from the first token of the stream: a
    leading `TokenType.HIVE_TOKEN_STREAM` token selects the Hive parser (the
    marker token itself is dropped), anything else selects the Trino parser.
    """

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
        hive: Hive | None = None,
        trino: Trino | None = None,
        max_nodes: int = -1,
    ) -> None:
        """Create the facade together with its two delegate parsers.

        Arguments:
            error_level: Forwarded to the base class and to both delegates.
            error_message_context: Forwarded to the base class and to both delegates.
            max_errors: Forwarded to the base class and to both delegates.
            dialect: Forwarded to the base class only.
            hive: Dialect whose `parser(...)` builds the Hive delegate. A new
                `Hive()` is used when this is falsy.
            trino: Dialect handed to `AthenaTrinoParser`. A new `Trino()` is
                used when this is falsy.
            max_nodes: Maximum number of AST nodes; forwarded to the base class
                and to both delegates so the limit applies to whichever parser
                does the work. Defaults to -1, the documented "check disabled"
                value of `Parser`.
        """
        # Imported at call time rather than at module import time
        # (presumably to avoid an import cycle - confirm).
        from sqlglot.dialects.hive import Hive
        from sqlglot.dialects.trino import Trino

        hive = hive or Hive()
        trino = trino or Trino()

        super().__init__(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            max_nodes=max_nodes,
            dialect=dialect,
        )

        self._hive_parser = hive.parser(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            max_nodes=max_nodes,
        )
        self._trino_parser = AthenaTrinoParser(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            max_nodes=max_nodes,
            dialect=trino,
        )

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """Parse `raw_tokens` with the delegate selected by the first token.

        Arguments:
            raw_tokens: The list of tokens.
            sql: The original SQL string.

        Returns:
            Whatever the selected delegate's `parse` returns.
        """
        if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
            # The marker only selects the delegate; it is not passed on.
            return self._hive_parser.parse(raw_tokens[1:], sql)

        return self._trino_parser.parse(raw_tokens, sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Parse `raw_tokens` into `expression_types` with the selected delegate.

        Arguments:
            expression_types: The expression type(s) to parse the tokens into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, if available.

        Returns:
            Whatever the selected delegate's `parse_into` returns.
        """
        if raw_tokens and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM:
            # The marker only selects the delegate; it is not passed on.
            return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)

        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes allowed, used to guard against memory exhaustion. Set to -1 (default) to disable the check.
AthenaParser( error_level: sqlglot.errors.ErrorLevel | None = None, error_message_context: int = 100, max_errors: int = 3, dialect: Union[str, sqlglot.dialects.Dialect, type[sqlglot.dialects.Dialect], NoneType] = None, hive: sqlglot.dialects.hive.Hive | None = None, trino: sqlglot.dialects.trino.Trino | None = None)
def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
    hive: Hive | None = None,
    trino: Trino | None = None,
    max_nodes: int = -1,
) -> None:
    """Create the facade together with its Hive and Trino delegate parsers.

    Arguments:
        error_level: Forwarded to the base class and to both delegates.
        error_message_context: Forwarded to the base class and to both delegates.
        max_errors: Forwarded to the base class and to both delegates.
        dialect: Forwarded to the base class only.
        hive: Dialect whose `parser(...)` builds the Hive delegate. A new
            `Hive()` is used when this is falsy.
        trino: Dialect handed to `AthenaTrinoParser`. A new `Trino()` is
            used when this is falsy.
        max_nodes: Maximum number of AST nodes; forwarded to the base class
            and to both delegates so the limit applies to whichever parser
            does the work. Defaults to -1, the documented "check disabled"
            value of `Parser`.
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid an import cycle - confirm).
    from sqlglot.dialects.hive import Hive
    from sqlglot.dialects.trino import Trino

    hive = hive or Hive()
    trino = trino or Trino()

    super().__init__(
        error_level=error_level,
        error_message_context=error_message_context,
        max_errors=max_errors,
        max_nodes=max_nodes,
        dialect=dialect,
    )

    self._hive_parser = hive.parser(
        error_level=error_level,
        error_message_context=error_message_context,
        max_errors=max_errors,
        max_nodes=max_nodes,
    )
    self._trino_parser = AthenaTrinoParser(
        error_level=error_level,
        error_message_context=error_message_context,
        max_errors=max_errors,
        max_nodes=max_nodes,
        dialect=trino,
    )
def
parse( self, raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str) -> list[sqlglot.expressions.core.Expr | None]:
def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """Hand the token stream to the Hive or the Trino delegate parser.

    A stream whose first token has type `TokenType.HIVE_TOKEN_STREAM` goes to
    the Hive parser without that first token; an empty stream or any other
    stream goes to the Trino parser unchanged.

    Arguments:
        raw_tokens: The list of tokens.
        sql: The original SQL string.

    Returns:
        Whatever the selected delegate's `parse` returns.
    """
    # Guard clause: no tokens, or no Hive marker up front -> Trino.
    if not raw_tokens or raw_tokens[0].token_type != TokenType.HIVE_TOKEN_STREAM:
        return self._trino_parser.parse(raw_tokens, sql)

    # The marker only selects the delegate; it is not passed on.
    tokens_after_marker = raw_tokens[1:]
    return self._hive_parser.parse(tokens_after_marker, sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
def
parse_into( self, expression_types: Union[type[sqlglot.expressions.core.Expr], Collection[type[sqlglot.expressions.core.Expr]]], raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str | None = None) -> list[sqlglot.expressions.core.Expr | None]:
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """Hand the token stream to the delegate's `parse_into`.

    A stream whose first token has type `TokenType.HIVE_TOKEN_STREAM` goes to
    the Hive parser without that first token; an empty stream or any other
    stream goes to the Trino parser unchanged.

    Arguments:
        expression_types: The expression type(s) to parse the tokens into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, if available.

    Returns:
        Whatever the selected delegate's `parse_into` returns.
    """
    # Guard clause: no tokens, or no Hive marker up front -> Trino.
    if not raw_tokens or raw_tokens[0].token_type != TokenType.HIVE_TOKEN_STREAM:
        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

    # The marker only selects the delegate; it is not passed on.
    tokens_after_marker = raw_tokens[1:]
    return self._hive_parser.parse_into(expression_types, tokens_after_marker, sql)
Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expr.
Inherited Members
- sqlglot.parser.Parser
- FUNCTIONS
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- SUBQUERY_TOKENS
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- ID_VAR_TOKENS
- TABLE_ALIAS_TOKENS
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- IDENTIFIER_TOKENS
- BRACKETS
- COLUMN_POSTFIX_TOKENS
- TABLE_POSTFIX_TOKENS
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- TABLE_TERMINATORS
- LAMBDAS
- TYPED_LAMBDA_ARGS
- LAMBDA_ARG_TERMINATORS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- SECURITY_PROPERTY_KEYWORDS
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- SHOW_TRIE
- SET_TRIE
- error_level
- error_message_context
- max_errors
- max_nodes
- dialect
- sql
- errors
- reset
- raise_error
- validate_expression
- check_errors
- expression
- parse_set_operation
- build_cast