Edit on GitHub

sqlglot.parsers.athena

 1from __future__ import annotations
 2
 3import typing as t
 4
 5from sqlglot import exp
 6from sqlglot.errors import ErrorLevel
 7from sqlglot.parser import Parser
 8from sqlglot.parsers.trino import TrinoParser
 9from sqlglot.tokens import TokenType, Token
10
11if t.TYPE_CHECKING:
12    from sqlglot.dialects.dialect import DialectType
13    from sqlglot.dialects.hive import Hive
14    from sqlglot.dialects.trino import Trino
15
16
class AthenaTrinoParser(TrinoParser):
    """Trino parser extended for Athena.

    Identical to ``TrinoParser`` except that a statement beginning with a
    ``USING`` token is accepted and parsed as an opaque command instead of
    being rejected.
    """

    # Copy the base table, then register the Athena-specific USING handler.
    STATEMENT_PARSERS = dict(TrinoParser.STATEMENT_PARSERS)
    STATEMENT_PARSERS[TokenType.USING] = lambda self: self._parse_as_command(self._prev)
22
23
class AthenaParser(Parser):
    """Dispatching parser for Athena.

    Athena SQL blends two engines, so this parser does no parsing itself:
    each token stream is forwarded either to a Hive parser or to an
    Athena-flavored Trino parser. A leading ``HIVE_TOKEN_STREAM`` marker
    token selects the Hive delegate; everything else goes to Trino.
    """

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
        hive: Hive | None = None,
        trino: Trino | None = None,
    ) -> None:
        # Local imports: the dialect classes are only needed here to build
        # default instances when none were supplied.
        from sqlglot.dialects.hive import Hive
        from sqlglot.dialects.trino import Trino

        super().__init__(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            dialect=dialect,
        )

        # Both delegate parsers share this parser's error configuration.
        error_settings = {
            "error_level": error_level,
            "error_message_context": error_message_context,
            "max_errors": max_errors,
        }
        self._hive_parser = (hive or Hive()).parser(**error_settings)
        self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **error_settings)

    def _is_hive_stream(self, raw_tokens: list[Token]) -> bool:
        """Return True when the token list starts with the Hive stream marker."""
        return bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """Parse ``raw_tokens`` with the Hive or Trino delegate, as appropriate."""
        if self._is_hive_stream(raw_tokens):
            # Drop the marker token before handing off to the Hive parser.
            return self._hive_parser.parse(raw_tokens[1:], sql)
        return self._trino_parser.parse(raw_tokens, sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Parse ``raw_tokens`` into the given expression type(s) via the right delegate."""
        if self._is_hive_stream(raw_tokens):
            return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
class AthenaTrinoParser(sqlglot.parsers.trino.TrinoParser):
class AthenaTrinoParser(TrinoParser):
    """Trino parser variant for Athena that accepts a leading ``USING`` token,
    parsing it as an opaque command rather than rejecting the statement."""

    # Start from the inherited table and add the Athena-only entry.
    STATEMENT_PARSERS = dict(TrinoParser.STATEMENT_PARSERS)
    STATEMENT_PARSERS[TokenType.USING] = lambda self: self._parse_as_command(self._prev)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
STATEMENT_PARSERS = {<TokenType.ALTER: 217>: <function Parser.<lambda>>, <TokenType.ANALYZE: 435>: <function Parser.<lambda>>, <TokenType.BEGIN: 227>: <function Parser.<lambda>>, <TokenType.CACHE: 230>: <function Parser.<lambda>>, <TokenType.COMMENT: 236>: <function Parser.<lambda>>, <TokenType.COMMIT: 237>: <function Parser.<lambda>>, <TokenType.COPY: 240>: <function Parser.<lambda>>, <TokenType.CREATE: 241>: <function Parser.<lambda>>, <TokenType.DELETE: 254>: <function Parser.<lambda>>, <TokenType.DESC: 255>: <function Parser.<lambda>>, <TokenType.DESCRIBE: 256>: <function Parser.<lambda>>, <TokenType.DROP: 262>: <function Parser.<lambda>>, <TokenType.GRANT: 286>: <function Parser.<lambda>>, <TokenType.REVOKE: 373>: <function Parser.<lambda>>, <TokenType.INSERT: 297>: <function Parser.<lambda>>, <TokenType.KILL: 311>: <function Parser.<lambda>>, <TokenType.LOAD: 318>: <function Parser.<lambda>>, <TokenType.MERGE: 325>: <function Parser.<lambda>>, <TokenType.PIVOT: 352>: <function Parser.<lambda>>, <TokenType.PRAGMA: 357>: <function Parser.<lambda>>, <TokenType.REFRESH: 369>: <function Parser.<lambda>>, <TokenType.ROLLBACK: 378>: <function Parser.<lambda>>, <TokenType.SET: 388>: <function Parser.<lambda>>, <TokenType.TRUNCATE: 407>: <function Parser.<lambda>>, <TokenType.UNCACHE: 409>: <function Parser.<lambda>>, <TokenType.UNPIVOT: 412>: <function Parser.<lambda>>, <TokenType.UPDATE: 413>: <function Parser.<lambda>>, <TokenType.USE: 414>: <function Parser.<lambda>>, <TokenType.SEMICOLON: 19>: <function Parser.<lambda>>, <TokenType.USING: 415>: <function AthenaTrinoParser.<lambda>>}
Inherited Members
sqlglot.parser.Parser
Parser
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
SUBQUERY_TOKENS
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
TRIGGER_EVENTS
ALTERABLES
ID_VAR_TOKENS
ALIAS_TOKENS
COLON_PLACEHOLDER_TOKENS
ARRAY_CONSTRUCTORS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
IDENTIFIER_TOKENS
BRACKETS
COLUMN_POSTFIX_TOKENS
TABLE_POSTFIX_TOKENS
FUNC_TOKENS
CONJUNCTION
ASSIGNMENT
DISJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
TABLE_TERMINATORS
LAMBDAS
TYPED_LAMBDA_ARGS
LAMBDA_ARG_TERMINATORS
COLUMN_OPERATORS
CAST_COLUMN_OPERATORS
EXPRESSION_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PIPE_SYNTAX_TRANSFORM_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
ALTER_ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
QUERY_MODIFIER_PARSERS
QUERY_MODIFIER_TOKENS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
TYPE_CONVERTERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
TRIGGER_TIMING
TRIGGER_DEFERRABLE
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
SCHEMA_BINDING_OPTIONS
PROCEDURE_OPTIONS
EXECUTE_AS_OPTIONS
KEY_CONSTRAINT_OPTIONS
WINDOW_EXCLUDE_OPTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_PREFIX
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
COPY_INTO_VARLEN_OPTIONS
IS_JSON_PREDICATE_KIND
ODBC_DATETIME_LITERALS
ON_CONDITION_TOKENS
PRIVILEGE_FOLLOW_TOKENS
DESCRIBE_STYLES
SET_ASSIGNMENT_DELIMITERS
ANALYZE_STYLES
ANALYZE_EXPRESSION_PARSERS
PARTITION_KEYWORDS
AMBIGUOUS_ALIAS_TOKENS
OPERATION_MODIFIERS
RECURSIVE_CTE_SEARCH_KIND
SECURITY_PROPERTY_KEYWORDS
MODIFIABLES
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
TABLESAMPLE_CSV
DEFAULT_SAMPLING_METHOD
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_SET_OP
SET_OP_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
COLON_IS_VARIANT_EXTRACT
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
OPTIONAL_ALIAS_TOKEN_CTE
ALTER_RENAME_REQUIRES_COLUMN
ALTER_TABLE_PARTITIONS
JOINS_HAVE_EQUAL_PRECEDENCE
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
ADD_JOIN_ON_TRUE
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
SHOW_TRIE
SET_TRIE
error_level
error_message_context
max_errors
max_nodes
dialect
sql
errors
reset
raise_error
validate_expression
parse
parse_into
check_errors
expression
parse_set_operation
build_cast
sqlglot.parsers.trino.TrinoParser
NO_PAREN_FUNCTIONS
FUNCTIONS
FUNCTION_PARSERS
JSON_QUERY_OPTIONS
sqlglot.parsers.presto.PrestoParser
VALUES_FOLLOWED_BY_PAREN
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
TABLE_ALIAS_TOKENS
class AthenaParser(sqlglot.parser.Parser):
class AthenaParser(Parser):
    """Parser facade for Athena: routes each token stream to a Hive parser
    (when a leading ``HIVE_TOKEN_STREAM`` marker is present) or to an
    Athena-flavored Trino parser (otherwise)."""

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
        hive: Hive | None = None,
        trino: Trino | None = None,
    ) -> None:
        # Dialect classes are imported here only to construct defaults.
        from sqlglot.dialects.hive import Hive
        from sqlglot.dialects.trino import Trino

        super().__init__(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            dialect=dialect,
        )

        # The delegates inherit this parser's error configuration.
        shared = {
            "error_level": error_level,
            "error_message_context": error_message_context,
            "max_errors": max_errors,
        }
        self._hive_parser = (hive or Hive()).parser(**shared)
        self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **shared)

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """Dispatch parsing to the Hive or Trino delegate."""
        is_hive = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        if is_hive:
            # Skip the marker token itself.
            return self._hive_parser.parse(raw_tokens[1:], sql)
        return self._trino_parser.parse(raw_tokens, sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Dispatch typed parsing to the Hive or Trino delegate."""
        is_hive = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        if is_hive:
            return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
AthenaParser( error_level: sqlglot.errors.ErrorLevel | None = None, error_message_context: int = 100, max_errors: int = 3, dialect: Union[str, sqlglot.dialects.Dialect, type[sqlglot.dialects.Dialect], NoneType] = None, hive: sqlglot.dialects.hive.Hive | None = None, trino: sqlglot.dialects.trino.Trino | None = None)
def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
    hive: Hive | None = None,
    trino: Trino | None = None,
) -> None:
    """Build the Hive and Trino delegate parsers used for dispatch."""
    # Imported locally; only needed to create default dialect instances.
    from sqlglot.dialects.hive import Hive
    from sqlglot.dialects.trino import Trino

    super().__init__(
        error_level=error_level,
        error_message_context=error_message_context,
        max_errors=max_errors,
        dialect=dialect,
    )

    # Propagate this parser's error configuration to both delegates.
    settings = {
        "error_level": error_level,
        "error_message_context": error_message_context,
        "max_errors": max_errors,
    }
    self._hive_parser = (hive or Hive()).parser(**settings)
    self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **settings)
def parse( self, raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str) -> list[sqlglot.expressions.core.Expr | None]:
def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """Parse via the Hive delegate when the Hive stream marker leads the
    token list (marker dropped), otherwise via the Trino delegate."""
    starts_with_marker = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
    if starts_with_marker:
        return self._hive_parser.parse(raw_tokens[1:], sql)
    return self._trino_parser.parse(raw_tokens, sql)

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: The list of tokens.
  • sql: The original SQL string.
Returns:

The list of the produced syntax trees.

def parse_into( self, expression_types: Union[type[sqlglot.expressions.core.Expr], Collection[type[sqlglot.expressions.core.Expr]]], raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str | None = None) -> list[sqlglot.expressions.core.Expr | None]:
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """Parse into the given type(s) via the Hive delegate when the Hive
    stream marker leads the token list, otherwise via the Trino delegate."""
    starts_with_marker = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
    if starts_with_marker:
        return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
    return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: The expression type(s) to try and parse the token list into.
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The list of the produced syntax trees, each parsed into the target Expr type (or None where no expression was produced).

Inherited Members
sqlglot.parser.Parser
FUNCTIONS
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
SUBQUERY_TOKENS
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
TRIGGER_EVENTS
ALTERABLES
ID_VAR_TOKENS
TABLE_ALIAS_TOKENS
ALIAS_TOKENS
COLON_PLACEHOLDER_TOKENS
ARRAY_CONSTRUCTORS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
IDENTIFIER_TOKENS
BRACKETS
COLUMN_POSTFIX_TOKENS
TABLE_POSTFIX_TOKENS
FUNC_TOKENS
CONJUNCTION
ASSIGNMENT
DISJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
TABLE_TERMINATORS
LAMBDAS
TYPED_LAMBDA_ARGS
LAMBDA_ARG_TERMINATORS
COLUMN_OPERATORS
CAST_COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PIPE_SYNTAX_TRANSFORM_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
ALTER_ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
QUERY_MODIFIER_TOKENS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
TYPE_CONVERTERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
TRIGGER_TIMING
TRIGGER_DEFERRABLE
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
SCHEMA_BINDING_OPTIONS
PROCEDURE_OPTIONS
EXECUTE_AS_OPTIONS
KEY_CONSTRAINT_OPTIONS
WINDOW_EXCLUDE_OPTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_PREFIX
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
COPY_INTO_VARLEN_OPTIONS
IS_JSON_PREDICATE_KIND
ODBC_DATETIME_LITERALS
ON_CONDITION_TOKENS
PRIVILEGE_FOLLOW_TOKENS
DESCRIBE_STYLES
SET_ASSIGNMENT_DELIMITERS
ANALYZE_STYLES
ANALYZE_EXPRESSION_PARSERS
PARTITION_KEYWORDS
AMBIGUOUS_ALIAS_TOKENS
OPERATION_MODIFIERS
RECURSIVE_CTE_SEARCH_KIND
SECURITY_PROPERTY_KEYWORDS
MODIFIABLES
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
TABLESAMPLE_CSV
DEFAULT_SAMPLING_METHOD
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_SET_OP
SET_OP_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
COLON_IS_VARIANT_EXTRACT
VALUES_FOLLOWED_BY_PAREN
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
OPTIONAL_ALIAS_TOKEN_CTE
ALTER_RENAME_REQUIRES_COLUMN
ALTER_TABLE_PARTITIONS
JOINS_HAVE_EQUAL_PRECEDENCE
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
ADD_JOIN_ON_TRUE
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
SHOW_TRIE
SET_TRIE
error_level
error_message_context
max_errors
max_nodes
dialect
sql
errors
reset
raise_error
validate_expression
check_errors
expression
parse_set_operation
build_cast