Edit on GitHub

sqlglot.parsers.athena

 1from __future__ import annotations
 2
 3import typing as t
 4
 5from sqlglot import exp
 6from sqlglot.errors import ErrorLevel
 7from sqlglot.parser import Parser
 8from sqlglot.parsers.trino import TrinoParser
 9from sqlglot.tokens import TokenType, Token
10
11if t.TYPE_CHECKING:
12    from sqlglot.dialects.dialect import DialectType
13    from sqlglot.dialects.hive import Hive
14    from sqlglot.dialects.trino import Trino
15
16
class AthenaTrinoParser(TrinoParser):
    """Trino parser extended for Athena.

    Identical to ``TrinoParser`` except that a statement beginning with a
    ``USING`` token is accepted and parsed as an opaque command instead of
    being rejected.
    """

    # Copy the base table, then register the Athena-specific USING handler.
    STATEMENT_PARSERS = dict(TrinoParser.STATEMENT_PARSERS)
    STATEMENT_PARSERS[TokenType.USING] = lambda self: self._parse_as_command(self._prev)
22
23
class AthenaParser(Parser):
    """Dispatching parser for Athena.

    Athena SQL blends two engines, so this parser does no parsing itself:
    each token stream is forwarded either to a Hive parser or to an
    Athena-flavored Trino parser. A leading ``HIVE_TOKEN_STREAM`` marker
    token selects the Hive delegate; everything else goes to Trino.
    """

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
        hive: Hive | None = None,
        trino: Trino | None = None,
    ) -> None:
        # Local imports: the dialect classes are only needed here to build
        # default instances when none were supplied.
        from sqlglot.dialects.hive import Hive
        from sqlglot.dialects.trino import Trino

        super().__init__(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            dialect=dialect,
        )

        # Both delegate parsers share this parser's error configuration.
        error_settings = {
            "error_level": error_level,
            "error_message_context": error_message_context,
            "max_errors": max_errors,
        }
        self._hive_parser = (hive or Hive()).parser(**error_settings)
        self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **error_settings)

    def _is_hive_stream(self, raw_tokens: list[Token]) -> bool:
        """Return True when the token list starts with the Hive stream marker."""
        return bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """Parse ``raw_tokens`` with the Hive or Trino delegate, as appropriate."""
        if self._is_hive_stream(raw_tokens):
            # Drop the marker token before handing off to the Hive parser.
            return self._hive_parser.parse(raw_tokens[1:], sql)
        return self._trino_parser.parse(raw_tokens, sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Parse ``raw_tokens`` into the given expression type(s) via the right delegate."""
        if self._is_hive_stream(raw_tokens):
            return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)
class AthenaTrinoParser(sqlglot.parsers.trino.TrinoParser):
class AthenaTrinoParser(TrinoParser):
    """Trino parser variant for Athena that accepts a leading ``USING`` token,
    parsing it as an opaque command rather than rejecting the statement."""

    # Start from the inherited table and add the Athena-only entry.
    STATEMENT_PARSERS = dict(TrinoParser.STATEMENT_PARSERS)
    STATEMENT_PARSERS[TokenType.USING] = lambda self: self._parse_as_command(self._prev)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
STATEMENT_PARSERS = {<TokenType.ALTER: 217>: <function Parser.<lambda>>, <TokenType.ANALYZE: 435>: <function Parser.<lambda>>, <TokenType.BEGIN: 227>: <function Parser.<lambda>>, <TokenType.CACHE: 230>: <function Parser.<lambda>>, <TokenType.COMMENT: 236>: <function Parser.<lambda>>, <TokenType.COMMIT: 237>: <function Parser.<lambda>>, <TokenType.COPY: 240>: <function Parser.<lambda>>, <TokenType.CREATE: 241>: <function Parser.<lambda>>, <TokenType.DELETE: 254>: <function Parser.<lambda>>, <TokenType.DESC: 255>: <function Parser.<lambda>>, <TokenType.DESCRIBE: 256>: <function Parser.<lambda>>, <TokenType.DROP: 262>: <function Parser.<lambda>>, <TokenType.GRANT: 286>: <function Parser.<lambda>>, <TokenType.REVOKE: 373>: <function Parser.<lambda>>, <TokenType.INSERT: 297>: <function Parser.<lambda>>, <TokenType.KILL: 311>: <function Parser.<lambda>>, <TokenType.LOAD: 318>: <function Parser.<lambda>>, <TokenType.MERGE: 325>: <function Parser.<lambda>>, <TokenType.PIVOT: 352>: <function Parser.<lambda>>, <TokenType.PRAGMA: 357>: <function Parser.<lambda>>, <TokenType.REFRESH: 369>: <function Parser.<lambda>>, <TokenType.ROLLBACK: 378>: <function Parser.<lambda>>, <TokenType.SET: 388>: <function Parser.<lambda>>, <TokenType.TRUNCATE: 407>: <function Parser.<lambda>>, <TokenType.UNCACHE: 409>: <function Parser.<lambda>>, <TokenType.UNPIVOT: 412>: <function Parser.<lambda>>, <TokenType.UPDATE: 413>: <function Parser.<lambda>>, <TokenType.USE: 414>: <function Parser.<lambda>>, <TokenType.SEMICOLON: 19>: <function Parser.<lambda>>, <TokenType.USING: 415>: <function AthenaTrinoParser.<lambda>>}
Inherited Members
sqlglot.parser.Parser
Parser
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
SUBQUERY_TOKENS
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
TRIGGER_EVENTS
ALTERABLES
ID_VAR_TOKENS
ALIAS_TOKENS
COLON_PLACEHOLDER_TOKENS
ARRAY_CONSTRUCTORS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
IDENTIFIER_TOKENS
BRACKETS
COLUMN_POSTFIX_TOKENS
TABLE_POSTFIX_TOKENS
FUNC_TOKENS
CONJUNCTION
ASSIGNMENT
DISJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
TABLE_TERMINATORS
LAMBDAS
TYPED_LAMBDA_ARGS
LAMBDA_ARG_TERMINATORS
COLUMN_OPERATORS
CAST_COLUMN_OPERATORS
EXPRESSION_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PIPE_SYNTAX_TRANSFORM_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
ALTER_ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
QUERY_MODIFIER_PARSERS
QUERY_MODIFIER_TOKENS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
TYPE_CONVERTERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
TRIGGER_TIMING
TRIGGER_DEFERRABLE
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
SCHEMA_BINDING_OPTIONS
PROCEDURE_OPTIONS
EXECUTE_AS_OPTIONS
KEY_CONSTRAINT_OPTIONS
WINDOW_EXCLUDE_OPTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_PREFIX
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
COPY_INTO_VARLEN_OPTIONS
IS_JSON_PREDICATE_KIND
ODBC_DATETIME_LITERALS
ON_CONDITION_TOKENS
PRIVILEGE_FOLLOW_TOKENS
DESCRIBE_STYLES
SET_ASSIGNMENT_DELIMITERS
ANALYZE_STYLES
ANALYZE_EXPRESSION_PARSERS
PARTITION_KEYWORDS
AMBIGUOUS_ALIAS_TOKENS
OPERATION_MODIFIERS
RECURSIVE_CTE_SEARCH_KIND
SECURITY_PROPERTY_KEYWORDS
MODIFIABLES
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
TABLESAMPLE_CSV
DEFAULT_SAMPLING_METHOD
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_SET_OP
SET_OP_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
COLON_IS_VARIANT_EXTRACT
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
OPTIONAL_ALIAS_TOKEN_CTE
ALTER_RENAME_REQUIRES_COLUMN
ALTER_TABLE_PARTITIONS
JOINS_HAVE_EQUAL_PRECEDENCE
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
ADD_JOIN_ON_TRUE
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
SHOW_TRIE
SET_TRIE
error_level
error_message_context
max_errors
max_nodes
dialect
sql
errors
reset
raise_error
validate_expression
parse
parse_into
check_errors
expression
parse_set_operation
build_cast
sqlglot.parsers.trino.TrinoParser
NO_PAREN_FUNCTIONS
FUNCTIONS
FUNCTION_PARSERS
JSON_QUERY_OPTIONS
sqlglot.parsers.presto.PrestoParser
VALUES_FOLLOWED_BY_PAREN
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
TABLE_ALIAS_TOKENS
class AthenaParser(sqlglot.parser.Parser):
class AthenaParser(Parser):
    """Parser facade for Athena: routes each token stream to a Hive parser
    (when a leading ``HIVE_TOKEN_STREAM`` marker is present) or to an
    Athena-flavored Trino parser (otherwise)."""

    def __init__(
        self,
        error_level: ErrorLevel | None = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
        hive: Hive | None = None,
        trino: Trino | None = None,
    ) -> None:
        # Dialect classes are imported here only to construct defaults.
        from sqlglot.dialects.hive import Hive
        from sqlglot.dialects.trino import Trino

        super().__init__(
            error_level=error_level,
            error_message_context=error_message_context,
            max_errors=max_errors,
            dialect=dialect,
        )

        # The delegates inherit this parser's error configuration.
        shared = {
            "error_level": error_level,
            "error_message_context": error_message_context,
            "max_errors": max_errors,
        }
        self._hive_parser = (hive or Hive()).parser(**shared)
        self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **shared)

    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
        """Dispatch parsing to the Hive or Trino delegate."""
        is_hive = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        if is_hive:
            # Skip the marker token itself.
            return self._hive_parser.parse(raw_tokens[1:], sql)
        return self._trino_parser.parse(raw_tokens, sql)

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: list[Token],
        sql: str | None = None,
    ) -> list[exp.Expr | None]:
        """Dispatch typed parsing to the Hive or Trino delegate."""
        is_hive = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
        if is_hive:
            return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
        return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
AthenaParser( error_level: sqlglot.errors.ErrorLevel | None = None, error_message_context: int = 100, max_errors: int = 3, dialect: Union[str, sqlglot.dialects.Dialect, type[sqlglot.dialects.Dialect], NoneType] = None, hive: sqlglot.dialects.hive.Hive | None = None, trino: sqlglot.dialects.trino.Trino | None = None)
def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
    hive: Hive | None = None,
    trino: Trino | None = None,
) -> None:
    """Build the Hive and Trino delegate parsers used for dispatch."""
    # Imported locally; only needed to create default dialect instances.
    from sqlglot.dialects.hive import Hive
    from sqlglot.dialects.trino import Trino

    super().__init__(
        error_level=error_level,
        error_message_context=error_message_context,
        max_errors=max_errors,
        dialect=dialect,
    )

    # Propagate this parser's error configuration to both delegates.
    settings = {
        "error_level": error_level,
        "error_message_context": error_message_context,
        "max_errors": max_errors,
    }
    self._hive_parser = (hive or Hive()).parser(**settings)
    self._trino_parser = AthenaTrinoParser(dialect=trino or Trino(), **settings)
def parse( self, raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str) -> list[sqlglot.expressions.core.Expr | None]:
def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """Parse via the Hive delegate when the Hive stream marker leads the
    token list (marker dropped), otherwise via the Trino delegate."""
    starts_with_marker = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
    if starts_with_marker:
        return self._hive_parser.parse(raw_tokens[1:], sql)
    return self._trino_parser.parse(raw_tokens, sql)

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: The list of tokens.
  • sql: The original SQL string.
Returns:

The list of the produced syntax trees.

def parse_into( self, expression_types: Union[type[sqlglot.expressions.core.Expr], Collection[type[sqlglot.expressions.core.Expr]]], raw_tokens: list[sqlglot.tokenizer_core.Token], sql: str | None = None) -> list[sqlglot.expressions.core.Expr | None]:
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """Parse into the given type(s) via the Hive delegate when the Hive
    stream marker leads the token list, otherwise via the Trino delegate."""
    starts_with_marker = bool(raw_tokens) and raw_tokens[0].token_type == TokenType.HIVE_TOKEN_STREAM
    if starts_with_marker:
        return self._hive_parser.parse_into(expression_types, raw_tokens[1:], sql)
    return self._trino_parser.parse_into(expression_types, raw_tokens, sql)

Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: The expression type(s) to try and parse the token list into.
  • raw_tokens: The list of tokens.
  • sql: The original SQL string, used to produce helpful debug messages.
Returns:

The list of the produced syntax trees, each parsed into the target Expr type (or None where no expression was produced).

Inherited Members
sqlglot.parser.Parser
FUNCTIONS
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
SUBQUERY_TOKENS
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
TRIGGER_EVENTS
ALTERABLES
ID_VAR_TOKENS
TABLE_ALIAS_TOKENS
ALIAS_TOKENS
COLON_PLACEHOLDER_TOKENS
ARRAY_CONSTRUCTORS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
IDENTIFIER_TOKENS
BRACKETS
COLUMN_POSTFIX_TOKENS
TABLE_POSTFIX_TOKENS
FUNC_TOKENS
CONJUNCTION
ASSIGNMENT
DISJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
TABLE_TERMINATORS
LAMBDAS
TYPED_LAMBDA_ARGS
LAMBDA_ARG_TERMINATORS
COLUMN_OPERATORS
CAST_COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
STRING_PARSERS
NUMERIC_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
PIPE_SYNTAX_TRANSFORM_PARSERS
PROPERTY_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
ALTER_ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
NO_PAREN_FUNCTION_PARSERS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
KEY_VALUE_DEFINITIONS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
QUERY_MODIFIER_TOKENS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
TYPE_CONVERTERS
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
CONFLICT_ACTIONS
TRIGGER_TIMING
TRIGGER_DEFERRABLE
CREATE_SEQUENCE
ISOLATED_LOADING_OPTIONS
USABLES
CAST_ACTIONS
SCHEMA_BINDING_OPTIONS
PROCEDURE_OPTIONS
EXECUTE_AS_OPTIONS
KEY_CONSTRAINT_OPTIONS
WINDOW_EXCLUDE_OPTIONS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_PREFIX
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
VIEW_ATTRIBUTES
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
SELECT_START_TOKENS
COPY_INTO_VARLEN_OPTIONS
IS_JSON_PREDICATE_KIND
ODBC_DATETIME_LITERALS
ON_CONDITION_TOKENS
PRIVILEGE_FOLLOW_TOKENS
DESCRIBE_STYLES
SET_ASSIGNMENT_DELIMITERS
ANALYZE_STYLES
ANALYZE_EXPRESSION_PARSERS
PARTITION_KEYWORDS
AMBIGUOUS_ALIAS_TOKENS
OPERATION_MODIFIERS
RECURSIVE_CTE_SEARCH_KIND
SECURITY_PROPERTY_KEYWORDS
MODIFIABLES
STRICT_CAST
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
LOG_DEFAULTS_TO_LN
TABLESAMPLE_CSV
DEFAULT_SAMPLING_METHOD
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_SET_OP
SET_OP_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
COLON_IS_VARIANT_EXTRACT
VALUES_FOLLOWED_BY_PAREN
SUPPORTS_IMPLICIT_UNNEST
INTERVAL_SPANS
SUPPORTS_PARTITION_SELECTION
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
OPTIONAL_ALIAS_TOKEN_CTE
ALTER_RENAME_REQUIRES_COLUMN
ALTER_TABLE_PARTITIONS
JOINS_HAVE_EQUAL_PRECEDENCE
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
ADD_JOIN_ON_TRUE
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
SHOW_TRIE
SET_TRIE
error_level
error_message_context
max_errors
max_nodes
dialect
sql
errors
reset
raise_error
validate_expression
check_errors
expression
parse_set_operation
build_cast