sqlglot.dialects.bigquery
"""BigQuery dialect for sqlglot.

Bundles BigQuery-specific dialect flags, time/format mappings, type-coercion
rules, identifier normalization, and tokenizer settings, and wires in the
BigQuery parser and generator.
"""

from __future__ import annotations

import typing as t

from sqlglot import exp, jsonpath, tokens
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
)
from sqlglot.generators.bigquery import BigQueryGenerator
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.parsers.bigquery import BigQueryParser
from sqlglot.tokens import TokenType
from sqlglot.typing.bigquery import EXPRESSION_METADATA

# NOTE: the previous `if t.TYPE_CHECKING: from ... import TypeAnnotator` block
# was removed — TypeAnnotator is already imported unconditionally above (it is
# needed at class-body evaluation time for COERCES_TO), so the annotation-only
# re-import was dead code.


class BigQuery(Dialect):
    """Dialect definition for Google BigQuery (GoogleSQL)."""

    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ONLY_GROUP_ALIAS_REF = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True
    BYTE_STRING_IS_BYTES_TYPE = True
    UUID_IS_STRING_TYPE = True
    ANNOTATE_ALL_SCOPES = True
    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
    TABLES_REFERENCEABLE_AS_COLUMNS = True
    SUPPORTS_STRUCT_STAR_EXPANSION = True
    EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
    QUERY_RESULTS_ARE_STRUCTS = True
    JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
    JSON_PATH_SINGLE_DOT_IS_WILDCARD = True
    LEAST_GREATEST_IGNORES_NULLS = False
    DEFAULT_NULL_TYPE = exp.DType.BIGINT
    PRIORITIZE_NON_LITERAL_TYPES = True
    ALIAS_POST_VERSION = False

    # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%x": "%m/%d/%y",
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
        "%F": "%Y-%m-%d",
        "%T": "%H:%M:%S",
        "%c": "%a %b %e %H:%M:%S %Y",
    }

    INVERSE_TIME_MAPPING = {
        # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
        # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
        "%H:%M:%S.%f": "%H:%M:%E6S",
    }

    # Format elements accepted by CAST(x AS <type> FORMAT '...') syntax.
    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {
        "_PARTITIONTIME",
        "_PARTITIONDATE",
        "_TABLE_SUFFIX",
        "_FILE_NAME",
        "_DBT_MAX_PARTITION",
    }

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
    COERCES_TO = {
        **TypeAnnotator.COERCES_TO,
        exp.DType.BIGDECIMAL: {exp.DType.DOUBLE},
    }
    # Widen the inherited coercion table with BigQuery-specific rules.
    COERCES_TO[exp.DType.DECIMAL] |= {exp.DType.BIGDECIMAL}
    COERCES_TO[exp.DType.BIGINT] |= {exp.DType.BIGDECIMAL}
    COERCES_TO[exp.DType.VARCHAR] |= {
        exp.DType.DATE,
        exp.DType.DATETIME,
        exp.DType.TIME,
        exp.DType.TIMESTAMP,
        exp.DType.TIMESTAMPTZ,
    }

    # Copy so dialect-local mutations don't leak into the shared module-level table.
    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    def normalize_identifier(self, expression: E) -> E:
        """Lowercase unquoted identifiers except where BigQuery is case-sensitive."""
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # Dashes and numbers may appear in BigQuery JSON path variable tokens.
        VAR_TOKENS = {
            *jsonpath.JSONPathTokenizer.VAR_TOKENS,
            TokenType.DASH,
            TokenType.NUMBER,
        }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        # QUOTES is referenced in the outermost iterable, the only position of a
        # class-scope comprehension where a class-level name is resolvable.
        BYTE_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("b", "B")]

        RAW_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("r", "R")]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "WHILE": TokenType.COMMAND,
        }
        # Drop inherited keywords that should not tokenize as keywords in BigQuery.
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    Parser = BigQueryParser

    Generator = BigQueryGenerator
class BigQuery(Dialect):
    """Dialect definition for Google BigQuery (GoogleSQL)."""

    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ONLY_GROUP_ALIAS_REF = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True
    BYTE_STRING_IS_BYTES_TYPE = True
    UUID_IS_STRING_TYPE = True
    ANNOTATE_ALL_SCOPES = True
    PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
    TABLES_REFERENCEABLE_AS_COLUMNS = True
    SUPPORTS_STRUCT_STAR_EXPANSION = True
    EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
    QUERY_RESULTS_ARE_STRUCTS = True
    JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
    JSON_PATH_SINGLE_DOT_IS_WILDCARD = True
    LEAST_GREATEST_IGNORES_NULLS = False
    DEFAULT_NULL_TYPE = exp.DType.BIGINT
    PRIORITIZE_NON_LITERAL_TYPES = True
    ALIAS_POST_VERSION = False

    # https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%x": "%m/%d/%y",
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
        "%F": "%Y-%m-%d",
        "%T": "%H:%M:%S",
        "%c": "%a %b %e %H:%M:%S %Y",
    }

    INVERSE_TIME_MAPPING = {
        # Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
        # %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
        "%H:%M:%S.%f": "%H:%M:%E6S",
    }

    # Format elements accepted by the CAST(x AS <type> FORMAT '...') syntax.
    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    # https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
    # https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
    PSEUDOCOLUMNS = {
        "_PARTITIONTIME",
        "_PARTITIONDATE",
        "_TABLE_SUFFIX",
        "_FILE_NAME",
        "_DBT_MAX_PARTITION",
    }

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions#percentile_cont
    COERCES_TO = {
        **TypeAnnotator.COERCES_TO,
        exp.DType.BIGDECIMAL: {exp.DType.DOUBLE},
    }
    # Widen the inherited coercion table with BigQuery-specific rules.
    COERCES_TO[exp.DType.DECIMAL] |= {exp.DType.BIGDECIMAL}
    COERCES_TO[exp.DType.BIGINT] |= {exp.DType.BIGDECIMAL}
    COERCES_TO[exp.DType.VARCHAR] |= {
        exp.DType.DATE,
        exp.DType.DATETIME,
        exp.DType.TIME,
        exp.DType.TIMESTAMP,
        exp.DType.TIMESTAMPTZ,
    }

    # Copy so dialect-local mutations don't leak into the shared module-level table.
    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    def normalize_identifier(self, expression: E) -> E:
        """Lowercase unquoted identifiers except where BigQuery is case-sensitive."""
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            # Skip through dotted paths to find the node that owns this identifier.
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # Dashes and numbers may appear in BigQuery JSON path variable tokens.
        VAR_TOKENS = {
            *jsonpath.JSONPathTokenizer.VAR_TOKENS,
            TokenType.DASH,
            TokenType.NUMBER,
        }

    class Tokenizer(tokens.Tokenizer):
        """BigQuery-specific tokenizer configuration."""

        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        # QUOTES is referenced in the outermost iterable, the only position of a
        # class-scope comprehension where a class-level name is resolvable.
        BYTE_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("b", "B")]

        RAW_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("r", "R")]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "LOOP": TokenType.COMMAND,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "REPEAT": TokenType.COMMAND,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "WHILE": TokenType.COMMAND,
        }
        # Drop inherited keywords that should not tokenize as keywords in BigQuery.
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    Parser = BigQueryParser

    Generator = BigQueryGenerator
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata; can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Whether byte string literals (ex: BigQuery's b'...') are typed as BYTES/BINARY
Whether to annotate all scopes during optimization. Used by BigQuery for UNNEST support.
Whether projection alias names can shadow table/source names in GROUP BY and HAVING clauses.
In BigQuery, when a projection alias has the same name as a source table, the alias takes precedence in GROUP BY and HAVING clauses, and the table becomes inaccessible by that name.
For example, in BigQuery: SELECT id, ARRAY_AGG(col) AS custom_fields FROM custom_fields GROUP BY id HAVING id >= 1
The "custom_fields" source is shadowed by the projection alias, so we cannot qualify "id" with "custom_fields" in GROUP BY/HAVING.
Whether table names can be referenced as columns (treated as structs).
BigQuery allows tables to be referenced as columns in queries, automatically treating them as struct values containing all the table's columns.
For example, in BigQuery: SELECT t FROM my_table AS t -- Returns entire row as a struct
Whether the dialect supports expanding struct fields using star notation (e.g., struct_col.*).
BigQuery allows struct fields to be expanded with the star operator:
SELECT t.struct_col.* FROM table t
RisingWave also allows struct field expansion with the star operator using parentheses:
SELECT (t.struct_col).* FROM table t
This expands to all fields within the struct.
Whether pseudocolumns should be excluded from star expansion (SELECT *).
Pseudocolumns are special dialect-specific columns (e.g., Oracle's ROWNUM, ROWID, LEVEL, or BigQuery's _PARTITIONTIME, _PARTITIONDATE) that are implicitly available but not part of the table schema. When this is True, SELECT * will not include these pseudocolumns; they must be explicitly selected.
Whether query results are typed as structs in metadata for type inference.
In BigQuery, subqueries store their column types as a STRUCT in metadata,
enabling special type inference for ARRAY(SELECT ...) expressions:
ARRAY(SELECT x, y FROM t) → ARRAY<STRUCT<x, y>>. For single-column subqueries, BigQuery unwraps the struct:
ARRAY(SELECT x FROM t) → ARRAY<type of x>. This is metadata-only for type inference.
Whether JSON_EXTRACT_SCALAR returns null if a non-scalar value is selected.
Whether a single DOT in a JSON path (e.g. $.) is treated as a valid wildcard key.
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
The default type of NULL for producing the correct projection type.
For example, in BigQuery the default type of the NULL value is INT64.
Whether to prioritize non-literal types over literals during type annotation.
Whether the table alias comes after version (timestamp or iceberg snapshot).
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL
must be explicitly specified.
def normalize_identifier(self, expression: E) -> E:
    """Lowercase unquoted identifiers, honoring BigQuery's case-sensitivity rules."""
    if not (
        isinstance(expression, exp.Identifier)
        and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
    ):
        return super().normalize_identifier(expression)

    # Walk up through any dotted path to the node that actually owns the identifier.
    ancestor = expression.parent
    while isinstance(ancestor, exp.Dot):
        ancestor = ancestor.parent

    # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
    # by default. Tables are detected via a qualification heuristic, which should
    # generally be correct because BigQuery tables must be qualified with at least
    # a dataset, unless @@dataset_id is set.
    is_qualified_table = (
        isinstance(ancestor, exp.Table)
        and ancestor.db
        and (ancestor.meta.get("quoted_table") or not ancestor.meta.get("maybe_column"))
    )
    keep_case = (
        isinstance(ancestor, exp.UserDefinedFunction)
        or is_qualified_table
        or expression.meta.get("is_table")
    )
    if not keep_case:
        expression.set("this", expression.this.lower())

    return t.cast(E, expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (e.g. an actual newline character).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    """JSON path tokenizer that also accepts dashes and numbers as variable tokens."""

    # Same members as the base set plus the two BigQuery-specific token types,
    # built via an explicit union instead of unpacking.
    VAR_TOKENS = set(jsonpath.JSONPathTokenizer.VAR_TOKENS) | {
        TokenType.DASH,
        TokenType.NUMBER,
    }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
class Tokenizer(tokens.Tokenizer):
    """BigQuery-specific tokenizer configuration."""

    # Single/double quotes plus their triple-quoted forms.
    QUOTES = ["'", '"', '"""', "'''"]
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["\\"]

    HEX_STRINGS = [("0x", ""), ("0X", "")]

    # QUOTES is referenced in the outermost iterable, the only position of a
    # class-scope comprehension where a class-level name is resolvable — do not
    # reorder the two `for` clauses.
    BYTE_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("b", "B")]

    RAW_STRINGS = [(prefix + q, q) for q in t.cast(list[str], QUOTES) for prefix in ("r", "R")]

    NESTED_COMMENTS = False

    # Base keyword table extended/overridden with BigQuery-specific tokens.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ANY TYPE": TokenType.VARIANT,
        "BEGIN": TokenType.COMMAND,
        "BEGIN TRANSACTION": TokenType.BEGIN,
        "BYTEINT": TokenType.INT,
        "BYTES": TokenType.BINARY,
        "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
        "DATETIME": TokenType.TIMESTAMP,
        "DECLARE": TokenType.DECLARE,
        "ELSEIF": TokenType.COMMAND,
        "EXCEPTION": TokenType.COMMAND,
        "EXPORT": TokenType.EXPORT,
        "FLOAT64": TokenType.DOUBLE,
        "FOR SYSTEM TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
        "LOOP": TokenType.COMMAND,
        "MODEL": TokenType.MODEL,
        "NOT DETERMINISTIC": TokenType.VOLATILE,
        "RECORD": TokenType.STRUCT,
        "REPEAT": TokenType.COMMAND,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
        "WHILE": TokenType.COMMAND,
    }
    # Drop inherited keywords that should not tokenize as keywords in BigQuery.
    KEYWORDS.pop("DIV")
    KEYWORDS.pop("VALUES")
    KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- dialect
- tokenize
- sql
- size
- tokens