sqlglot.dialects.snowflake
from __future__ import annotations

from sqlglot import exp, jsonpath, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
)
from sqlglot.generators.snowflake import SnowflakeGenerator
from sqlglot.parsers.snowflake import (
    SnowflakeParser,
)
from sqlglot.tokens import TokenType
from sqlglot.typing.snowflake import EXPRESSION_METADATA


class Snowflake(Dialect):
    """Snowflake SQL dialect: normalization rules, time-format mappings, tokenizer,
    parser and generator wiring."""

    # Unquoted identifiers are normalized to uppercase.
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE

    # NULLs sort as if they were the largest values.
    NULL_ORDERING = "nulls_are_large"

    # Default time format used when one isn't given explicitly.
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"

    SUPPORTS_USER_DEFINED_TYPES = False

    # Snowflake lets a CTE's column aliases be referenced inside the CTE body
    # (e.g. in HAVING), so CTE alias columns override subquery projection aliases.
    PREFER_CTE_ALIAS_COLUMN = True

    # TABLESAMPLE (n) means n percent of rows, not n rows.
    TABLESAMPLE_SIZE_IS_PERCENT = True

    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    # Array update functions return NULL when the input array is NULL.
    ARRAY_FUNCS_PROPAGATES_NULLS = True

    # ALTER TABLE ... ADD doesn't need to be repeated for each added column.
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False

    # TRY_CAST only accepts string inputs.
    TRY_CAST_REQUIRES_STRING = True

    # Snowflake allows alias references in JOIN ... ON conditions, e.g.:
    #   SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id  -- VALID
    # https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True

    # LEAST/GREATEST return NULL if any argument is NULL: LEAST(1, NULL, 2) -> NULL
    LEAST_GREATEST_IGNORES_NULLS = False

    UUID_IS_STRING_TYPE = True

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        # In TIME_MAPPING we map '"T"' (with the double quotes) to 'T'; prevent 'T'
        # from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'.
        "T": "T",
    }

    # Snowflake time format elements -> Python strftime equivalents.
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_<precision> is an internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
        "FF9": "%f_nine",
        "ff9": "%f_nine",
        "TZHTZM": "%z",
        "tzhtzm": "%z",
        "TZH:TZM": "%:z",  # internal representation for ±HH:MM
        "tzh:tzm": "%:z",
        "TZH": "%-z",  # internal representation for ±HH
        "tzh": "%-z",
        '"T"': "T",  # remove the optional double quotes around the date/time separator
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
        # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve
        # EPOCH_SECOND as a distinct value for two reasons:
        # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE
        # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND
        #    to BIGINT to match Snowflake's integer behavior
        # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized
        # to EXTRACT(EPOCH FROM ts) and lose the integer semantics.
        "EPOCH_SECOND": "EPOCH_SECOND",
        "EPOCH_SECONDS": "EPOCH_SECOND",
    }

    # Engine-generated pseudocolumns, e.g. excluded from SELECT * expansion.
    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        """Check whether ``identifier`` may be quoted.

        This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        unquoted DUAL keyword in a special way and does not map it to a user-defined table.
        """
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # "$" is not a single-character token in Snowflake JSON paths.
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    Parser = SnowflakeParser

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "INTEGRATION": TokenType.INTEGRATION,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PACKAGE": TokenType.PACKAGE,
            "POLICY": TokenType.POLICY,
            "POOL": TokenType.POOL,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "ROLE": TokenType.ROLE,
            "RULE": TokenType.RULE,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "VOLUME": TokenType.VOLUME,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # "/*+" (hint opener) is not a distinct token in Snowflake.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is not tokenized as a generic command in Snowflake.
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    Generator = SnowflakeGenerator
class Snowflake(Dialect):
    """Snowflake SQL dialect definition."""

    # Unquoted identifiers are normalized to uppercase.
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    # NULLs sort as if they were the largest values.
    NULL_ORDERING = "nulls_are_large"
    # Default time format used when one isn't given explicitly.
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    # CTE column aliases may be referenced inside the CTE body (e.g. in HAVING).
    PREFER_CTE_ALIAS_COLUMN = True
    # TABLESAMPLE (n) means n percent of rows, not n rows.
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    # Array update functions return NULL when the input array is NULL.
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    # TRY_CAST only accepts string inputs.
    TRY_CAST_REQUIRES_STRING = True
    # Alias references are allowed in JOIN ... ON conditions.
    # https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    # LEAST/GREATEST return NULL if any argument is NULL.
    LEAST_GREATEST_IGNORES_NULLS = False
    UUID_IS_STRING_TYPE = True

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        # In TIME_MAPPING we map '"T"' (with the double quotes) to 'T'; prevent 'T'
        # from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'.
        "T": "T",
    }

    # Snowflake time format elements -> Python strftime equivalents.
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_<precision> is an internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
        "FF9": "%f_nine",
        "ff9": "%f_nine",
        "TZHTZM": "%z",
        "tzhtzm": "%z",
        "TZH:TZM": "%:z",  # internal representation for ±HH:MM
        "tzh:tzm": "%:z",
        "TZH": "%-z",  # internal representation for ±HH
        "tzh": "%-z",
        '"T"': "T",  # remove the optional double quotes around the date/time separator
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
        # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve
        # EPOCH_SECOND as a distinct value for two reasons:
        # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE
        # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND
        #    to BIGINT to match Snowflake's integer behavior
        # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized
        # to EXTRACT(EPOCH FROM ts) and lose the integer semantics.
        "EPOCH_SECOND": "EPOCH_SECOND",
        "EPOCH_SECONDS": "EPOCH_SECOND",
    }

    # Engine-generated pseudocolumns, e.g. excluded from SELECT * expansion.
    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        """Check whether ``identifier`` may be quoted.

        This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        unquoted DUAL keyword in a special way and does not map it to a user-defined table.
        """
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # "$" is not a single-character token in Snowflake JSON paths.
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    Parser = SnowflakeParser

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "INTEGRATION": TokenType.INTEGRATION,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PACKAGE": TokenType.PACKAGE,
            "POLICY": TokenType.POLICY,
            "POOL": TokenType.POOL,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "ROLE": TokenType.ROLE,
            "RULE": TokenType.RULE,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "VOLUME": TokenType.VOLUME,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        # "/*+" (hint opener) is not a distinct token in Snowflake.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is not tokenized as a generic command in Snowflake.
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    Generator = SnowflakeGenerator
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Whether Array update functions return NULL when the input array is NULL.
Whether alias references are allowed in JOIN ... ON clauses.
Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
For example, in Snowflake: SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id -- VALID
Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
119 def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool: 120 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 121 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 122 return super().can_quote(identifier, identify) and not ( 123 isinstance(identifier.parent, exp.Table) 124 and not identifier.quoted 125 and identifier.name.lower() == "dual" 126 )
Checks if an identifier can be quoted
Arguments:
- identifier: The identifier to check.
- identify:
- True: Always returns True, except for certain cases.
- "safe": Only returns True if the identifier is case-insensitive.
- "unsafe": Only returns True if the identifier is case-sensitive.
Returns:
Whether the given text can be identified.
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
128 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 129 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 130 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
134 class Tokenizer(tokens.Tokenizer): 135 STRING_ESCAPES = ["\\", "'"] 136 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 137 RAW_STRINGS = ["$$"] 138 COMMENTS = ["--", "//", ("/*", "*/")] 139 NESTED_COMMENTS = False 140 141 KEYWORDS = { 142 **tokens.Tokenizer.KEYWORDS, 143 "BYTEINT": TokenType.INT, 144 "FILE://": TokenType.URI_START, 145 "FILE FORMAT": TokenType.FILE_FORMAT, 146 "GET": TokenType.GET, 147 "INTEGRATION": TokenType.INTEGRATION, 148 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 149 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 150 "MINUS": TokenType.EXCEPT, 151 "NCHAR VARYING": TokenType.VARCHAR, 152 "PACKAGE": TokenType.PACKAGE, 153 "POLICY": TokenType.POLICY, 154 "POOL": TokenType.POOL, 155 "PUT": TokenType.PUT, 156 "REMOVE": TokenType.COMMAND, 157 "RM": TokenType.COMMAND, 158 "ROLE": TokenType.ROLE, 159 "RULE": TokenType.RULE, 160 "SAMPLE": TokenType.TABLE_SAMPLE, 161 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 162 "SQL_DOUBLE": TokenType.DOUBLE, 163 "SQL_VARCHAR": TokenType.VARCHAR, 164 "STAGE": TokenType.STAGE, 165 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 166 "STREAMLIT": TokenType.STREAMLIT, 167 "TAG": TokenType.TAG, 168 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 169 "TOP": TokenType.TOP, 170 "VOLUME": TokenType.VOLUME, 171 "WAREHOUSE": TokenType.WAREHOUSE, 172 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float 173 # FLOAT is a synonym for DOUBLE in Snowflake 174 "FLOAT": TokenType.DOUBLE, 175 } 176 KEYWORDS.pop("/*+") 177 178 SINGLE_TOKENS = { 179 **tokens.Tokenizer.SINGLE_TOKENS, 180 "$": TokenType.PARAMETER, 181 "!": TokenType.EXCLAMATION, 182 } 183 184 VAR_SINGLE_TOKENS = {"$"} 185 186 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- dialect
- tokenize
- sql
- size
- tokens