sqlglot.dialects.snowflake
from __future__ import annotations

from sqlglot import exp, jsonpath, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
)
from sqlglot.generators.snowflake import SnowflakeGenerator
from sqlglot.parsers.snowflake import (
    SnowflakeParser,
)
from sqlglot.tokens import TokenType
from sqlglot.typing.snowflake import EXPRESSION_METADATA


class Snowflake(Dialect):
    """Snowflake SQL dialect.

    Bundles the Snowflake-specific tokenizer, parser, generator and the
    time-format / date-part mappings used for transpilation.
    """

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        "T": "T",  # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_ internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
        "FF9": "%f_nine",
        "ff9": "%f_nine",
        "TZHTZM": "%z",
        "tzhtzm": "%z",
        "TZH:TZM": "%:z",  # internal representation for ±HH:MM
        "tzh:tzm": "%:z",
        "TZH": "%-z",  # internal representation ±HH
        "tzh": "%-z",
        '"T"': "T",  # remove the optional double quotes around the separator between the date and time
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
        # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve
        # EPOCH_SECOND as a distinct value for two reasons:
        # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE
        # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND
        #    to BIGINT to match Snowflake's integer behavior
        # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized
        # to EXTRACT(EPOCH FROM ts) and lose the integer semantics.
        "EPOCH_SECOND": "EPOCH_SECOND",
        "EPOCH_SECONDS": "EPOCH_SECOND",
    }

    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # Snowflake JSON paths don't treat "$" as a special single token
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    Parser = SnowflakeParser

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "INTEGRATION": TokenType.INTEGRATION,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PACKAGE": TokenType.PACKAGE,
            "POLICY": TokenType.POLICY,
            "POOL": TokenType.POOL,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "ROLE": TokenType.ROLE,
            "RULE": TokenType.RULE,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "VOLUME": TokenType.VOLUME,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    Generator = SnowflakeGenerator
class Snowflake(Dialect):
    """Snowflake SQL dialect.

    Bundles the Snowflake-specific tokenizer, parser, generator and the
    time-format / date-part mappings used for transpilation.
    """

    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ARRAY_FUNCS_PROPAGATES_NULLS = True
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True
    SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://docs.snowflake.com/en/sql-reference/functions/initcap
    INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v!?@"^#$&~_,.:;+\\-*%/|\\[\\](){}<>'

    INVERSE_TIME_MAPPING = {
        "T": "T",  # in TIME_MAPPING we map '"T"' with the double quotes to 'T', and we want to prevent 'T' from being mapped back to '"T"' so that 'AUTO' doesn't become 'AU"T"O'
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f_nine",  # %f_ internal representation with precision specified
        "ff": "%f_nine",
        "FF0": "%f_zero",
        "ff0": "%f_zero",
        "FF1": "%f_one",
        "ff1": "%f_one",
        "FF2": "%f_two",
        "ff2": "%f_two",
        "FF3": "%f_three",
        "ff3": "%f_three",
        "FF4": "%f_four",
        "ff4": "%f_four",
        "FF5": "%f_five",
        "ff5": "%f_five",
        "FF6": "%f",
        "ff6": "%f",
        "FF7": "%f_seven",
        "ff7": "%f_seven",
        "FF8": "%f_eight",
        "ff8": "%f_eight",
        "FF9": "%f_nine",
        "ff9": "%f_nine",
        "TZHTZM": "%z",
        "tzhtzm": "%z",
        "TZH:TZM": "%:z",  # internal representation for ±HH:MM
        "tzh:tzm": "%:z",
        "TZH": "%-z",  # internal representation ±HH
        "tzh": "%-z",
        '"T"': "T",  # remove the optional double quotes around the separator between the date and time
        # Seems like Snowflake treats AM/PM in the format string as equivalent,
        # only the time (stamp) value's AM/PM affects the output
        "AM": "%p",
        "am": "%p",
        "PM": "%p",
        "pm": "%p",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
        # The base Dialect maps EPOCH_SECOND -> EPOCH, but we need to preserve
        # EPOCH_SECOND as a distinct value for two reasons:
        # 1. Type annotation: EPOCH_SECOND returns BIGINT, while EPOCH returns DOUBLE
        # 2. Transpilation: DuckDB's EPOCH() returns float, so we cast EPOCH_SECOND
        #    to BIGINT to match Snowflake's integer behavior
        # Without this override, EXTRACT(EPOCH_SECOND FROM ts) would be normalized
        # to EXTRACT(EPOCH FROM ts) and lose the integer semantics.
        "EPOCH_SECOND": "EPOCH_SECOND",
        "EPOCH_SECONDS": "EPOCH_SECOND",
    }

    PSEUDOCOLUMNS = {"LEVEL"}

    def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        return super().can_quote(identifier, identify) and not (
            isinstance(identifier.parent, exp.Table)
            and not identifier.quoted
            and identifier.name.lower() == "dual"
        )

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        # Snowflake JSON paths don't treat "$" as a special single token
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    Parser = SnowflakeParser

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "INTEGRATION": TokenType.INTEGRATION,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PACKAGE": TokenType.PACKAGE,
            "POLICY": TokenType.POLICY,
            "POOL": TokenType.POOL,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "ROLE": TokenType.ROLE,
            "RULE": TokenType.RULE,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "VOLUME": TokenType.VOLUME,
            "WAREHOUSE": TokenType.WAREHOUSE,
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
            # FLOAT is a synonym for DOUBLE in Snowflake
            "FLOAT": TokenType.DOUBLE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    Generator = SnowflakeGenerator
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Whether Array update functions return NULL when the input array is NULL.
Whether alias references are allowed in JOIN ... ON clauses.
Most dialects do not support this, but Snowflake allows alias expansion in the JOIN ... ON clause (and almost everywhere else)
For example, in Snowflake: SELECT a.id AS user_id FROM a JOIN b ON user_id = b.id -- VALID
Reference: https://docs.snowflake.com/en/sql-reference/sql/select#usage-notes
Whether LEAST/GREATEST functions ignore NULL values, e.g:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
def can_quote(self, identifier: exp.Identifier, identify: str | bool = "safe") -> bool:
    """Return whether *identifier* may be quoted under Snowflake rules.

    Method of the Snowflake dialect class. Defers to the base Dialect check,
    then additionally refuses to quote an unquoted DUAL table reference,
    because Snowflake treats an unquoted DUAL keyword in a special way and
    does not map it to a user-defined table.
    """
    if not super().can_quote(identifier, identify):
        return False

    refers_to_dual_table = (
        isinstance(identifier.parent, exp.Table)
        and not identifier.quoted
        and identifier.name.lower() == "dual"
    )
    return not refers_to_dual_table
Checks if an identifier can be quoted
Arguments:
- identifier: The identifier to check.
- identify:
- True: Always returns True except for certain cases.
- "safe": Only returns True if the identifier is case-insensitive.
- "unsafe": Only returns True if the identifier is case-sensitive.
Returns:
Whether the given text can be identified.
Mapping of an escaped sequence (e.g. \n) to its unescaped version (a literal newline).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
    # Same token table as the base JSON-path tokenizer, minus "$":
    # Snowflake does not treat "$" as a special single-character token here.
    SINGLE_TOKENS = {
        char: token
        for char, token in jsonpath.JSONPathTokenizer.SINGLE_TOKENS.items()
        if char != "$"
    }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- ESCAPE_FOLLOW_CHARS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens
class Tokenizer(tokens.Tokenizer):
    """Snowflake-specific tokenizer settings layered on the base Tokenizer."""

    # Both backslash and a doubled quote escape inside string literals.
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    # $$-delimited dollar-quoted strings are taken verbatim.
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    # Base keyword table extended with Snowflake-only keywords and overrides.
    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "FILE://": TokenType.URI_START,
        "FILE FORMAT": TokenType.FILE_FORMAT,
        "GET": TokenType.GET,
        "INTEGRATION": TokenType.INTEGRATION,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PACKAGE": TokenType.PACKAGE,
        "POLICY": TokenType.POLICY,
        "POOL": TokenType.POOL,
        "PUT": TokenType.PUT,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "ROLE": TokenType.ROLE,
        "RULE": TokenType.RULE,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STAGE": TokenType.STAGE,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "STREAMLIT": TokenType.STREAMLIT,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "VOLUME": TokenType.VOLUME,
        "WAREHOUSE": TokenType.WAREHOUSE,
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#float
        # FLOAT is a synonym for DOUBLE in Snowflake
        "FLOAT": TokenType.DOUBLE,
    }
    # Snowflake has no "/*+" optimizer-hint comment start.
    del KEYWORDS["/*+"]

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
        "!": TokenType.EXCLAMATION,
    }

    # "$" may also appear inside identifiers/variables.
    VAR_SINGLE_TOKENS = {"$"}

    # SHOW is parsed properly rather than treated as an opaque command.
    COMMANDS = tokens.Tokenizer.COMMANDS.difference({TokenType.SHOW})
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- dialect
- tokenize
- sql
- size
- tokens