sqlglot.dialects.duckdb
from __future__ import annotations


from sqlglot import exp, tokens

from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
)
from sqlglot.generators.duckdb import DuckDBGenerator, WS_CONTROL_CHARS_TO_DUCK  # noqa: F401
from sqlglot.parsers.duckdb import DuckDBParser
from sqlglot.tokens import TokenType
from sqlglot.typing.duckdb import EXPRESSION_METADATA


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    CONCAT_WS_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    UUID_IS_STRING_TYPE = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    DATE_PART_MAPPING.pop("WEEKDAY")

    INVERSE_TIME_MAPPING = {
        "%e": "%-d",  # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d)
        "%:z": "%z",  # In DuckDB %z can represent +/-HH:MM, +/-HHMM, or +/-HH.
        "%-z": "%z",
        "%f_zero": "%n",
        "%f_one": "%n",
        "%f_two": "%n",
        "%f_three": "%g",
        "%f_four": "%n",
        "%f_five": "%n",
        "%f_seven": "%n",
        "%f_eight": "%n",
        "%f_nine": "%n",
    }

    def to_json_path(self, path: exp.Expr | None) -> exp.Expr | None:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        BYTE_STRING_ESCAPES = ["'", "\\"]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "FORCE": TokenType.FORCE,
            "INSTALL": TokenType.INSTALL,
            "INT8": TokenType.BIGINT,
            "LOGICAL": TokenType.BOOLEAN,
            "MACRO": TokenType.FUNCTION,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "RESET": TokenType.COMMAND,
            "ROW": TokenType.STRUCT,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    Parser = DuckDBParser

    Generator = DuckDBGenerator
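Before the per-member notes below, a minimal usage sketch (the queries are made up for illustration, and exact output can vary across sqlglot versions):

import sqlglot

# STRING is an alias for TEXT in this dialect's tokenizer (see
# Tokenizer.KEYWORDS below), so the round trip normalizes the type name.
print(sqlglot.transpile("SELECT CAST(x AS STRING)", read="duckdb", write="duckdb")[0])
# SELECT CAST(x AS TEXT)

# ORDER BY ALL is first-class syntax (SUPPORTS_ORDER_BY_ALL = True).
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])
# SELECT a, b FROM t ORDER BY ALL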
class DuckDB(sqlglot.dialects.dialect.Dialect):
NULL_ORDERING = 'nulls_are_last'
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

CONCAT_COALESCE = True
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.

CONCAT_WS_COALESCE = True
A NULL arg in CONCAT_WS yields NULL by default, but in some dialects it is skipped.

SUPPORTS_ORDER_BY_ALL = True
Whether ORDER BY ALL is supported (expanding to all the selected columns), as in DuckDB and Spark3/Databricks.

SUPPORTS_FIXED_SIZE_ARRAYS = True
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, as in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this is instead interpreted as a subscript/index operator.

STRICT_JSON_PATH_SYNTAX = False
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.

NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
Whether number literals can include underscores for better readability.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
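As a concrete illustration of the case-insensitive strategy, sqlglot's normalize_identifiers optimizer rule lowercases unquoted identifiers for this dialect (a minimal sketch; the query is made up):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expression = sqlglot.parse_one("SELECT FooBar FROM MyTable", read="duckdb")

# Unquoted identifiers normalize to lowercase under CASE_INSENSITIVE.
print(normalize_identifiers(expression, dialect="duckdb").sql(dialect="duckdb"))
# SELECT foobar FROM mytable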
def to_json_path(self, path: exp.Expr | None) -> exp.Expr | None:
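A minimal sketch of the short-circuit described in the source comments above (the path literals are made up):

from sqlglot import exp
from sqlglot.dialects.duckdb import DuckDB

dialect = DuckDB()

# JSON pointer syntax (leading "/") is returned untouched.
pointer = exp.Literal.string("/duck/goose/0")
assert dialect.to_json_path(pointer) is pointer

# Back-of-list access ("[#-i]") also bypasses JSONPath parsing.
back = exp.Literal.string("$.list[#-1]")
assert dialect.to_json_path(back) is back

# An ordinary path falls through to the base implementation, which parses it
# with the JSONPath dialect into a structured node.
parsed = dialect.to_json_path(exp.Literal.string("$.a.b"))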
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
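A sketch of how the byte-string settings surface when tokenizing; this assumes sqlglot's top-level tokenize helper and is illustrative only:

import sqlglot

# e'...' literals are byte strings, and both \' and \\ act as escapes inside
# them (BYTE_STRING_ESCAPES above), so the embedded quote does not end the
# string.
for token in sqlglot.tokenize("SELECT e'it\\'s'", read="duckdb"):
    print(token.token_type, repr(token.text))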
class DuckDB.Tokenizer(sqlglot.tokens.Tokenizer):
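The effect of a few of these overrides, as a sketch (the queries are made up; the outputs in comments are the expected results):

import sqlglot

# "$" maps to TokenType.PARAMETER, so $1 parses as a placeholder rather than
# as part of an identifier.
print(sqlglot.parse_one("SELECT $1", read="duckdb").sql(dialect="duckdb"))
# SELECT $1

# TIMESTAMP is tokenized as TIMESTAMPNTZ, and the generator renders it back
# as TIMESTAMP on the way out.
print(sqlglot.transpile("CAST(x AS TIMESTAMP)", read="duckdb", write="duckdb")[0])
# CAST(x AS TIMESTAMP)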
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens