sqlglot.dialects.duckdb
1from __future__ import annotations 2 3 4from sqlglot import exp, tokens 5 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9) 10from sqlglot.generators.duckdb import DuckDBGenerator, WS_CONTROL_CHARS_TO_DUCK # noqa: F401 11from sqlglot.parsers.duckdb import DuckDBParser 12from sqlglot.tokens import TokenType 13from sqlglot.typing.duckdb import EXPRESSION_METADATA 14 15 16class DuckDB(Dialect): 17 NULL_ORDERING = "nulls_are_last" 18 SUPPORTS_USER_DEFINED_TYPES = True 19 SAFE_DIVISION = True 20 INDEX_OFFSET = 1 21 CONCAT_COALESCE = True 22 CONCAT_WS_COALESCE = True 23 SUPPORTS_ORDER_BY_ALL = True 24 SUPPORTS_LIMIT_ALL = True 25 SUPPORTS_FIXED_SIZE_ARRAYS = True 26 STRICT_JSON_PATH_SYNTAX = False 27 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 28 UUID_IS_STRING_TYPE = False 29 30 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 31 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 32 33 DATE_PART_MAPPING = { 34 **Dialect.DATE_PART_MAPPING, 35 "DAYOFWEEKISO": "ISODOW", 36 } 37 38 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 39 40 DATE_PART_MAPPING.pop("WEEKDAY") 41 42 INVERSE_TIME_MAPPING = { 43 "%e": "%-d", # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d) 44 "%:z": "%z", # In DuckDB %z can represent +/-HH:MM, +/-HHMM, or +/-HH. 45 "%-z": "%z", 46 "%f_zero": "%n", 47 "%f_one": "%n", 48 "%f_two": "%n", 49 "%f_three": "%g", 50 "%f_four": "%n", 51 "%f_five": "%n", 52 "%f_seven": "%n", 53 "%f_eight": "%n", 54 "%f_nine": "%n", 55 } 56 57 def to_json_path(self, path: exp.Expr | None) -> exp.Expr | None: 58 if isinstance(path, exp.Literal): 59 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 60 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 61 # This check ensures we'll avoid trying to parse these as JSON paths, which can 62 # either result in a noisy warning or in an invalid representation of the path. 63 path_text = path.name 64 if path_text.startswith("/") or "[#" in path_text: 65 return path 66 67 return super().to_json_path(path) 68 69 class Tokenizer(tokens.Tokenizer): 70 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 71 BYTE_STRING_ESCAPES = ["'", "\\"] 72 HEREDOC_STRINGS = ["$"] 73 74 HEREDOC_TAG_IS_IDENTIFIER = True 75 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 76 77 KEYWORDS = { 78 **tokens.Tokenizer.KEYWORDS, 79 "//": TokenType.DIV, 80 "**": TokenType.DSTAR, 81 "^@": TokenType.CARET_AT, 82 "@>": TokenType.AT_GT, 83 "<@": TokenType.LT_AT, 84 "ATTACH": TokenType.ATTACH, 85 "BINARY": TokenType.VARBINARY, 86 "BITSTRING": TokenType.BIT, 87 "BPCHAR": TokenType.TEXT, 88 "CHAR": TokenType.TEXT, 89 "DATETIME": TokenType.TIMESTAMPNTZ, 90 "DETACH": TokenType.DETACH, 91 "FORCE": TokenType.FORCE, 92 "INSTALL": TokenType.INSTALL, 93 "INT8": TokenType.BIGINT, 94 "LOGICAL": TokenType.BOOLEAN, 95 "MACRO": TokenType.FUNCTION, 96 "ONLY": TokenType.ONLY, 97 "PIVOT_WIDER": TokenType.PIVOT, 98 "POSITIONAL": TokenType.POSITIONAL, 99 "RESET": TokenType.COMMAND, 100 "ROW": TokenType.STRUCT, 101 "SIGNED": TokenType.INT, 102 "STRING": TokenType.TEXT, 103 "SUMMARIZE": TokenType.SUMMARIZE, 104 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 105 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 106 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 107 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 108 "TIMESTAMP_US": TokenType.TIMESTAMP, 109 "UBIGINT": TokenType.UBIGINT, 110 "UINTEGER": TokenType.UINT, 111 "USMALLINT": TokenType.USMALLINT, 112 "UTINYINT": TokenType.UTINYINT, 113 "VARCHAR": TokenType.TEXT, 114 } 115 KEYWORDS.pop("/*+") 116 117 SINGLE_TOKENS = { 118 **tokens.Tokenizer.SINGLE_TOKENS, 119 "$": TokenType.PARAMETER, 120 } 121 122 VAR_SINGLE_TOKENS = {"$"} 123 124 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 125 126 Parser = DuckDBParser 127 128 Generator = DuckDBGenerator
17class DuckDB(Dialect): 18 NULL_ORDERING = "nulls_are_last" 19 SUPPORTS_USER_DEFINED_TYPES = True 20 SAFE_DIVISION = True 21 INDEX_OFFSET = 1 22 CONCAT_COALESCE = True 23 CONCAT_WS_COALESCE = True 24 SUPPORTS_ORDER_BY_ALL = True 25 SUPPORTS_LIMIT_ALL = True 26 SUPPORTS_FIXED_SIZE_ARRAYS = True 27 STRICT_JSON_PATH_SYNTAX = False 28 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 29 UUID_IS_STRING_TYPE = False 30 31 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 32 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 33 34 DATE_PART_MAPPING = { 35 **Dialect.DATE_PART_MAPPING, 36 "DAYOFWEEKISO": "ISODOW", 37 } 38 39 EXPRESSION_METADATA = EXPRESSION_METADATA.copy() 40 41 DATE_PART_MAPPING.pop("WEEKDAY") 42 43 INVERSE_TIME_MAPPING = { 44 "%e": "%-d", # BigQuery's space-padded day (%e) -> DuckDB's no-padding day (%-d) 45 "%:z": "%z", # In DuckDB %z can represent +/-HH:MM, +/-HHMM, or +/-HH. 46 "%-z": "%z", 47 "%f_zero": "%n", 48 "%f_one": "%n", 49 "%f_two": "%n", 50 "%f_three": "%g", 51 "%f_four": "%n", 52 "%f_five": "%n", 53 "%f_seven": "%n", 54 "%f_eight": "%n", 55 "%f_nine": "%n", 56 } 57 58 def to_json_path(self, path: exp.Expr | None) -> exp.Expr | None: 59 if isinstance(path, exp.Literal): 60 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 61 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 62 # This check ensures we'll avoid trying to parse these as JSON paths, which can 63 # either result in a noisy warning or in an invalid representation of the path. 64 path_text = path.name 65 if path_text.startswith("/") or "[#" in path_text: 66 return path 67 68 return super().to_json_path(path) 69 70 class Tokenizer(tokens.Tokenizer): 71 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 72 BYTE_STRING_ESCAPES = ["'", "\\"] 73 HEREDOC_STRINGS = ["$"] 74 75 HEREDOC_TAG_IS_IDENTIFIER = True 76 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 77 78 KEYWORDS = { 79 **tokens.Tokenizer.KEYWORDS, 80 "//": TokenType.DIV, 81 "**": TokenType.DSTAR, 82 "^@": TokenType.CARET_AT, 83 "@>": TokenType.AT_GT, 84 "<@": TokenType.LT_AT, 85 "ATTACH": TokenType.ATTACH, 86 "BINARY": TokenType.VARBINARY, 87 "BITSTRING": TokenType.BIT, 88 "BPCHAR": TokenType.TEXT, 89 "CHAR": TokenType.TEXT, 90 "DATETIME": TokenType.TIMESTAMPNTZ, 91 "DETACH": TokenType.DETACH, 92 "FORCE": TokenType.FORCE, 93 "INSTALL": TokenType.INSTALL, 94 "INT8": TokenType.BIGINT, 95 "LOGICAL": TokenType.BOOLEAN, 96 "MACRO": TokenType.FUNCTION, 97 "ONLY": TokenType.ONLY, 98 "PIVOT_WIDER": TokenType.PIVOT, 99 "POSITIONAL": TokenType.POSITIONAL, 100 "RESET": TokenType.COMMAND, 101 "ROW": TokenType.STRUCT, 102 "SIGNED": TokenType.INT, 103 "STRING": TokenType.TEXT, 104 "SUMMARIZE": TokenType.SUMMARIZE, 105 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 106 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 107 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 108 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 109 "TIMESTAMP_US": TokenType.TIMESTAMP, 110 "UBIGINT": TokenType.UBIGINT, 111 "UINTEGER": TokenType.UINT, 112 "USMALLINT": TokenType.USMALLINT, 113 "UTINYINT": TokenType.UTINYINT, 114 "VARCHAR": TokenType.TEXT, 115 } 116 KEYWORDS.pop("/*+") 117 118 SINGLE_TOKENS = { 119 **tokens.Tokenizer.SINGLE_TOKENS, 120 "$": TokenType.PARAMETER, 121 } 122 123 VAR_SINGLE_TOKENS = {"$"} 124 125 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 126 127 Parser = DuckDBParser 128 129 Generator = DuckDBGenerator
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
A NULL arg in CONCAT_WS yields NULL by default, but in some dialects it is skipped.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
58 def to_json_path(self, path: exp.Expr | None) -> exp.Expr | None: 59 if isinstance(path, exp.Literal): 60 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 61 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 62 # This check ensures we'll avoid trying to parse these as JSON paths, which can 63 # either result in a noisy warning or in an invalid representation of the path. 64 path_text = path.name 65 if path_text.startswith("/") or "[#" in path_text: 66 return path 67 68 return super().to_json_path(path)
Mapping of an escaped sequence (\n) to its unescaped version (
).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
70 class Tokenizer(tokens.Tokenizer): 71 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 72 BYTE_STRING_ESCAPES = ["'", "\\"] 73 HEREDOC_STRINGS = ["$"] 74 75 HEREDOC_TAG_IS_IDENTIFIER = True 76 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 77 78 KEYWORDS = { 79 **tokens.Tokenizer.KEYWORDS, 80 "//": TokenType.DIV, 81 "**": TokenType.DSTAR, 82 "^@": TokenType.CARET_AT, 83 "@>": TokenType.AT_GT, 84 "<@": TokenType.LT_AT, 85 "ATTACH": TokenType.ATTACH, 86 "BINARY": TokenType.VARBINARY, 87 "BITSTRING": TokenType.BIT, 88 "BPCHAR": TokenType.TEXT, 89 "CHAR": TokenType.TEXT, 90 "DATETIME": TokenType.TIMESTAMPNTZ, 91 "DETACH": TokenType.DETACH, 92 "FORCE": TokenType.FORCE, 93 "INSTALL": TokenType.INSTALL, 94 "INT8": TokenType.BIGINT, 95 "LOGICAL": TokenType.BOOLEAN, 96 "MACRO": TokenType.FUNCTION, 97 "ONLY": TokenType.ONLY, 98 "PIVOT_WIDER": TokenType.PIVOT, 99 "POSITIONAL": TokenType.POSITIONAL, 100 "RESET": TokenType.COMMAND, 101 "ROW": TokenType.STRUCT, 102 "SIGNED": TokenType.INT, 103 "STRING": TokenType.TEXT, 104 "SUMMARIZE": TokenType.SUMMARIZE, 105 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 106 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 107 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 108 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 109 "TIMESTAMP_US": TokenType.TIMESTAMP, 110 "UBIGINT": TokenType.UBIGINT, 111 "UINTEGER": TokenType.UINT, 112 "USMALLINT": TokenType.USMALLINT, 113 "UTINYINT": TokenType.UTINYINT, 114 "VARCHAR": TokenType.TEXT, 115 } 116 KEYWORDS.pop("/*+") 117 118 SINGLE_TOKENS = { 119 **tokens.Tokenizer.SINGLE_TOKENS, 120 "$": TokenType.PARAMETER, 121 } 122 123 VAR_SINGLE_TOKENS = {"$"} 124 125 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- ESCAPE_FOLLOW_CHARS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- COMMENTS
- dialect
- tokenize
- sql
- size
- tokens