sqlglot.dialects.mysql
from __future__ import annotations

from sqlglot import tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
)
from sqlglot.generators.mysql import MySQLGenerator
from sqlglot.parsers.mysql import MySQLParser
from sqlglot.tokens import TokenType
from sqlglot.typing.mysql import EXPRESSION_METADATA


class MySQL(Dialect):
    """MySQL dialect: class-level settings plus the tokenizer, parser and generator it uses."""

    # Used by the optimizer's canonicalize rule: in `x::DATE < '2020-01-01 12:05:03'`,
    # `x` is promoted to the literal's type (DATETIME) instead of the literal being
    # cast to `x`'s type.
    PROMOTE_TO_INFERRED_DATETIME_TYPE = True

    # https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # Identifiers are treated as case-sensitive by default because that is how MySQL
    # behaves on Linux. On MacOS and Windows this can be overridden by specifying
    # `dialect="mysql, normalization_strategy = lowercase"`.
    #
    # See also https://dev.mysql.com/doc/refman/8.2/en/identifier-case-sensitivity.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    TIME_FORMAT = "'%Y-%m-%d %T'"
    DPIPE_IS_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    SAFE_TO_ELIMINATE_DOUBLE_NEGATION = False
    # LEAST/GREATEST do not skip NULLs in MySQL, e.g. LEAST(1, NULL, 2) -> NULL.
    LEAST_GREATEST_IGNORES_NULLS = False

    # Class-level copy of the imported module-level EXPRESSION_METADATA
    # (presumably so changes made here do not leak into the shared object).
    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # MySQL time-format specifiers mapped to their Python strftime equivalents.
    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    TIME_MAPPING = {
        "%M": "%B",
        "%c": "%-m",
        "%e": "%-d",
        "%h": "%I",
        "%i": "%M",
        "%s": "%S",
        "%u": "%W",
        "%k": "%-H",
        "%l": "%-I",
        "%T": "%H:%M:%S",
        "%W": "%A",
    }

    # The base dialect's interval units plus MySQL's compound units.
    VALID_INTERVAL_UNITS = set(Dialect.VALID_INTERVAL_UNITS).union(
        (
            "SECOND_MICROSECOND",
            "MINUTE_MICROSECOND",
            "MINUTE_SECOND",
            "HOUR_MICROSECOND",
            "HOUR_SECOND",
            "HOUR_MINUTE",
            "DAY_MICROSECOND",
            "DAY_SECOND",
            "DAY_MINUTE",
            "DAY_HOUR",
            "YEAR_MONTH",
        )
    )

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for MySQL: quoting, comments, literals and keywords."""

        QUOTES = ["'", '"']
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["'", '"', "\\"]
        BIT_STRINGS = [
            ("b'", "'"),
            ("B'", "'"),
            ("0b", ""),
        ]
        HEX_STRINGS = [
            ("x'", "'"),
            ("X'", "'"),
            ("0x", ""),
        ]
        # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
        ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BLOB": TokenType.BLOB,
            "CHARSET": TokenType.CHARACTER_SET,
            "DISTINCTROW": TokenType.DISTINCT,
            "EXPLAIN": TokenType.DESCRIBE,
            "FORCE": TokenType.FORCE,
            "IGNORE": TokenType.IGNORE,
            "KEY": TokenType.KEY,
            "LOCK TABLES": TokenType.COMMAND,
            "LONGBLOB": TokenType.LONGBLOB,
            "LONGTEXT": TokenType.LONGTEXT,
            "MEDIUMBLOB": TokenType.MEDIUMBLOB,
            "MEDIUMINT": TokenType.MEDIUMINT,
            "MEDIUMTEXT": TokenType.MEDIUMTEXT,
            "MEMBER OF": TokenType.MEMBER_OF,
            "MOD": TokenType.MOD,
            "SEPARATOR": TokenType.SEPARATOR,
            "SERIAL": TokenType.SERIAL,
            "SIGNED": TokenType.BIGINT,
            "SIGNED INTEGER": TokenType.BIGINT,
            "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
            "START": TokenType.BEGIN,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "TINYBLOB": TokenType.TINYBLOB,
            "TINYTEXT": TokenType.TINYTEXT,
            "UNLOCK TABLES": TokenType.COMMAND,
            "UNSIGNED": TokenType.UBIGINT,
            "UNSIGNED INTEGER": TokenType.UBIGINT,
            "YEAR": TokenType.YEAR,
            # Every name below is tokenized as TokenType.INTRODUCER.
            # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
            **dict.fromkeys(
                (
                    "_ARMSCII8",
                    "_ASCII",
                    "_BIG5",
                    "_BINARY",
                    "_CP1250",
                    "_CP1251",
                    "_CP1256",
                    "_CP1257",
                    "_CP850",
                    "_CP852",
                    "_CP866",
                    "_CP932",
                    "_DEC8",
                    "_EUCJPMS",
                    "_EUCKR",
                    "_GB18030",
                    "_GB2312",
                    "_GBK",
                    "_GEOSTD8",
                    "_GREEK",
                    "_HEBREW",
                    "_HP8",
                    "_KEYBCS2",
                    "_KOI8R",
                    "_KOI8U",
                    "_LATIN1",
                    "_LATIN2",
                    "_LATIN5",
                    "_LATIN7",
                    "_MACCE",
                    "_MACROMAN",
                    "_SJIS",
                    "_SWE7",
                    "_TIS620",
                    "_UCS2",
                    "_UJIS",
                    "_UTF8",
                    "_UTF16",
                    "_UTF16LE",
                    "_UTF32",
                    "_UTF8MB3",
                    "_UTF8MB4",
                ),
                TokenType.INTRODUCER,
            ),
            "@@": TokenType.SESSION_PARAMETER,
        }

        # Base command tokens plus REPLACE, minus SHOW.
        COMMANDS = (set(tokens.Tokenizer.COMMANDS) | {TokenType.REPLACE}) - {TokenType.SHOW}

    Parser = MySQLParser

    Generator = MySQLGenerator
class MySQL(Dialect):
    """MySQL dialect: class-level settings plus the tokenizer, parser and generator it uses."""

    # Used by the optimizer's canonicalize rule: in `x::DATE < '2020-01-01 12:05:03'`,
    # `x` is promoted to the literal's type (DATETIME) instead of the literal being
    # cast to `x`'s type.
    PROMOTE_TO_INFERRED_DATETIME_TYPE = True

    # https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # Identifiers are treated as case-sensitive by default because that is how MySQL
    # behaves on Linux. On MacOS and Windows this can be overridden by specifying
    # `dialect="mysql, normalization_strategy = lowercase"`.
    #
    # See also https://dev.mysql.com/doc/refman/8.2/en/identifier-case-sensitivity.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    TIME_FORMAT = "'%Y-%m-%d %T'"
    DPIPE_IS_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    SAFE_TO_ELIMINATE_DOUBLE_NEGATION = False
    # LEAST/GREATEST do not skip NULLs in MySQL, e.g. LEAST(1, NULL, 2) -> NULL.
    LEAST_GREATEST_IGNORES_NULLS = False

    # Class-level copy of the module-level EXPRESSION_METADATA
    # (presumably so changes made here do not leak into the shared object).
    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # MySQL time-format specifiers mapped to their Python strftime equivalents.
    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    TIME_MAPPING = {
        "%M": "%B",
        "%c": "%-m",
        "%e": "%-d",
        "%h": "%I",
        "%i": "%M",
        "%s": "%S",
        "%u": "%W",
        "%k": "%-H",
        "%l": "%-I",
        "%T": "%H:%M:%S",
        "%W": "%A",
    }

    # The base dialect's interval units plus MySQL's compound units.
    VALID_INTERVAL_UNITS = set(Dialect.VALID_INTERVAL_UNITS).union(
        (
            "SECOND_MICROSECOND",
            "MINUTE_MICROSECOND",
            "MINUTE_SECOND",
            "HOUR_MICROSECOND",
            "HOUR_SECOND",
            "HOUR_MINUTE",
            "DAY_MICROSECOND",
            "DAY_SECOND",
            "DAY_MINUTE",
            "DAY_HOUR",
            "YEAR_MONTH",
        )
    )

    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for MySQL: quoting, comments, literals and keywords."""

        QUOTES = ["'", '"']
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["'", '"', "\\"]
        BIT_STRINGS = [
            ("b'", "'"),
            ("B'", "'"),
            ("0b", ""),
        ]
        HEX_STRINGS = [
            ("x'", "'"),
            ("X'", "'"),
            ("0x", ""),
        ]
        # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
        ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BLOB": TokenType.BLOB,
            "CHARSET": TokenType.CHARACTER_SET,
            "DISTINCTROW": TokenType.DISTINCT,
            "EXPLAIN": TokenType.DESCRIBE,
            "FORCE": TokenType.FORCE,
            "IGNORE": TokenType.IGNORE,
            "KEY": TokenType.KEY,
            "LOCK TABLES": TokenType.COMMAND,
            "LONGBLOB": TokenType.LONGBLOB,
            "LONGTEXT": TokenType.LONGTEXT,
            "MEDIUMBLOB": TokenType.MEDIUMBLOB,
            "MEDIUMINT": TokenType.MEDIUMINT,
            "MEDIUMTEXT": TokenType.MEDIUMTEXT,
            "MEMBER OF": TokenType.MEMBER_OF,
            "MOD": TokenType.MOD,
            "SEPARATOR": TokenType.SEPARATOR,
            "SERIAL": TokenType.SERIAL,
            "SIGNED": TokenType.BIGINT,
            "SIGNED INTEGER": TokenType.BIGINT,
            "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
            "START": TokenType.BEGIN,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "TINYBLOB": TokenType.TINYBLOB,
            "TINYTEXT": TokenType.TINYTEXT,
            "UNLOCK TABLES": TokenType.COMMAND,
            "UNSIGNED": TokenType.UBIGINT,
            "UNSIGNED INTEGER": TokenType.UBIGINT,
            "YEAR": TokenType.YEAR,
            # Every name below is tokenized as TokenType.INTRODUCER.
            # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
            **dict.fromkeys(
                (
                    "_ARMSCII8",
                    "_ASCII",
                    "_BIG5",
                    "_BINARY",
                    "_CP1250",
                    "_CP1251",
                    "_CP1256",
                    "_CP1257",
                    "_CP850",
                    "_CP852",
                    "_CP866",
                    "_CP932",
                    "_DEC8",
                    "_EUCJPMS",
                    "_EUCKR",
                    "_GB18030",
                    "_GB2312",
                    "_GBK",
                    "_GEOSTD8",
                    "_GREEK",
                    "_HEBREW",
                    "_HP8",
                    "_KEYBCS2",
                    "_KOI8R",
                    "_KOI8U",
                    "_LATIN1",
                    "_LATIN2",
                    "_LATIN5",
                    "_LATIN7",
                    "_MACCE",
                    "_MACROMAN",
                    "_SJIS",
                    "_SWE7",
                    "_TIS620",
                    "_UCS2",
                    "_UJIS",
                    "_UTF8",
                    "_UTF16",
                    "_UTF16LE",
                    "_UTF32",
                    "_UTF8MB3",
                    "_UTF8MB4",
                ),
                TokenType.INTRODUCER,
            ),
            "@@": TokenType.SESSION_PARAMETER,
        }

        # Base command tokens plus REPLACE, minus SHOW.
        COMMANDS = (set(tokens.Tokenizer.COMMANDS) | {TokenType.REPLACE}) - {TokenType.SHOW}

    Parser = MySQLParser

    Generator = MySQLGenerator
This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal is cast to x's type to match it instead.
Specifies the strategy according to which identifiers should be normalized.
Whether LEAST/GREATEST functions ignore NULL values, e.g.:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an escaped sequence (`\n`, i.e. a backslash followed by `n`) to its unescaped version (a literal newline character).
Whether string literals support escape sequences (e.g. \n). Set by the metaclass based on the tokenizer's STRING_ESCAPES.
Whether byte string literals support escape sequences. Set by the metaclass based on the tokenizer's BYTE_STRING_ESCAPES.
class Tokenizer(tokens.Tokenizer):
    """Tokenizer settings for MySQL: quoting, comments, literals and keywords."""

    QUOTES = ["'", '"']
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["'", '"', "\\"]
    BIT_STRINGS = [
        ("b'", "'"),
        ("B'", "'"),
        ("0b", ""),
    ]
    HEX_STRINGS = [
        ("x'", "'"),
        ("X'", "'"),
        ("0x", ""),
    ]
    # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
    ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BLOB": TokenType.BLOB,
        "CHARSET": TokenType.CHARACTER_SET,
        "DISTINCTROW": TokenType.DISTINCT,
        "EXPLAIN": TokenType.DESCRIBE,
        "FORCE": TokenType.FORCE,
        "IGNORE": TokenType.IGNORE,
        "KEY": TokenType.KEY,
        "LOCK TABLES": TokenType.COMMAND,
        "LONGBLOB": TokenType.LONGBLOB,
        "LONGTEXT": TokenType.LONGTEXT,
        "MEDIUMBLOB": TokenType.MEDIUMBLOB,
        "MEDIUMINT": TokenType.MEDIUMINT,
        "MEDIUMTEXT": TokenType.MEDIUMTEXT,
        "MEMBER OF": TokenType.MEMBER_OF,
        "MOD": TokenType.MOD,
        "SEPARATOR": TokenType.SEPARATOR,
        "SERIAL": TokenType.SERIAL,
        "SIGNED": TokenType.BIGINT,
        "SIGNED INTEGER": TokenType.BIGINT,
        "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
        "START": TokenType.BEGIN,
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
        "TINYBLOB": TokenType.TINYBLOB,
        "TINYTEXT": TokenType.TINYTEXT,
        "UNLOCK TABLES": TokenType.COMMAND,
        "UNSIGNED": TokenType.UBIGINT,
        "UNSIGNED INTEGER": TokenType.UBIGINT,
        "YEAR": TokenType.YEAR,
        # Every name below is tokenized as TokenType.INTRODUCER.
        # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
        **dict.fromkeys(
            (
                "_ARMSCII8",
                "_ASCII",
                "_BIG5",
                "_BINARY",
                "_CP1250",
                "_CP1251",
                "_CP1256",
                "_CP1257",
                "_CP850",
                "_CP852",
                "_CP866",
                "_CP932",
                "_DEC8",
                "_EUCJPMS",
                "_EUCKR",
                "_GB18030",
                "_GB2312",
                "_GBK",
                "_GEOSTD8",
                "_GREEK",
                "_HEBREW",
                "_HP8",
                "_KEYBCS2",
                "_KOI8R",
                "_KOI8U",
                "_LATIN1",
                "_LATIN2",
                "_LATIN5",
                "_LATIN7",
                "_MACCE",
                "_MACROMAN",
                "_SJIS",
                "_SWE7",
                "_TIS620",
                "_UCS2",
                "_UJIS",
                "_UTF8",
                "_UTF16",
                "_UTF16LE",
                "_UTF32",
                "_UTF8MB3",
                "_UTF8MB4",
            ),
            TokenType.INTRODUCER,
        ),
        "@@": TokenType.SESSION_PARAMETER,
    }

    # Base command tokens plus REPLACE, minus SHOW.
    COMMANDS = (set(tokens.Tokenizer.COMMANDS) | {TokenType.REPLACE}) - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- NUMBERS_CAN_HAVE_DECIMALS
- dialect
- tokenize
- sql
- size
- tokens