sqlglot.dialects.mysql
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arrow_json_extract_sql,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    isnull_to_is_null,
    length_or_char_length_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_paren_current_date_sql,
    no_pivot_sql,
    no_tablesample_sql,
    no_trycast_sql,
    build_date_delta,
    build_date_delta_with_interval,
    rename_func,
    strposition_sql,
    unit_to_var,
    trim_sql,
    timestrtotime_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.typing.mysql import EXPRESSION_METADATA


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[MySQL.Parser], exp.Show]:
    """Build a SHOW-statement parser callback.

    Returns a closure that forwards the given arguments to
    `MySQL.Parser._parse_show_mysql`; used to populate `SHOW_PARSERS`.
    """

    def _parse(self: MySQL.Parser) -> exp.Show:
        return self._parse_show_mysql(*args, **kwargs)

    return _parse


def _date_trunc_sql(self: MySQL.Generator, expression: exp.DateTrunc) -> str:
    """Generate MySQL SQL for `exp.DateTrunc`.

    MySQL has no DATE_TRUNC function, so the truncated date is rebuilt from
    its parts with CONCAT (e.g. year + month + day "1") and parsed back with
    STR_TO_DATE. Units other than WEEK/MONTH/QUARTER/YEAR/DAY are reported
    as unsupported and fall back to DATE(expr).
    """
    expr = self.sql(expression, "this")
    unit = expression.text("unit").upper()

    if unit == "WEEK":
        # %u = week (Monday first), %w = weekday; day "1" anchors to Monday
        concat = f"CONCAT(YEAR({expr}), ' ', WEEK({expr}, 1), ' 1')"
        date_format = "%Y %u %w"
    elif unit == "MONTH":
        concat = f"CONCAT(YEAR({expr}), ' ', MONTH({expr}), ' 1')"
        date_format = "%Y %c %e"
    elif unit == "QUARTER":
        # QUARTER(x) * 3 - 2 is the first month of the quarter
        concat = f"CONCAT(YEAR({expr}), ' ', QUARTER({expr}) * 3 - 2, ' 1')"
        date_format = "%Y %c %e"
    elif unit == "YEAR":
        concat = f"CONCAT(YEAR({expr}), ' 1 1')"
        date_format = "%Y %c %e"
    else:
        if unit != "DAY":
            self.unsupported(f"Unexpected interval unit: {unit}")
        return self.func("DATE", expr)

    return self.func("STR_TO_DATE", concat, f"'{date_format}'")


# All specifiers for time parts (as opposed to date parts)
# https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format
TIME_SPECIFIERS = {"f", "H", "h", "I", "i", "k", "l", "p", "r", "S", "s", "T"}


def _has_time_specifier(date_format: str) -> bool:
    """Return True if `date_format` contains any MySQL time-part specifier."""
    i = 0
    length = len(date_format)

    while i < length:
        if date_format[i] == "%":
            i += 1  # step past '%' so escaped '%%' is not misread as a specifier
            if i < length and date_format[i] in TIME_SPECIFIERS:
                return True
        i += 1
    return False


def _str_to_date(args: t.List) -> exp.StrToDate | exp.StrToTime:
    """Parse STR_TO_DATE(str, fmt).

    Produces `exp.StrToTime` when the format string contains a time-part
    specifier, otherwise `exp.StrToDate`.
    """
    mysql_date_format = seq_get(args, 1)
    date_format = MySQL.format_time(mysql_date_format)
    this = seq_get(args, 0)

    if mysql_date_format and _has_time_specifier(mysql_date_format.name):
        return exp.StrToTime(this=this, format=date_format)

    return exp.StrToDate(this=this, format=date_format)


def _str_to_date_sql(
    self: MySQL.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Generate STR_TO_DATE(this, format) with the format mapped to MySQL specifiers."""
    return self.func("STR_TO_DATE", expression.this, self.format_time(expression))


def _unix_to_time_sql(self: MySQL.Generator, expression: exp.UnixToTime) -> str:
    """Generate FROM_UNIXTIME, dividing by 10^scale for sub-second timestamps."""
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("FROM_UNIXTIME", timestamp, self.format_time(expression))

    return self.func(
        "FROM_UNIXTIME",
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)),
        self.format_time(expression),
    )


def date_add_sql(
    kind: str,
) -> t.Callable[[generator.Generator, exp.Expression], str]:
    """Build a generator for DATE_ADD/DATE_SUB-style calls (`kind` is "ADD" or "SUB")."""

    def func(self: generator.Generator, expression: exp.Expression) -> str:
        return self.func(
            f"DATE_{kind}",
            expression.this,
            exp.Interval(this=expression.expression, unit=unit_to_var(expression)),
        )

    return func


def _ts_or_ds_to_date_sql(self: MySQL.Generator, expression: exp.TsOrDsToDate) -> str:
    """Generate SQL for `exp.TsOrDsToDate`: STR_TO_DATE if a format is given, else DATE."""
    time_format = expression.args.get("format")
    return _str_to_date_sql(self, expression) if time_format else self.func("DATE", expression.this)


def _remove_ts_or_ds_to_date(
    to_sql: t.Optional[t.Callable[[MySQL.Generator, exp.Expression], str]] = None,
    args: t.Tuple[str, ...] = ("this",),
) -> t.Callable[[MySQL.Generator, exp.Func], str]:
    """Wrap a generator so redundant `exp.TsOrDsToDate` wrappers are unwrapped first.

    For each arg key in `args`, a format-less TsOrDsToDate node is replaced by
    its inner expression before delegating to `to_sql` (or the generic
    function fallback when `to_sql` is None).
    """

    def func(self: MySQL.Generator, expression: exp.Func) -> str:
        for arg_key in args:
            arg = expression.args.get(arg_key)
            if isinstance(arg, exp.TsOrDsToDate) and not arg.args.get("format"):
                expression.set(arg_key, arg.this)

        return to_sql(self, expression) if to_sql else self.function_fallback_sql(expression)

    return func


class MySQL(Dialect):
    PROMOTE_TO_INFERRED_DATETIME_TYPE = True

    # https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # We default to treating all identifiers as case-sensitive, since it matches MySQL's
    # behavior on Linux systems. For MacOS and Windows systems, one can override this
    # setting by specifying `dialect="mysql, normalization_strategy = lowercase"`.
    #
    # See also https://dev.mysql.com/doc/refman/8.2/en/identifier-case-sensitivity.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    TIME_FORMAT = "'%Y-%m-%d %T'"
    DPIPE_IS_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    SAFE_DIVISION = True
    SAFE_TO_ELIMINATE_DOUBLE_NEGATION = False
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    # Maps MySQL time-format specifiers to the dialect-neutral (Python-like) ones.
    TIME_MAPPING = {
        "%M": "%B",
        "%c": "%-m",
        "%e": "%-d",
        "%h": "%I",
        "%i": "%M",
        "%s": "%S",
        "%u": "%W",
        "%k": "%-H",
        "%l": "%-I",
        "%T": "%H:%M:%S",
        "%W": "%A",
    }

    # MySQL supports compound interval units such as HOUR_MINUTE in addition
    # to the base units shared by all dialects.
    VALID_INTERVAL_UNITS = {
        *Dialect.VALID_INTERVAL_UNITS,
        "SECOND_MICROSECOND",
        "MINUTE_MICROSECOND",
        "MINUTE_SECOND",
        "HOUR_MICROSECOND",
        "HOUR_SECOND",
        "HOUR_MINUTE",
        "DAY_MICROSECOND",
        "DAY_SECOND",
        "DAY_MINUTE",
        "DAY_HOUR",
        "YEAR_MONTH",
    }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["'", '"', "\\"]
        BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
        # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
        ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BLOB": TokenType.BLOB,
            "CHARSET": TokenType.CHARACTER_SET,
            "DISTINCTROW": TokenType.DISTINCT,
            "EXPLAIN": TokenType.DESCRIBE,
            "FORCE": TokenType.FORCE,
            "IGNORE": TokenType.IGNORE,
            "KEY": TokenType.KEY,
            "LOCK TABLES": TokenType.COMMAND,
            "LONGBLOB": TokenType.LONGBLOB,
            "LONGTEXT": TokenType.LONGTEXT,
            "MEDIUMBLOB": TokenType.MEDIUMBLOB,
            "MEDIUMINT": TokenType.MEDIUMINT,
            "MEDIUMTEXT": TokenType.MEDIUMTEXT,
            "MEMBER OF": TokenType.MEMBER_OF,
            "MOD": TokenType.MOD,
            "SEPARATOR": TokenType.SEPARATOR,
            "SERIAL": TokenType.SERIAL,
            "SIGNED": TokenType.BIGINT,
            "SIGNED INTEGER": TokenType.BIGINT,
            "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
            "START": TokenType.BEGIN,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "TINYBLOB": TokenType.TINYBLOB,
            "TINYTEXT": TokenType.TINYTEXT,
            "UNLOCK TABLES": TokenType.COMMAND,
            "UNSIGNED": TokenType.UBIGINT,
            "UNSIGNED INTEGER": TokenType.UBIGINT,
            "YEAR": TokenType.YEAR,
            # Character-set introducers, e.g. _utf8'abc'
            "_ARMSCII8": TokenType.INTRODUCER,
            "_ASCII": TokenType.INTRODUCER,
            "_BIG5": TokenType.INTRODUCER,
            "_BINARY": TokenType.INTRODUCER,
            "_CP1250": TokenType.INTRODUCER,
            "_CP1251": TokenType.INTRODUCER,
            "_CP1256": TokenType.INTRODUCER,
            "_CP1257": TokenType.INTRODUCER,
            "_CP850": TokenType.INTRODUCER,
            "_CP852": TokenType.INTRODUCER,
            "_CP866": TokenType.INTRODUCER,
            "_CP932": TokenType.INTRODUCER,
            "_DEC8": TokenType.INTRODUCER,
            "_EUCJPMS": TokenType.INTRODUCER,
            "_EUCKR": TokenType.INTRODUCER,
            "_GB18030": TokenType.INTRODUCER,
            "_GB2312": TokenType.INTRODUCER,
            "_GBK": TokenType.INTRODUCER,
            "_GEOSTD8": TokenType.INTRODUCER,
            "_GREEK": TokenType.INTRODUCER,
            "_HEBREW": TokenType.INTRODUCER,
            "_HP8": TokenType.INTRODUCER,
            "_KEYBCS2": TokenType.INTRODUCER,
            "_KOI8R": TokenType.INTRODUCER,
            "_KOI8U": TokenType.INTRODUCER,
            "_LATIN1": TokenType.INTRODUCER,
            "_LATIN2": TokenType.INTRODUCER,
            "_LATIN5": TokenType.INTRODUCER,
            "_LATIN7": TokenType.INTRODUCER,
            "_MACCE": TokenType.INTRODUCER,
            "_MACROMAN": TokenType.INTRODUCER,
            "_SJIS": TokenType.INTRODUCER,
            "_SWE7": TokenType.INTRODUCER,
            "_TIS620": TokenType.INTRODUCER,
            "_UCS2": TokenType.INTRODUCER,
            "_UJIS": TokenType.INTRODUCER,
            # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
            "_UTF8": TokenType.INTRODUCER,
            "_UTF16": TokenType.INTRODUCER,
            "_UTF16LE": TokenType.INTRODUCER,
            "_UTF32": TokenType.INTRODUCER,
            "_UTF8MB3": TokenType.INTRODUCER,
            "_UTF8MB4": TokenType.INTRODUCER,
            "@@": TokenType.SESSION_PARAMETER,
        }

        # SHOW is parsed as a real statement (see Parser), not a raw command
        COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE} - {TokenType.SHOW}

    class Parser(parser.Parser):
        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.DATABASE,
            TokenType.MOD,
            TokenType.SCHEMA,
            TokenType.VALUES,
            TokenType.CHARACTER_SET,
        }

        CONJUNCTION = {
            **parser.Parser.CONJUNCTION,
            TokenType.DAMP: exp.And,
            TokenType.XOR: exp.Xor,
        }

        DISJUNCTION = {
            **parser.Parser.DISJUNCTION,
            # || is a logical OR in MySQL (DPIPE_IS_STRING_CONCAT = False)
            TokenType.DPIPE: exp.Or,
        }

        TABLE_ALIAS_TOKENS = (
            parser.Parser.TABLE_ALIAS_TOKENS - parser.Parser.TABLE_INDEX_HINT_TOKENS
        )

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            # x SOUNDS LIKE y is parsed as SOUNDEX(x) = SOUNDEX(y)
            TokenType.SOUNDS_LIKE: lambda self, this: self.expression(
                exp.EQ,
                this=self.expression(exp.Soundex, this=this),
                expression=self.expression(exp.Soundex, this=self._parse_term()),
            ),
            TokenType.MEMBER_OF: lambda self, this: self.expression(
                exp.JSONArrayContains,
                this=this,
                expression=self._parse_wrapped(self._parse_expression),
            ),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "BIT_COUNT": exp.BitwiseCount.from_arg_list,
            "CONVERT_TZ": lambda args: exp.ConvertTimezone(
                source_tz=seq_get(args, 1), target_tz=seq_get(args, 2), timestamp=seq_get(args, 0)
            ),
            "CURDATE": exp.CurrentDate.from_arg_list,
            "CURTIME": exp.CurrentTime.from_arg_list,
            "DATE": lambda args: exp.TsOrDsToDate(this=seq_get(args, 0)),
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "mysql"),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            # Date-part extractors wrap their argument in TsOrDsToDate so that
            # string dates are handled uniformly across dialects.
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFMONTH": lambda args: exp.DayOfMonth(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFWEEK": lambda args: exp.DayOfWeek(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFYEAR": lambda args: exp.DayOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FORMAT": exp.NumberToStr.from_arg_list,
            "FROM_UNIXTIME": build_formatted_time(exp.UnixToTime, "mysql"),
            "ISNULL": isnull_to_is_null,
            # MySQL's LENGTH counts bytes (CHAR_LENGTH counts characters)
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAKETIME": exp.TimeFromParts.from_arg_list,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "MONTHNAME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                format=exp.Literal.string("%B"),
            ),
            "SCHEMA": exp.CurrentSchema.from_arg_list,
            "DATABASE": exp.CurrentSchema.from_arg_list,
            "STR_TO_DATE": _str_to_date,
            "TIMESTAMPDIFF": build_date_delta(exp.TimestampDiff),
            # TO_DAYS(d) = days since year 0, i.e. DATEDIFF(d, '0000-01-01') + 1
            "TO_DAYS": lambda args: exp.paren(
                exp.DateDiff(
                    this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                    expression=exp.TsOrDsToDate(this=exp.Literal.string("0000-01-01")),
                    unit=exp.var("DAY"),
                )
                + 1
            ),
            "VERSION": exp.CurrentVersion.from_arg_list,
            "WEEK": lambda args: exp.Week(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)), mode=seq_get(args, 1)
            ),
            "WEEKOFYEAR": lambda args: exp.WeekOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "GROUP_CONCAT": lambda self: self._parse_group_concat(),
            # https://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values
            "VALUES": lambda self: self.expression(
                exp.Anonymous, this="VALUES", expressions=[self._parse_id_var()]
            ),
            "JSON_VALUE": lambda self: self._parse_json_value(),
            "SUBSTR": lambda self: self._parse_substring(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        # Maps SHOW-statement prefixes to parser callbacks; synonym prefixes
        # (e.g. MASTER LOGS / BINARY LOGS) map to the same canonical kind.
        SHOW_PARSERS = {
            "BINARY LOGS": _show_parser("BINARY LOGS"),
            "MASTER LOGS": _show_parser("BINARY LOGS"),
            "BINLOG EVENTS": _show_parser("BINLOG EVENTS"),
            "CHARACTER SET": _show_parser("CHARACTER SET"),
            "CHARSET": _show_parser("CHARACTER SET"),
            "COLLATION": _show_parser("COLLATION"),
            "FULL COLUMNS": _show_parser("COLUMNS", target="FROM", full=True),
            "COLUMNS": _show_parser("COLUMNS", target="FROM"),
            "CREATE DATABASE": _show_parser("CREATE DATABASE", target=True),
            "CREATE EVENT": _show_parser("CREATE EVENT", target=True),
            "CREATE FUNCTION": _show_parser("CREATE FUNCTION", target=True),
            "CREATE PROCEDURE": _show_parser("CREATE PROCEDURE", target=True),
            "CREATE TABLE": _show_parser("CREATE TABLE", target=True),
            "CREATE TRIGGER": _show_parser("CREATE TRIGGER", target=True),
            "CREATE VIEW": _show_parser("CREATE VIEW", target=True),
            "DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("DATABASES"),
            "ENGINE": _show_parser("ENGINE", target=True),
            "STORAGE ENGINES": _show_parser("ENGINES"),
            "ENGINES": _show_parser("ENGINES"),
            "ERRORS": _show_parser("ERRORS"),
            "EVENTS": _show_parser("EVENTS"),
            "FUNCTION CODE": _show_parser("FUNCTION CODE", target=True),
            "FUNCTION STATUS": _show_parser("FUNCTION STATUS"),
            "GRANTS": _show_parser("GRANTS", target="FOR"),
            "INDEX": _show_parser("INDEX", target="FROM"),
            "MASTER STATUS": _show_parser("MASTER STATUS"),
            "OPEN TABLES": _show_parser("OPEN TABLES"),
            "PLUGINS": _show_parser("PLUGINS"),
            "PROCEDURE CODE": _show_parser("PROCEDURE CODE", target=True),
            "PROCEDURE STATUS": _show_parser("PROCEDURE STATUS"),
            "PRIVILEGES": _show_parser("PRIVILEGES"),
            "FULL PROCESSLIST": _show_parser("PROCESSLIST", full=True),
            "PROCESSLIST": _show_parser("PROCESSLIST"),
            "PROFILE": _show_parser("PROFILE"),
            "PROFILES": _show_parser("PROFILES"),
            "RELAYLOG EVENTS": _show_parser("RELAYLOG EVENTS"),
            "REPLICAS": _show_parser("REPLICAS"),
            "SLAVE HOSTS": _show_parser("REPLICAS"),
            "REPLICA STATUS": _show_parser("REPLICA STATUS"),
            "SLAVE STATUS": _show_parser("REPLICA STATUS"),
            "GLOBAL STATUS": _show_parser("STATUS", global_=True),
            "SESSION STATUS": _show_parser("STATUS"),
            "STATUS": _show_parser("STATUS"),
            "TABLE STATUS": _show_parser("TABLE STATUS"),
            "FULL TABLES": _show_parser("TABLES", full=True),
            "TABLES": _show_parser("TABLES"),
            "TRIGGERS": _show_parser("TRIGGERS"),
            "GLOBAL VARIABLES": _show_parser("VARIABLES", global_=True),
            "SESSION VARIABLES": _show_parser("VARIABLES"),
            "VARIABLES": _show_parser("VARIABLES"),
            "WARNINGS": _show_parser("WARNINGS"),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCK": lambda self: self._parse_property_assignment(exp.LockProperty),
            "PARTITION BY": lambda self: self._parse_partition_property(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "PERSIST": lambda self: self._parse_set_item_assignment("PERSIST"),
            "PERSIST_ONLY": lambda self: self._parse_set_item_assignment("PERSIST_ONLY"),
            "CHARACTER SET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
            "CHARSET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
            "NAMES": lambda self: self._parse_set_item_names(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "FULLTEXT": lambda self: self._parse_index_constraint(kind="FULLTEXT"),
            "INDEX": lambda self: self._parse_index_constraint(),
            "KEY": lambda self: self._parse_index_constraint(),
            "SPATIAL": lambda self: self._parse_index_constraint(kind="SPATIAL"),
            "ZEROFILL": lambda self: self.expression(exp.ZeroFillColumnConstraint),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "MODIFY": lambda self: self._parse_alter_table_alter(),
        }

        ALTER_ALTER_PARSERS = {
            **parser.Parser.ALTER_ALTER_PARSERS,
            "INDEX": lambda self: self._parse_alter_table_alter_index(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "FULLTEXT",
            "INDEX",
            "KEY",
            "SPATIAL",
        }

        # Valid SHOW PROFILE types; values are required follow-up keywords
        PROFILE_TYPES: parser.OPTIONS_TYPE = {
            **dict.fromkeys(("ALL", "CPU", "IPC", "MEMORY", "SOURCE", "SWAPS"), tuple()),
            "BLOCK": ("IO",),
            "CONTEXT": ("SWITCHES",),
            "PAGE": ("FAULTS",),
        }

        TYPE_TOKENS = {
            *parser.Parser.TYPE_TOKENS,
            TokenType.SET,
        }

        ENUM_TYPE_TOKENS = {
            *parser.Parser.ENUM_TYPE_TOKENS,
            TokenType.SET,
        }

        # SELECT [ ALL | DISTINCT | DISTINCTROW ] [ <OPERATION_MODIFIERS> ]
        OPERATION_MODIFIERS = {
            "HIGH_PRIORITY",
            "STRAIGHT_JOIN",
            "SQL_SMALL_RESULT",
            "SQL_BIG_RESULT",
            "SQL_BUFFER_RESULT",
            "SQL_NO_CACHE",
            "SQL_CALC_FOUND_ROWS",
        }

        LOG_DEFAULTS_TO_LN = True
        STRING_ALIASES = True
        VALUES_FOLLOWED_BY_PAREN = False
        SUPPORTS_PARTITION_SELECTION = True

        def _parse_generated_as_identity(
            self,
        ) -> (
            exp.GeneratedAsIdentityColumnConstraint
            | exp.ComputedColumnConstraint
            | exp.GeneratedAsRowColumnConstraint
        ):
            """Parse GENERATED ... AS, handling MySQL's trailing STORED/VIRTUAL keyword."""
            this = super()._parse_generated_as_identity()

            if self._match_texts(("STORED", "VIRTUAL")):
                persisted = self._prev.text.upper() == "STORED"

                if isinstance(this, exp.ComputedColumnConstraint):
                    this.set("persisted", persisted)
                elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
                    this = self.expression(
                        exp.ComputedColumnConstraint, this=this.expression, persisted=persisted
                    )

            return this

        def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
            """Parse a primary-key column, optionally with a prefix length: col(10)."""
            this = self._parse_id_var()
            if not self._match(TokenType.L_PAREN):
                return this

            expression = self._parse_number()
            self._match_r_paren()
            return self.expression(exp.ColumnPrefix, this=this, expression=expression)

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            """Parse an inline INDEX/KEY (or FULLTEXT/SPATIAL) table constraint.

            Collects trailing index options (KEY_BLOCK_SIZE, USING, WITH PARSER,
            COMMENT, [IN]VISIBLE, ENGINE_ATTRIBUTE, ...) until none match.
            """
            if kind:
                self._match_texts(("INDEX", "KEY"))

            this = self._parse_id_var(any_token=False)
            index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
            expressions = self._parse_wrapped_csv(self._parse_ordered)

            options = []
            while True:
                if self._match_text_seq("KEY_BLOCK_SIZE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(key_block_size=self._parse_number())
                elif self._match(TokenType.USING):
                    opt = exp.IndexConstraintOption(using=self._advance_any() and self._prev.text)
                elif self._match_text_seq("WITH", "PARSER"):
                    opt = exp.IndexConstraintOption(parser=self._parse_var(any_token=True))
                elif self._match(TokenType.COMMENT):
                    opt = exp.IndexConstraintOption(comment=self._parse_string())
                elif self._match_text_seq("VISIBLE"):
                    opt = exp.IndexConstraintOption(visible=True)
                elif self._match_text_seq("INVISIBLE"):
                    opt = exp.IndexConstraintOption(visible=False)
                elif self._match_text_seq("ENGINE_ATTRIBUTE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(engine_attr=self._parse_string())
                elif self._match_text_seq("SECONDARY_ENGINE_ATTRIBUTE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(secondary_engine_attr=self._parse_string())
                else:
                    opt = None

                if not opt:
                    break

                options.append(opt)

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expressions=expressions,
                kind=kind,
                index_type=index_type,
                options=options,
            )

        def _parse_show_mysql(
            self,
            this: str,
            target: bool | str = False,
            full: t.Optional[bool] = None,
            global_: t.Optional[bool] = None,
        ) -> exp.Show:
            """Parse the remainder of a MySQL SHOW statement into an `exp.Show`.

            `this` is the canonical SHOW kind; `target` indicates whether a
            target identifier follows (optionally after the given keyword(s));
            `full`/`global_` record the FULL and GLOBAL modifiers.
            """
            json = self._match_text_seq("JSON")

            if target:
                if isinstance(target, str):
                    self._match_text_seq(*target.split(" "))
                target_id = self._parse_id_var()
            else:
                target_id = None

            log = self._parse_string() if self._match_text_seq("IN") else None

            if this in ("BINLOG EVENTS", "RELAYLOG EVENTS"):
                position = self._parse_number() if self._match_text_seq("FROM") else None
                db = None
            else:
                position = None
                db = None

                if self._match(TokenType.FROM):
                    db = self._parse_id_var()
                elif self._match(TokenType.DOT):
                    # db.target form: what was parsed as the target is the db
                    db = target_id
                    target_id = self._parse_id_var()

            channel = self._parse_id_var() if self._match_text_seq("FOR", "CHANNEL") else None

            like = self._parse_string() if self._match_text_seq("LIKE") else None
            where = self._parse_where()

            if this == "PROFILE":
                types = self._parse_csv(lambda: self._parse_var_from_options(self.PROFILE_TYPES))
                query = self._parse_number() if self._match_text_seq("FOR", "QUERY") else None
                offset = self._parse_number() if self._match_text_seq("OFFSET") else None
                limit = self._parse_number() if self._match_text_seq("LIMIT") else None
            else:
                types, query = None, None
                offset, limit = self._parse_oldstyle_limit()

            mutex = True if self._match_text_seq("MUTEX") else None
            mutex = False if self._match_text_seq("STATUS") else mutex

            for_table = self._parse_id_var() if self._match_text_seq("FOR", "TABLE") else None
            for_group = self._parse_string() if self._match_text_seq("FOR", "GROUP") else None
            for_user = self._parse_string() if self._match_text_seq("FOR", "USER") else None
            for_role = self._parse_string() if self._match_text_seq("FOR", "ROLE") else None
            into_outfile = self._parse_string() if self._match_text_seq("INTO", "OUTFILE") else None

            return self.expression(
                exp.Show,
                this=this,
                target=target_id,
                full=full,
                log=log,
                position=position,
                db=db,
                channel=channel,
                like=like,
                where=where,
                types=types,
                query=query,
                offset=offset,
                limit=limit,
                mutex=mutex,
                for_table=for_table,
                for_group=for_group,
                for_user=for_user,
                for_role=for_role,
                into_outfile=into_outfile,
                json=json,
                global_=global_,
            )

        def _parse_oldstyle_limit(
            self,
        ) -> t.Tuple[t.Optional[exp.Expression], t.Optional[exp.Expression]]:
            """Parse LIMIT [offset,] count and return (offset, limit)."""
            limit = None
            offset = None
            if self._match_text_seq("LIMIT"):
                parts = self._parse_csv(self._parse_number)
                if len(parts) == 1:
                    limit = parts[0]
                elif len(parts) == 2:
                    # LIMIT offset, count
                    limit = parts[1]
                    offset = parts[0]

            return offset, limit

        def _parse_set_item_charset(self, kind: str) -> exp.Expression:
            """Parse SET CHARACTER SET / SET CHARSET <charset>."""
            this = self._parse_string() or self._parse_unquoted_field()
            return self.expression(exp.SetItem, this=this, kind=kind)

        def _parse_set_item_names(self) -> exp.Expression:
            """Parse SET NAMES <charset> [COLLATE <collation>]."""
            charset = self._parse_string() or self._parse_unquoted_field()
            if self._match_text_seq("COLLATE"):
                collate = self._parse_string() or self._parse_unquoted_field()
            else:
                collate = None

            return self.expression(exp.SetItem, this=charset, collate=collate, kind="NAMES")

        def _parse_type(
            self, parse_interval: bool = True, fallback_to_identifier: bool = False
        ) -> t.Optional[exp.Expression]:
            # mysql binary is special and can work anywhere, even in order by operations
            # it operates like a no paren func
            if self._match(TokenType.BINARY, advance=False):
                data_type = self._parse_types(check_func=True, allow_identifiers=False)

                if isinstance(data_type, exp.DataType):
                    # BINARY expr is treated as CAST(expr AS BINARY)
                    return self.expression(exp.Cast, this=self._parse_column(), to=data_type)

            return super()._parse_type(
                parse_interval=parse_interval, fallback_to_identifier=fallback_to_identifier
            )

        def _parse_alter_table_alter_index(self) -> exp.AlterIndex:
            """Parse ALTER TABLE ... ALTER INDEX <name> [VISIBLE | INVISIBLE]."""
            index = self._parse_field(any_token=True)

            if self._match_text_seq("VISIBLE"):
                visible = True
            elif self._match_text_seq("INVISIBLE"):
                visible = False
            else:
                visible = None

            return self.expression(exp.AlterIndex, this=index, visible=visible)

        def _parse_partition_property(
            self,
        ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            """Parse PARTITION BY RANGE/LIST, optionally with explicit partitions."""
            partition_cls: t.Optional[t.Type[exp.Expression]] = None
            value_parser = None

            if self._match_text_seq("RANGE"):
                partition_cls = exp.PartitionByRangeProperty
                value_parser = self._parse_partition_range_value
            elif self._match_text_seq("LIST"):
                partition_cls = exp.PartitionByListProperty
                value_parser = self._parse_partition_list_value

            if not partition_cls or not value_parser:
                return None

            partition_expressions = self._parse_wrapped_csv(self._parse_assignment)

            # For Doris and Starrocks
            if not self._match_text_seq("(", "PARTITION", advance=False):
                return partition_expressions

            create_expressions = self._parse_wrapped_csv(value_parser)

            return self.expression(
                partition_cls,
                partition_expressions=partition_expressions,
                create_expressions=create_expressions,
            )

        def _parse_partition_range_value(self) -> t.Optional[exp.Expression]:
            """Parse PARTITION <name> VALUES LESS THAN (...) for RANGE partitioning."""
            self._match_text_seq("PARTITION")
            name = self._parse_id_var()

            if not self._match_text_seq("VALUES", "LESS", "THAN"):
                return name

            values = self._parse_wrapped_csv(self._parse_expression)

            # MAXVALUE is a keyword, not a column reference
            if (
                len(values) == 1
                and isinstance(values[0], exp.Column)
                and values[0].name.upper() == "MAXVALUE"
            ):
                values = [exp.var("MAXVALUE")]

            part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
            return self.expression(exp.Partition, expressions=[part_range])

        def _parse_partition_list_value(self) -> exp.Partition:
            """Parse PARTITION <name> VALUES IN (...) for LIST partitioning."""
            self._match_text_seq("PARTITION")
            name = self._parse_id_var()
            self._match_text_seq("VALUES", "IN")
            values = self._parse_wrapped_csv(self._parse_expression)
            part_list = self.expression(exp.PartitionList, this=name, expressions=values)
            return self.expression(exp.Partition, expressions=[part_list])

        def _parse_primary_key(
            self,
            wrapped_optional: bool = False,
            in_props: bool = False,
            named_primary_key: bool = False,
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            # MySQL primary keys may carry a name, so always allow one
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional, in_props=in_props, named_primary_key=True
            )

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        LOCKING_READS_SUPPORTED = True
        NULL_ORDERING_SUPPORTED: t.Optional[bool] = None
        JOIN_HINTS = False
        TABLE_HINTS = True
        DUPLICATE_KEY_UPDATE_WITH_SET = False
        QUERY_HINT_SEP = " "
        VALUES_AS_TABLE = False
        NVL2_SUPPORTED = False
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_TYPE_REQUIRED_FOR_EXTRACTION = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        SUPPORTS_TO_NUMBER = False
        PARSE_JSON_NAME: t.Optional[str] = None
        PAD_FILL_PATTERN_IS_REQUIRED = True
        WRAP_DERIVED_VALUES = False
        VARCHAR_REQUIRES_SIZE = True
        SUPPORTS_MEDIAN = False
        UPDATE_STATEMENT_SUPPORTS_FROM = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArrayAgg: rename_func("GROUP_CONCAT"),
            exp.BitwiseAndAgg: rename_func("BIT_AND"),
            exp.BitwiseOrAgg: rename_func("BIT_OR"),
            exp.BitwiseXorAgg: rename_func("BIT_XOR"),
            exp.BitwiseCount: rename_func("BIT_COUNT"),
            exp.Chr: lambda self, e: self.chr_sql(e, "CHAR"),
            exp.CurrentDate: no_paren_current_date_sql,
            exp.CurrentVersion: rename_func("VERSION"),
            exp.DateDiff: _remove_ts_or_ds_to_date(
                lambda self, e: self.func("DATEDIFF", e.this, e.expression), ("this", "expression")
            ),
            exp.DateAdd: _remove_ts_or_ds_to_date(date_add_sql("ADD")),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: _remove_ts_or_ds_to_date(date_add_sql("SUB")),
            exp.DateTrunc: _date_trunc_sql,
            exp.Day: _remove_ts_or_ds_to_date(),
            exp.DayOfMonth: _remove_ts_or_ds_to_date(rename_func("DAYOFMONTH")),
            exp.DayOfWeek: _remove_ts_or_ds_to_date(rename_func("DAYOFWEEK")),
            exp.DayOfYear: _remove_ts_or_ds_to_date(rename_func("DAYOFYEAR")),
            exp.GroupConcat: lambda self,
            e: f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""",
            exp.ILike: no_ilike_sql,
            exp.JSONExtractScalar: arrow_json_extract_sql,
            exp.Length: length_or_char_length_sql,
            exp.LogicalOr: rename_func("MAX"),
            exp.LogicalAnd: rename_func("MIN"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.Month: _remove_ts_or_ds_to_date(),
            exp.NullSafeEQ: lambda self, e: self.binary(e, "<=>"),
            exp.NullSafeNEQ: lambda self, e: f"NOT {self.binary(e, '<=>')}",
            exp.NumberToStr: rename_func("FORMAT"),
            exp.Pivot: no_pivot_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.eliminate_semi_and_anti_joins,
                    transforms.eliminate_qualify,
                    transforms.eliminate_full_outer_join,
                    transforms.unnest_generate_date_array_using_recursive_cte,
                ]
            ),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="LOCATE", supports_position=True
            ),
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_date_sql,
            exp.Stuff: rename_func("INSERT"),
            exp.SessionUser: lambda *_: "SESSION_USER()",
            exp.TableSample: no_tablesample_sql,
            exp.TimeFromParts: rename_func("MAKETIME"),
            exp.TimestampAdd: date_add_interval_sql("DATE", "ADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", unit_to_var(e), e.expression, e.this
            ),
            exp.TimestampSub: date_add_interval_sql("DATE", "SUB"),
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeStrToTime: lambda self, e: timestrtotime_sql(
                self,
                e,
                include_precision=not e.args.get("zone"),
            ),
            exp.TimeToStr: _remove_ts_or_ds_to_date(
                lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e))
            ),
            exp.Trim: trim_sql,
            exp.Trunc: rename_func("TRUNCATE"),
            exp.TryCast: no_trycast_sql,
            exp.TsOrDsAdd: date_add_sql("ADD"),
            exp.TsOrDsDiff: lambda self, e: self.func("DATEDIFF", e.this, e.expression),
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unicode: lambda self, e: f"ORD(CONVERT({self.sql(e.this)} USING utf32))",
            exp.UnixToTime: _unix_to_time_sql,
            exp.Week: _remove_ts_or_ds_to_date(),
            exp.WeekOfYear: _remove_ts_or_ds_to_date(rename_func("WEEKOFYEAR")),
            exp.Year: _remove_ts_or_ds_to_date(),
            exp.UtcTimestamp: rename_func("UTC_TIMESTAMP"),
            exp.UtcTime: rename_func("UTC_TIME"),
        }

        # Unsigned types are rendered as their signed base type; the UNSIGNED
        # modifier is handled elsewhere in generation.
        UNSIGNED_TYPE_MAPPING = {
            exp.DataType.Type.UBIGINT: "BIGINT",
            exp.DataType.Type.UINT: "INT",
            exp.DataType.Type.UMEDIUMINT: "MEDIUMINT",
            exp.DataType.Type.USMALLINT: "SMALLINT",
            exp.DataType.Type.UTINYINT: "TINYINT",
            exp.DataType.Type.UDECIMAL: "DECIMAL",
            exp.DataType.Type.UDOUBLE: "DOUBLE",
        }

        TIMESTAMP_TYPE_MAPPING = {
            exp.DataType.Type.DATETIME2: "DATETIME",
            exp.DataType.Type.SMALLDATETIME: "DATETIME",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **UNSIGNED_TYPE_MAPPING,
            **TIMESTAMP_TYPE_MAPPING,
        }

        # These are native MySQL types, so the base-class remappings are dropped
        TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT)
        TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT)
        TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT)
        TYPE_MAPPING.pop(exp.DataType.Type.BLOB)
        TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB)
        # These types are MySQL-native; pop them from the inherited mapping so
        # they are not remapped and render under their own names.
        TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB)
        TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB)

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
        }

        LIMIT_FETCH = "LIMIT"

        LIMIT_ONLY_LITERALS = True

        # Types that must be rendered as CHAR inside CAST(...)
        CHAR_CAST_MAPPING = dict.fromkeys(
            (
                exp.DataType.Type.LONGTEXT,
                exp.DataType.Type.LONGBLOB,
                exp.DataType.Type.MEDIUMBLOB,
                exp.DataType.Type.MEDIUMTEXT,
                exp.DataType.Type.TEXT,
                exp.DataType.Type.TINYBLOB,
                exp.DataType.Type.TINYTEXT,
                exp.DataType.Type.VARCHAR,
            ),
            "CHAR",
        )
        # Integer-like types that must be rendered as SIGNED inside CAST(...)
        SIGNED_CAST_MAPPING = dict.fromkeys(
            (
                exp.DataType.Type.BIGINT,
                exp.DataType.Type.BOOLEAN,
                exp.DataType.Type.INT,
                exp.DataType.Type.SMALLINT,
                exp.DataType.Type.TINYINT,
                exp.DataType.Type.MEDIUMINT,
            ),
            "SIGNED",
        )

        # MySQL doesn't support many datatypes in cast.
        # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast
        CAST_MAPPING = {
            **CHAR_CAST_MAPPING,
            **SIGNED_CAST_MAPPING,
            exp.DataType.Type.UBIGINT: "UNSIGNED",
        }

        # Casts to these types are rendered via the TIMESTAMP() function instead
        # of CAST syntax (see cast_sql below).
        TIMESTAMP_FUNC_TYPES = {
            exp.DataType.Type.TIMESTAMPTZ,
            exp.DataType.Type.TIMESTAMPLTZ,
        }

        # https://dev.mysql.com/doc/refman/8.0/en/keywords.html
        RESERVED_KEYWORDS = {
            "accessible",
            "add",
            "all",
            "alter",
            "analyze",
            "and",
            "as",
            "asc",
            "asensitive",
            "before",
            "between",
            "bigint",
            "binary",
            "blob",
            "both",
            "by",
            "call",
            "cascade",
            "case",
            "change",
            "char",
            "character",
            "check",
            "collate",
            "column",
            "condition",
            "constraint",
            "continue",
            "convert",
            "create",
            "cross",
            "cube",
            "cume_dist",
            "current_date",
            "current_time",
            "current_timestamp",
            "current_user",
            "cursor",
            "database",
            "databases",
            "day_hour",
            "day_microsecond",
            "day_minute",
            "day_second",
            "dec",
            "decimal",
            "declare",
            "default",
            "delayed",
            "delete",
            "dense_rank",
            "desc",
            "describe",
            "deterministic",
            "distinct",
            "distinctrow",
            "div",
            "double",
            "drop",
            "dual",
            "each",
            "else",
            "elseif",
            "empty",
            "enclosed",
            "escaped",
            "except",
            "exists",
            "exit",
            "explain",
            "false",
            "fetch",
            "first_value",
            "float",
            "float4",
            "float8",
            "for",
            "force",
            "foreign",
            "from",
            "fulltext",
            "function",
            "generated",
            "get",
            "grant",
            "group",
            "grouping",
            "groups",
            "having",
            "high_priority",
            "hour_microsecond",
            "hour_minute",
            "hour_second",
            "if",
            "ignore",
            "in",
            "index",
            "infile",
            "inner",
            "inout",
            "insensitive",
            "insert",
            "int",
            "int1",
            "int2",
            "int3",
            "int4",
            "int8",
            "integer",
            "intersect",
            "interval",
            "into",
            "io_after_gtids",
            "io_before_gtids",
            "is",
            "iterate",
            "join",
            "json_table",
            "key",
            "keys",
            "kill",
            "lag",
            "last_value",
            "lateral",
            "lead",
            "leading",
            "leave",
            "left",
            "like",
            "limit",
            "linear",
            "lines",
            "load",
            "localtime",
            "localtimestamp",
            "lock",
            "long",
            "longblob",
            "longtext",
            "loop",
            "low_priority",
            "master_bind",
            "master_ssl_verify_server_cert",
            "match",
            "maxvalue",
            "mediumblob",
            "mediumint",
            "mediumtext",
            "middleint",
            "minute_microsecond",
            "minute_second",
            "mod",
            "modifies",
            "natural",
            "not",
            "no_write_to_binlog",
            "nth_value",
            "ntile",
            "null",
            "numeric",
            "of",
            "on",
            "optimize",
            "optimizer_costs",
            "option",
            "optionally",
            "or",
            "order",
            "out",
            "outer",
            "outfile",
            "over",
            "partition",
            "percent_rank",
            "precision",
            "primary",
            "procedure",
            "purge",
            "range",
            "rank",
            "read",
            "reads",
            "read_write",
            "real",
            "recursive",
            "references",
            "regexp",
            "release",
            "rename",
            "repeat",
            "replace",
            "require",
            "resignal",
            "restrict",
            "return",
            "revoke",
            "right",
            "rlike",
            "row",
            "rows",
            "row_number",
            "schema",
            "schemas",
            "second_microsecond",
            "select",
            "sensitive",
            "separator",
            "set",
            "show",
            "signal",
            "smallint",
            "spatial",
            "specific",
            "sql",
            "sqlexception",
            "sqlstate",
            "sqlwarning",
            "sql_big_result",
            "sql_calc_found_rows",
            "sql_small_result",
            "ssl",
            "starting",
            "stored",
            "straight_join",
            "system",
            "table",
            "terminated",
            "then",
            "tinyblob",
            "tinyint",
            "tinytext",
            "to",
            "trailing",
            "trigger",
            "true",
            "undo",
            "union",
            "unique",
            "unlock",
            "unsigned",
            "update",
            "usage",
            "use",
            "using",
            "utc_date",
            "utc_time",
            "utc_timestamp",
            "values",
            "varbinary",
            "varchar",
            "varcharacter",
            "varying",
            "virtual",
            "when",
            "where",
            "while",
            "window",
            "with",
            "write",
            "xor",
            "year_month",
            "zerofill",
        }

        def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
            """Render a generated column as `GENERATED ALWAYS AS (...) STORED|VIRTUAL`."""
            persisted = "STORED" if expression.args.get("persisted") else "VIRTUAL"
            return f"GENERATED ALWAYS AS ({self.sql(expression.this.unnest())}) {persisted}"

        def array_sql(self, expression: exp.Array) -> str:
            """MySQL has no ARRAY constructor; warn and fall back to a plain function call."""
            self.unsupported("Arrays are not supported by MySQL")
            return self.function_fallback_sql(expression)

        def arraycontainsall_sql(self, expression: exp.ArrayContainsAll) -> str:
            """MySQL has no array containment operator; warn and fall back."""
            self.unsupported("Array operations are not supported by MySQL")
            return self.function_fallback_sql(expression)

        def dpipe_sql(self, expression: exp.DPipe) -> str:
            # `||` is logical OR in default MySQL mode, so string concat becomes CONCAT(...)
            return self.func("CONCAT", *expression.flatten())

        def extract_sql(self, expression: exp.Extract) -> str:
            # EXTRACT(EPOCH ...) has no MySQL equivalent; use UNIX_TIMESTAMP instead
            unit = expression.name
            if unit and unit.lower() == "epoch":
                return self.func("UNIX_TIMESTAMP", expression.expression)

            return super().extract_sql(expression)

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render a data type, appending UNSIGNED for unsigned integer/decimal types."""
            if (
                self.VARCHAR_REQUIRES_SIZE
                and expression.is_type(exp.DataType.Type.VARCHAR)
                and not expression.expressions
            ):
                # `VARCHAR` must always have a size - if it doesn't, we always generate `TEXT`
                return "TEXT"

            # https://dev.mysql.com/doc/refman/8.0/en/numeric-type-syntax.html
            result = super().datatype_sql(expression)
            if expression.this in self.UNSIGNED_TYPE_MAPPING:
                result = f"{result} UNSIGNED"

            return result

        def jsonarraycontains_sql(self, expression: exp.JSONArrayContains) -> str:
            """Render JSON array membership with MySQL's `MEMBER OF(...)` operator."""
            return f"{self.sql(expression, 'this')} MEMBER OF({self.sql(expression, 'expression')})"

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            """Render CAST, remapping target types MySQL's CAST doesn't accept.

            TZ-aware timestamp targets become a TIMESTAMP() call; other types are
            rewritten in place via CAST_MAPPING before delegating to the base class.
            """
            if expression.to.this in self.TIMESTAMP_FUNC_TYPES:
                return self.func("TIMESTAMP", expression.this)

            to = self.CAST_MAPPING.get(expression.to.this)

            if to:
                # NOTE: mutates the AST node so the base generator sees the mapped type
                expression.to.set("this", to)
            return super().cast_sql(expression)

        def show_sql(self, expression: exp.Show) -> str:
            """Render a MySQL SHOW statement, assembling each optional clause in
            the order MySQL expects (mirrors Parser._parse_show_mysql)."""
            this = f" {expression.name}"
            full = " FULL" if expression.args.get("full") else ""
            global_ = " GLOBAL" if expression.args.get("global_") else ""

            target = self.sql(expression, "target")
            target = f" {target}" if target else ""
            # The keyword introducing the target depends on the SHOW variant
            if expression.name in ("COLUMNS", "INDEX"):
                target = f" FROM{target}"
            elif expression.name == "GRANTS":
                target = f" FOR{target}"
            elif expression.name in ("LINKS", "PARTITIONS"):
                target = f" ON{target}" if target else ""
            elif expression.name == "PROJECTIONS":
                target = f" ON TABLE{target}" if target else ""

            db = self._prefixed_sql("FROM", expression, "db")

            like = self._prefixed_sql("LIKE", expression, "like")
            where = self.sql(expression, "where")

            types = self.expressions(expression, key="types")
            types = f" {types}" if types else types
            query = self._prefixed_sql("FOR QUERY", expression, "query")

            if expression.name == "PROFILE":
                # SHOW PROFILE uses explicit OFFSET/LIMIT keywords
                offset = self._prefixed_sql("OFFSET", expression, "offset")
                limit = self._prefixed_sql("LIMIT", expression, "limit")
            else:
                # Other variants use the old-style `LIMIT offset, count` form
                offset = ""
                limit = self._oldstyle_limit_sql(expression)

            log = self._prefixed_sql("IN", expression, "log")
            position = self._prefixed_sql("FROM", expression, "position")

            channel = self._prefixed_sql("FOR CHANNEL", expression, "channel")

            if expression.name == "ENGINE":
                mutex_or_status = " MUTEX" if expression.args.get("mutex") else " STATUS"
            else:
                mutex_or_status = ""

            for_table = self._prefixed_sql("FOR TABLE", expression, "for_table")
            for_group = self._prefixed_sql("FOR GROUP", expression, "for_group")
            for_user = self._prefixed_sql("FOR USER", expression, "for_user")
            for_role = self._prefixed_sql("FOR ROLE", expression, "for_role")
            into_outfile = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile")
            json = " JSON" if expression.args.get("json") else ""

            return f"SHOW{full}{global_}{this}{json}{target}{for_table}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}{for_group}{for_user}{for_role}{into_outfile}"

        def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str:
            """To avoid TO keyword in ALTER ... RENAME statements.
            It's moved from Doris, because it's the same for all MySQL, Doris, and StarRocks.
            """
            return super().alterrename_sql(expression, include_to=False)

        def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
            """Render ALTER COLUMN; type changes use MySQL's `MODIFY COLUMN` form."""
            dtype = self.sql(expression, "dtype")
            if not dtype:
                return super().altercolumn_sql(expression)

            this = self.sql(expression, "this")
            return f"MODIFY COLUMN {this} {dtype}"

        def _prefixed_sql(self, prefix: str, expression: exp.Expression, arg: str) -> str:
            # Helper: render ` PREFIX <arg sql>` or "" when the arg is absent
            sql = self.sql(expression, arg)
            return f" {prefix} {sql}" if sql else ""

        def _oldstyle_limit_sql(self, expression: exp.Show) -> str:
            # Old-style SHOW limit: `LIMIT [offset,] count`
            limit = self.sql(expression, "limit")
            offset = self.sql(expression, "offset")
            if limit:
                limit_offset = f"{offset}, {limit}" if offset else limit
                return f" LIMIT {limit_offset}"
            return ""

        def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
            """Emulate TIMESTAMP_TRUNC via DATE_ADD(anchor, TIMESTAMPDIFF(unit, anchor, ts))."""
            unit = expression.args.get("unit")

            # Pick an old-enough date to avoid negative timestamp diffs
            start_ts = "'0000-01-01 00:00:00'"

            # Source: https://stackoverflow.com/a/32955740
            timestamp_diff = build_date_delta(exp.TimestampDiff)([unit, start_ts, expression.this])
            interval = exp.Interval(this=timestamp_diff, unit=unit)
            dateadd = build_date_delta_with_interval(exp.DateAdd)([start_ts, interval])

            return self.sql(dateadd)

        def converttimezone_sql(self, expression: exp.ConvertTimezone) -> str:
            """Render timezone conversion as CONVERT_TZ(ts, from_tz, to_tz)."""
            from_tz = expression.args.get("source_tz")
            to_tz = expression.args.get("target_tz")
            dt = expression.args.get("timestamp")

            return self.func("CONVERT_TZ", dt, from_tz, to_tz)

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            # No MySQL equivalent; warn and emit the bare operand
            self.unsupported("AT TIME ZONE is not supported by MySQL")
            return self.sql(expression.this)

        def isascii_sql(self, expression: exp.IsAscii) -> str:
            # Emulated with a POSIX character-class regex over the whole string
            return f"REGEXP_LIKE({self.sql(expression.this)}, '^[[:ascii:]]*$')"

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            # https://dev.mysql.com/doc/refman/8.4/en/window-function-descriptions.html
            self.unsupported("MySQL does not support IGNORE NULLS.")
            return self.sql(expression.this)

        @unsupported_args("this")
        def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
            """Render CURRENT_SCHEMA as SCHEMA(); any argument is unsupported."""
            return self.func("SCHEMA")

        def partition_sql(self, expression: exp.Partition) -> str:
            # Inside PARTITION BY RANGE/LIST properties, emit only the expression list
            parent = expression.parent
            if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
                return self.expressions(expression, flat=True)
            return super().partition_sql(expression)

        def _partition_by_sql(
            self, expression: exp.PartitionByRangeProperty | exp.PartitionByListProperty, kind: str
        ) -> str:
            # Shared renderer for PARTITION BY RANGE (...) (...) / PARTITION BY LIST (...) (...)
            partitions = self.expressions(expression, key="partition_expressions", flat=True)
            create = self.expressions(expression, key="create_expressions", flat=True)
            return f"PARTITION BY {kind} ({partitions}) ({create})"

        def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
            return self._partition_by_sql(expression, "RANGE")

        def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
            return self._partition_by_sql(expression, "LIST")

        def partitionrange_sql(self, expression: exp.PartitionRange) -> str:
            """Render a single partition spec: PARTITION name VALUES LESS THAN (...)."""
            name = self.sql(expression, "this")
            values = self.expressions(expression, flat=True)
            return f"PARTITION {name} VALUES LESS THAN ({values})"
def date_add_sql(
    kind: str,
) -> t.Callable[[generator.Generator, exp.Expression], str]:
    """Build a generator callback rendering a `DATE_{kind}` call.

    The returned callback emits ``DATE_<kind>(this, INTERVAL expr unit)``,
    where the interval unit is derived from the expression via `unit_to_var`.
    """

    def render(self: generator.Generator, expression: exp.Expression) -> str:
        interval = exp.Interval(this=expression.expression, unit=unit_to_var(expression))
        return self.func(f"DATE_{kind}", expression.this, interval)

    return render
class MySQL(Dialect):
    """MySQL dialect: tokenizer, parser, and settings (Generator defined separately)."""

    PROMOTE_TO_INFERRED_DATETIME_TYPE = True

    # https://dev.mysql.com/doc/refman/8.0/en/identifiers.html
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # We default to treating all identifiers as case-sensitive, since it matches MySQL's
    # behavior on Linux systems. For MacOS and Windows systems, one can override this
    # setting by specifying `dialect="mysql, normalization_strategy = lowercase"`.
    #
    # See also https://dev.mysql.com/doc/refman/8.2/en/identifier-case-sensitivity.html
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    TIME_FORMAT = "'%Y-%m-%d %T'"
    DPIPE_IS_STRING_CONCAT = False
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    SAFE_DIVISION = True
    SAFE_TO_ELIMINATE_DOUBLE_NEGATION = False
    LEAST_GREATEST_IGNORES_NULLS = False

    EXPRESSION_METADATA = EXPRESSION_METADATA.copy()

    # Mapping of MySQL time-format specifiers to the generic (Python-style) ones.
    # https://prestodb.io/docs/current/functions/datetime.html#mysql-date-functions
    TIME_MAPPING = {
        "%M": "%B",
        "%c": "%-m",
        "%e": "%-d",
        "%h": "%I",
        "%i": "%M",
        "%s": "%S",
        "%u": "%W",
        "%k": "%-H",
        "%l": "%-I",
        "%T": "%H:%M:%S",
        "%W": "%A",
    }

    # MySQL additionally supports compound interval units (e.g. DAY_HOUR)
    VALID_INTERVAL_UNITS = {
        *Dialect.VALID_INTERVAL_UNITS,
        "SECOND_MICROSECOND",
        "MINUTE_MICROSECOND",
        "MINUTE_SECOND",
        "HOUR_MICROSECOND",
        "HOUR_SECOND",
        "HOUR_MINUTE",
        "DAY_MICROSECOND",
        "DAY_SECOND",
        "DAY_MINUTE",
        "DAY_HOUR",
        "YEAR_MONTH",
    }

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["'", '"', "\\"]
        BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
        HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
        # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
        ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BLOB": TokenType.BLOB,
            "CHARSET": TokenType.CHARACTER_SET,
            "DISTINCTROW": TokenType.DISTINCT,
            "EXPLAIN": TokenType.DESCRIBE,
            "FORCE": TokenType.FORCE,
            "IGNORE": TokenType.IGNORE,
            "KEY": TokenType.KEY,
            "LOCK TABLES": TokenType.COMMAND,
            "LONGBLOB": TokenType.LONGBLOB,
            "LONGTEXT": TokenType.LONGTEXT,
            "MEDIUMBLOB": TokenType.MEDIUMBLOB,
            "MEDIUMINT": TokenType.MEDIUMINT,
            "MEDIUMTEXT": TokenType.MEDIUMTEXT,
            "MEMBER OF": TokenType.MEMBER_OF,
            "MOD": TokenType.MOD,
            "SEPARATOR": TokenType.SEPARATOR,
            "SERIAL": TokenType.SERIAL,
            "SIGNED": TokenType.BIGINT,
            "SIGNED INTEGER": TokenType.BIGINT,
            "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
            "START": TokenType.BEGIN,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
            "TINYBLOB": TokenType.TINYBLOB,
            "TINYTEXT": TokenType.TINYTEXT,
            "UNLOCK TABLES": TokenType.COMMAND,
            "UNSIGNED": TokenType.UBIGINT,
            "UNSIGNED INTEGER": TokenType.UBIGINT,
            "YEAR": TokenType.YEAR,
            # Character-set introducers prefix string literals, e.g. _utf8'abc'
            "_ARMSCII8": TokenType.INTRODUCER,
            "_ASCII": TokenType.INTRODUCER,
            "_BIG5": TokenType.INTRODUCER,
            "_BINARY": TokenType.INTRODUCER,
            "_CP1250": TokenType.INTRODUCER,
            "_CP1251": TokenType.INTRODUCER,
            "_CP1256": TokenType.INTRODUCER,
            "_CP1257": TokenType.INTRODUCER,
            "_CP850": TokenType.INTRODUCER,
            "_CP852": TokenType.INTRODUCER,
            "_CP866": TokenType.INTRODUCER,
            "_CP932": TokenType.INTRODUCER,
            "_DEC8": TokenType.INTRODUCER,
            "_EUCJPMS": TokenType.INTRODUCER,
            "_EUCKR": TokenType.INTRODUCER,
            "_GB18030": TokenType.INTRODUCER,
            "_GB2312": TokenType.INTRODUCER,
            "_GBK": TokenType.INTRODUCER,
            "_GEOSTD8": TokenType.INTRODUCER,
            "_GREEK": TokenType.INTRODUCER,
            "_HEBREW": TokenType.INTRODUCER,
            "_HP8": TokenType.INTRODUCER,
            "_KEYBCS2": TokenType.INTRODUCER,
            "_KOI8R": TokenType.INTRODUCER,
            "_KOI8U": TokenType.INTRODUCER,
            "_LATIN1": TokenType.INTRODUCER,
            "_LATIN2": TokenType.INTRODUCER,
            "_LATIN5": TokenType.INTRODUCER,
            "_LATIN7": TokenType.INTRODUCER,
            "_MACCE": TokenType.INTRODUCER,
            "_MACROMAN": TokenType.INTRODUCER,
            "_SJIS": TokenType.INTRODUCER,
            "_SWE7": TokenType.INTRODUCER,
            "_TIS620": TokenType.INTRODUCER,
            "_UCS2": TokenType.INTRODUCER,
            "_UJIS": TokenType.INTRODUCER,
            # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
            "_UTF8": TokenType.INTRODUCER,
            "_UTF16": TokenType.INTRODUCER,
            "_UTF16LE": TokenType.INTRODUCER,
            "_UTF32": TokenType.INTRODUCER,
            "_UTF8MB3": TokenType.INTRODUCER,
            "_UTF8MB4": TokenType.INTRODUCER,
            "@@": TokenType.SESSION_PARAMETER,
        }

        # SHOW is parsed as a real statement here, not tokenized as a raw command
        COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE} - {TokenType.SHOW}

    class Parser(parser.Parser):
        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.DATABASE,
            TokenType.MOD,
            TokenType.SCHEMA,
            TokenType.VALUES,
            TokenType.CHARACTER_SET,
        }

        CONJUNCTION = {
            **parser.Parser.CONJUNCTION,
            TokenType.DAMP: exp.And,
            TokenType.XOR: exp.Xor,
        }

        DISJUNCTION = {
            **parser.Parser.DISJUNCTION,
            TokenType.DPIPE: exp.Or,
        }

        TABLE_ALIAS_TOKENS = (
            parser.Parser.TABLE_ALIAS_TOKENS - parser.Parser.TABLE_INDEX_HINT_TOKENS
        )

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            # `a SOUNDS LIKE b` => SOUNDEX(a) = SOUNDEX(b)
            TokenType.SOUNDS_LIKE: lambda self, this: self.expression(
                exp.EQ,
                this=self.expression(exp.Soundex, this=this),
                expression=self.expression(exp.Soundex, this=self._parse_term()),
            ),
            TokenType.MEMBER_OF: lambda self, this: self.expression(
                exp.JSONArrayContains,
                this=this,
                expression=self._parse_wrapped(self._parse_expression),
            ),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
            "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
            "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
            "BIT_COUNT": exp.BitwiseCount.from_arg_list,
            "CONVERT_TZ": lambda args: exp.ConvertTimezone(
                source_tz=seq_get(args, 1), target_tz=seq_get(args, 2), timestamp=seq_get(args, 0)
            ),
            "CURDATE": exp.CurrentDate.from_arg_list,
            "CURTIME": exp.CurrentTime.from_arg_list,
            "DATE": lambda args: exp.TsOrDsToDate(this=seq_get(args, 0)),
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "mysql"),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFMONTH": lambda args: exp.DayOfMonth(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFWEEK": lambda args: exp.DayOfWeek(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "DAYOFYEAR": lambda args: exp.DayOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FORMAT": exp.NumberToStr.from_arg_list,
            "FROM_UNIXTIME": build_formatted_time(exp.UnixToTime, "mysql"),
            "ISNULL": isnull_to_is_null,
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAKETIME": exp.TimeFromParts.from_arg_list,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "MONTHNAME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                format=exp.Literal.string("%B"),
            ),
            "SCHEMA": exp.CurrentSchema.from_arg_list,
            "DATABASE": exp.CurrentSchema.from_arg_list,
            "STR_TO_DATE": _str_to_date,
            "TIMESTAMPDIFF": build_date_delta(exp.TimestampDiff),
            # TO_DAYS(d) = days since year 0 => DATEDIFF(d, '0000-01-01') + 1
            "TO_DAYS": lambda args: exp.paren(
                exp.DateDiff(
                    this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                    expression=exp.TsOrDsToDate(this=exp.Literal.string("0000-01-01")),
                    unit=exp.var("DAY"),
                )
                + 1
            ),
            "VERSION": exp.CurrentVersion.from_arg_list,
            "WEEK": lambda args: exp.Week(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)), mode=seq_get(args, 1)
            ),
            "WEEKOFYEAR": lambda args: exp.WeekOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "GROUP_CONCAT": lambda self: self._parse_group_concat(),
            # https://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values
            "VALUES": lambda self: self.expression(
                exp.Anonymous, this="VALUES", expressions=[self._parse_id_var()]
            ),
            "JSON_VALUE": lambda self: self._parse_json_value(),
            "SUBSTR": lambda self: self._parse_substring(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        # Maps each SHOW variant to a configured _parse_show_mysql callback;
        # aliases (e.g. SCHEMAS -> DATABASES) share the canonical variant name.
        SHOW_PARSERS = {
            "BINARY LOGS": _show_parser("BINARY LOGS"),
            "MASTER LOGS": _show_parser("BINARY LOGS"),
            "BINLOG EVENTS": _show_parser("BINLOG EVENTS"),
            "CHARACTER SET": _show_parser("CHARACTER SET"),
            "CHARSET": _show_parser("CHARACTER SET"),
            "COLLATION": _show_parser("COLLATION"),
            "FULL COLUMNS": _show_parser("COLUMNS", target="FROM", full=True),
            "COLUMNS": _show_parser("COLUMNS", target="FROM"),
            "CREATE DATABASE": _show_parser("CREATE DATABASE", target=True),
            "CREATE EVENT": _show_parser("CREATE EVENT", target=True),
            "CREATE FUNCTION": _show_parser("CREATE FUNCTION", target=True),
            "CREATE PROCEDURE": _show_parser("CREATE PROCEDURE", target=True),
            "CREATE TABLE": _show_parser("CREATE TABLE", target=True),
            "CREATE TRIGGER": _show_parser("CREATE TRIGGER", target=True),
            "CREATE VIEW": _show_parser("CREATE VIEW", target=True),
            "DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("DATABASES"),
            "ENGINE": _show_parser("ENGINE", target=True),
            "STORAGE ENGINES": _show_parser("ENGINES"),
            "ENGINES": _show_parser("ENGINES"),
            "ERRORS": _show_parser("ERRORS"),
            "EVENTS": _show_parser("EVENTS"),
            "FUNCTION CODE": _show_parser("FUNCTION CODE", target=True),
            "FUNCTION STATUS": _show_parser("FUNCTION STATUS"),
            "GRANTS": _show_parser("GRANTS", target="FOR"),
            "INDEX": _show_parser("INDEX", target="FROM"),
            "MASTER STATUS": _show_parser("MASTER STATUS"),
            "OPEN TABLES": _show_parser("OPEN TABLES"),
            "PLUGINS": _show_parser("PLUGINS"),
            "PROCEDURE CODE": _show_parser("PROCEDURE CODE", target=True),
            "PROCEDURE STATUS": _show_parser("PROCEDURE STATUS"),
            "PRIVILEGES": _show_parser("PRIVILEGES"),
            "FULL PROCESSLIST": _show_parser("PROCESSLIST", full=True),
            "PROCESSLIST": _show_parser("PROCESSLIST"),
            "PROFILE": _show_parser("PROFILE"),
            "PROFILES": _show_parser("PROFILES"),
            "RELAYLOG EVENTS": _show_parser("RELAYLOG EVENTS"),
            "REPLICAS": _show_parser("REPLICAS"),
            "SLAVE HOSTS": _show_parser("REPLICAS"),
            "REPLICA STATUS": _show_parser("REPLICA STATUS"),
            "SLAVE STATUS": _show_parser("REPLICA STATUS"),
            "GLOBAL STATUS": _show_parser("STATUS", global_=True),
            "SESSION STATUS": _show_parser("STATUS"),
            "STATUS": _show_parser("STATUS"),
            "TABLE STATUS": _show_parser("TABLE STATUS"),
            "FULL TABLES": _show_parser("TABLES", full=True),
            "TABLES": _show_parser("TABLES"),
            "TRIGGERS": _show_parser("TRIGGERS"),
            "GLOBAL VARIABLES": _show_parser("VARIABLES", global_=True),
            "SESSION VARIABLES": _show_parser("VARIABLES"),
            "VARIABLES": _show_parser("VARIABLES"),
            "WARNINGS": _show_parser("WARNINGS"),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCK": lambda self: self._parse_property_assignment(exp.LockProperty),
            "PARTITION BY": lambda self: self._parse_partition_property(),
        }

        SET_PARSERS = {
            **parser.Parser.SET_PARSERS,
            "PERSIST": lambda self: self._parse_set_item_assignment("PERSIST"),
            "PERSIST_ONLY": lambda self: self._parse_set_item_assignment("PERSIST_ONLY"),
            "CHARACTER SET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
            "CHARSET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
            "NAMES": lambda self: self._parse_set_item_names(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "FULLTEXT": lambda self: self._parse_index_constraint(kind="FULLTEXT"),
            "INDEX": lambda self: self._parse_index_constraint(),
            "KEY": lambda self: self._parse_index_constraint(),
            "SPATIAL": lambda self: self._parse_index_constraint(kind="SPATIAL"),
            "ZEROFILL": lambda self: self.expression(exp.ZeroFillColumnConstraint),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "MODIFY": lambda self: self._parse_alter_table_alter(),
        }

        ALTER_ALTER_PARSERS = {
            **parser.Parser.ALTER_ALTER_PARSERS,
            "INDEX": lambda self: self._parse_alter_table_alter_index(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "FULLTEXT",
            "INDEX",
            "KEY",
            "SPATIAL",
        }

        # Valid SHOW PROFILE type options, some of which are two words (e.g. BLOCK IO)
        PROFILE_TYPES: parser.OPTIONS_TYPE = {
            **dict.fromkeys(("ALL", "CPU", "IPC", "MEMORY", "SOURCE", "SWAPS"), tuple()),
            "BLOCK": ("IO",),
            "CONTEXT": ("SWITCHES",),
            "PAGE": ("FAULTS",),
        }

        TYPE_TOKENS = {
            *parser.Parser.TYPE_TOKENS,
            TokenType.SET,
        }

        ENUM_TYPE_TOKENS = {
            *parser.Parser.ENUM_TYPE_TOKENS,
            TokenType.SET,
        }

        # SELECT [ ALL | DISTINCT | DISTINCTROW ] [ <OPERATION_MODIFIERS> ]
        OPERATION_MODIFIERS = {
            "HIGH_PRIORITY",
            "STRAIGHT_JOIN",
            "SQL_SMALL_RESULT",
            "SQL_BIG_RESULT",
            "SQL_BUFFER_RESULT",
            "SQL_NO_CACHE",
            "SQL_CALC_FOUND_ROWS",
        }

        LOG_DEFAULTS_TO_LN = True
        STRING_ALIASES = True
        VALUES_FOLLOWED_BY_PAREN = False
        SUPPORTS_PARTITION_SELECTION = True

        def _parse_generated_as_identity(
            self,
        ) -> (
            exp.GeneratedAsIdentityColumnConstraint
            | exp.ComputedColumnConstraint
            | exp.GeneratedAsRowColumnConstraint
        ):
            """Parse GENERATED AS ..., also consuming a trailing STORED/VIRTUAL keyword."""
            this = super()._parse_generated_as_identity()

            if self._match_texts(("STORED", "VIRTUAL")):
                persisted = self._prev.text.upper() == "STORED"

                if isinstance(this, exp.ComputedColumnConstraint):
                    this.set("persisted", persisted)
                elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
                    # Identity constraint with STORED/VIRTUAL is really a computed column
                    this = self.expression(
                        exp.ComputedColumnConstraint, this=this.expression, persisted=persisted
                    )

            return this

        def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
            """Parse a key part, supporting MySQL's column-prefix form `col(n)`."""
            this = self._parse_id_var()
            if not self._match(TokenType.L_PAREN):
                return this

            expression = self._parse_number()
            self._match_r_paren()
            return self.expression(exp.ColumnPrefix, this=this, expression=expression)

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            """Parse an inline INDEX/KEY (optionally FULLTEXT/SPATIAL) constraint,
            including its name, USING type, column list, and trailing options."""
            if kind:
                self._match_texts(("INDEX", "KEY"))

            this = self._parse_id_var(any_token=False)
            index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
            expressions = self._parse_wrapped_csv(self._parse_ordered)

            # Consume index options until none match
            options = []
            while True:
                if self._match_text_seq("KEY_BLOCK_SIZE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(key_block_size=self._parse_number())
                elif self._match(TokenType.USING):
                    opt = exp.IndexConstraintOption(using=self._advance_any() and self._prev.text)
                elif self._match_text_seq("WITH", "PARSER"):
                    opt = exp.IndexConstraintOption(parser=self._parse_var(any_token=True))
                elif self._match(TokenType.COMMENT):
                    opt = exp.IndexConstraintOption(comment=self._parse_string())
                elif self._match_text_seq("VISIBLE"):
                    opt = exp.IndexConstraintOption(visible=True)
                elif self._match_text_seq("INVISIBLE"):
                    opt = exp.IndexConstraintOption(visible=False)
                elif self._match_text_seq("ENGINE_ATTRIBUTE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(engine_attr=self._parse_string())
                elif self._match_text_seq("SECONDARY_ENGINE_ATTRIBUTE"):
                    self._match(TokenType.EQ)
                    opt = exp.IndexConstraintOption(secondary_engine_attr=self._parse_string())
                else:
                    opt = None

                if not opt:
                    break

                options.append(opt)

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expressions=expressions,
                kind=kind,
                index_type=index_type,
                options=options,
            )

        def _parse_show_mysql(
            self,
            this: str,
            target: bool | str = False,
            full: t.Optional[bool] = None,
            global_: t.Optional[bool] = None,
        ) -> exp.Show:
            """Parse the clause soup following a SHOW keyword into an exp.Show.

            `target` controls whether an object name follows (True) and which
            keyword introduces it (a string such as "FROM" or "FOR").
            """
            json = self._match_text_seq("JSON")

            if target:
                if isinstance(target, str):
                    self._match_text_seq(*target.split(" "))
                target_id = self._parse_id_var()
            else:
                target_id = None

            log = self._parse_string() if self._match_text_seq("IN") else None

            if this in ("BINLOG EVENTS", "RELAYLOG EVENTS"):
                position = self._parse_number() if self._match_text_seq("FROM") else None
                db = None
            else:
                position = None
                db = None

                if self._match(TokenType.FROM):
                    db = self._parse_id_var()
                elif self._match(TokenType.DOT):
                    # `SHOW ... db.target` — the first id was the db, not the target
                    db = target_id
                    target_id = self._parse_id_var()

            channel = self._parse_id_var() if self._match_text_seq("FOR", "CHANNEL") else None

            like = self._parse_string() if self._match_text_seq("LIKE") else None
            where = self._parse_where()

            if this == "PROFILE":
                types = self._parse_csv(lambda: self._parse_var_from_options(self.PROFILE_TYPES))
                query = self._parse_number() if self._match_text_seq("FOR", "QUERY") else None
                offset = self._parse_number() if self._match_text_seq("OFFSET") else None
                limit = self._parse_number() if self._match_text_seq("LIMIT") else None
            else:
                types, query = None, None
                offset, limit = self._parse_oldstyle_limit()

            mutex = True if self._match_text_seq("MUTEX") else None
            mutex = False if self._match_text_seq("STATUS") else mutex

            for_table = self._parse_id_var() if self._match_text_seq("FOR", "TABLE") else None
            for_group = self._parse_string() if self._match_text_seq("FOR", "GROUP") else None
            for_user = self._parse_string() if self._match_text_seq("FOR", "USER") else None
            for_role = self._parse_string() if self._match_text_seq("FOR", "ROLE") else None
            into_outfile = self._parse_string() if self._match_text_seq("INTO", "OUTFILE") else None

            return self.expression(
                exp.Show,
                this=this,
                target=target_id,
                full=full,
                log=log,
                position=position,
                db=db,
                channel=channel,
                like=like,
                where=where,
                types=types,
                query=query,
                offset=offset,
                limit=limit,
                mutex=mutex,
                for_table=for_table,
                for_group=for_group,
                for_user=for_user,
                for_role=for_role,
                into_outfile=into_outfile,
                json=json,
                global_=global_,
            )

        def _parse_oldstyle_limit(
            self,
        ) -> t.Tuple[t.Optional[exp.Expression], t.Optional[exp.Expression]]:
            """Parse old-style `LIMIT [offset,] count`; returns (offset, limit)."""
            limit = None
            offset = None
            if self._match_text_seq("LIMIT"):
                parts = self._parse_csv(self._parse_number)
                if len(parts) == 1:
                    limit = parts[0]
                elif len(parts) == 2:
                    # Two numbers: first is the offset, second the limit
                    limit = parts[1]
                    offset = parts[0]

            return offset, limit

        def _parse_set_item_charset(self, kind: str) -> exp.Expression:
            """Parse `SET CHARACTER SET <charset>` into a SetItem."""
            this = self._parse_string() or self._parse_unquoted_field()
            return self.expression(exp.SetItem, this=this, kind=kind)

        def _parse_set_item_names(self) -> exp.Expression:
            """Parse `SET NAMES <charset> [COLLATE <collation>]` into a SetItem."""
            charset = self._parse_string() or self._parse_unquoted_field()
            if self._match_text_seq("COLLATE"):
                collate = self._parse_string() or self._parse_unquoted_field()
            else:
                collate = None

            return self.expression(exp.SetItem, this=charset, collate=collate, kind="NAMES")

        def _parse_type(
            self, parse_interval: bool = True, fallback_to_identifier: bool = False
        ) -> t.Optional[exp.Expression]:
            # mysql binary is special and can work anywhere, even in order by operations
            # it operates like a no paren func
717 if self._match(TokenType.BINARY, advance=False): 718 data_type = self._parse_types(check_func=True, allow_identifiers=False) 719 720 if isinstance(data_type, exp.DataType): 721 return self.expression(exp.Cast, this=self._parse_column(), to=data_type) 722 723 return super()._parse_type( 724 parse_interval=parse_interval, fallback_to_identifier=fallback_to_identifier 725 ) 726 727 def _parse_alter_table_alter_index(self) -> exp.AlterIndex: 728 index = self._parse_field(any_token=True) 729 730 if self._match_text_seq("VISIBLE"): 731 visible = True 732 elif self._match_text_seq("INVISIBLE"): 733 visible = False 734 else: 735 visible = None 736 737 return self.expression(exp.AlterIndex, this=index, visible=visible) 738 739 def _parse_partition_property( 740 self, 741 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 742 partition_cls: t.Optional[t.Type[exp.Expression]] = None 743 value_parser = None 744 745 if self._match_text_seq("RANGE"): 746 partition_cls = exp.PartitionByRangeProperty 747 value_parser = self._parse_partition_range_value 748 elif self._match_text_seq("LIST"): 749 partition_cls = exp.PartitionByListProperty 750 value_parser = self._parse_partition_list_value 751 752 if not partition_cls or not value_parser: 753 return None 754 755 partition_expressions = self._parse_wrapped_csv(self._parse_assignment) 756 757 # For Doris and Starrocks 758 if not self._match_text_seq("(", "PARTITION", advance=False): 759 return partition_expressions 760 761 create_expressions = self._parse_wrapped_csv(value_parser) 762 763 return self.expression( 764 partition_cls, 765 partition_expressions=partition_expressions, 766 create_expressions=create_expressions, 767 ) 768 769 def _parse_partition_range_value(self) -> t.Optional[exp.Expression]: 770 self._match_text_seq("PARTITION") 771 name = self._parse_id_var() 772 773 if not self._match_text_seq("VALUES", "LESS", "THAN"): 774 return name 775 776 values = self._parse_wrapped_csv(self._parse_expression) 777 778 
if ( 779 len(values) == 1 780 and isinstance(values[0], exp.Column) 781 and values[0].name.upper() == "MAXVALUE" 782 ): 783 values = [exp.var("MAXVALUE")] 784 785 part_range = self.expression(exp.PartitionRange, this=name, expressions=values) 786 return self.expression(exp.Partition, expressions=[part_range]) 787 788 def _parse_partition_list_value(self) -> exp.Partition: 789 self._match_text_seq("PARTITION") 790 name = self._parse_id_var() 791 self._match_text_seq("VALUES", "IN") 792 values = self._parse_wrapped_csv(self._parse_expression) 793 part_list = self.expression(exp.PartitionList, this=name, expressions=values) 794 return self.expression(exp.Partition, expressions=[part_list]) 795 796 def _parse_primary_key( 797 self, 798 wrapped_optional: bool = False, 799 in_props: bool = False, 800 named_primary_key: bool = False, 801 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 802 return super()._parse_primary_key( 803 wrapped_optional=wrapped_optional, in_props=in_props, named_primary_key=True 804 ) 805 806 class Generator(generator.Generator): 807 INTERVAL_ALLOWS_PLURAL_FORM = False 808 LOCKING_READS_SUPPORTED = True 809 NULL_ORDERING_SUPPORTED: t.Optional[bool] = None 810 JOIN_HINTS = False 811 TABLE_HINTS = True 812 DUPLICATE_KEY_UPDATE_WITH_SET = False 813 QUERY_HINT_SEP = " " 814 VALUES_AS_TABLE = False 815 NVL2_SUPPORTED = False 816 LAST_DAY_SUPPORTS_DATE_PART = False 817 JSON_TYPE_REQUIRED_FOR_EXTRACTION = True 818 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 819 JSON_KEY_VALUE_PAIR_SEP = "," 820 SUPPORTS_TO_NUMBER = False 821 PARSE_JSON_NAME: t.Optional[str] = None 822 PAD_FILL_PATTERN_IS_REQUIRED = True 823 WRAP_DERIVED_VALUES = False 824 VARCHAR_REQUIRES_SIZE = True 825 SUPPORTS_MEDIAN = False 826 UPDATE_STATEMENT_SUPPORTS_FROM = False 827 828 TRANSFORMS = { 829 **generator.Generator.TRANSFORMS, 830 exp.ArrayAgg: rename_func("GROUP_CONCAT"), 831 exp.BitwiseAndAgg: rename_func("BIT_AND"), 832 exp.BitwiseOrAgg: rename_func("BIT_OR"), 833 
exp.BitwiseXorAgg: rename_func("BIT_XOR"), 834 exp.BitwiseCount: rename_func("BIT_COUNT"), 835 exp.Chr: lambda self, e: self.chr_sql(e, "CHAR"), 836 exp.CurrentDate: no_paren_current_date_sql, 837 exp.CurrentVersion: rename_func("VERSION"), 838 exp.DateDiff: _remove_ts_or_ds_to_date( 839 lambda self, e: self.func("DATEDIFF", e.this, e.expression), ("this", "expression") 840 ), 841 exp.DateAdd: _remove_ts_or_ds_to_date(date_add_sql("ADD")), 842 exp.DateStrToDate: datestrtodate_sql, 843 exp.DateSub: _remove_ts_or_ds_to_date(date_add_sql("SUB")), 844 exp.DateTrunc: _date_trunc_sql, 845 exp.Day: _remove_ts_or_ds_to_date(), 846 exp.DayOfMonth: _remove_ts_or_ds_to_date(rename_func("DAYOFMONTH")), 847 exp.DayOfWeek: _remove_ts_or_ds_to_date(rename_func("DAYOFWEEK")), 848 exp.DayOfYear: _remove_ts_or_ds_to_date(rename_func("DAYOFYEAR")), 849 exp.GroupConcat: lambda self, 850 e: f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""", 851 exp.ILike: no_ilike_sql, 852 exp.JSONExtractScalar: arrow_json_extract_sql, 853 exp.Length: length_or_char_length_sql, 854 exp.LogicalOr: rename_func("MAX"), 855 exp.LogicalAnd: rename_func("MIN"), 856 exp.Max: max_or_greatest, 857 exp.Min: min_or_least, 858 exp.Month: _remove_ts_or_ds_to_date(), 859 exp.NullSafeEQ: lambda self, e: self.binary(e, "<=>"), 860 exp.NullSafeNEQ: lambda self, e: f"NOT {self.binary(e, '<=>')}", 861 exp.NumberToStr: rename_func("FORMAT"), 862 exp.Pivot: no_pivot_sql, 863 exp.Select: transforms.preprocess( 864 [ 865 transforms.eliminate_distinct_on, 866 transforms.eliminate_semi_and_anti_joins, 867 transforms.eliminate_qualify, 868 transforms.eliminate_full_outer_join, 869 transforms.unnest_generate_date_array_using_recursive_cte, 870 ] 871 ), 872 exp.StrPosition: lambda self, e: strposition_sql( 873 self, e, func_name="LOCATE", supports_position=True 874 ), 875 exp.StrToDate: _str_to_date_sql, 876 exp.StrToTime: _str_to_date_sql, 877 exp.Stuff: rename_func("INSERT"), 878 
exp.SessionUser: lambda *_: "SESSION_USER()", 879 exp.TableSample: no_tablesample_sql, 880 exp.TimeFromParts: rename_func("MAKETIME"), 881 exp.TimestampAdd: date_add_interval_sql("DATE", "ADD"), 882 exp.TimestampDiff: lambda self, e: self.func( 883 "TIMESTAMPDIFF", unit_to_var(e), e.expression, e.this 884 ), 885 exp.TimestampSub: date_add_interval_sql("DATE", "SUB"), 886 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 887 exp.TimeStrToTime: lambda self, e: timestrtotime_sql( 888 self, 889 e, 890 include_precision=not e.args.get("zone"), 891 ), 892 exp.TimeToStr: _remove_ts_or_ds_to_date( 893 lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)) 894 ), 895 exp.Trim: trim_sql, 896 exp.Trunc: rename_func("TRUNCATE"), 897 exp.TryCast: no_trycast_sql, 898 exp.TsOrDsAdd: date_add_sql("ADD"), 899 exp.TsOrDsDiff: lambda self, e: self.func("DATEDIFF", e.this, e.expression), 900 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 901 exp.Unicode: lambda self, e: f"ORD(CONVERT({self.sql(e.this)} USING utf32))", 902 exp.UnixToTime: _unix_to_time_sql, 903 exp.Week: _remove_ts_or_ds_to_date(), 904 exp.WeekOfYear: _remove_ts_or_ds_to_date(rename_func("WEEKOFYEAR")), 905 exp.Year: _remove_ts_or_ds_to_date(), 906 exp.UtcTimestamp: rename_func("UTC_TIMESTAMP"), 907 exp.UtcTime: rename_func("UTC_TIME"), 908 } 909 910 UNSIGNED_TYPE_MAPPING = { 911 exp.DataType.Type.UBIGINT: "BIGINT", 912 exp.DataType.Type.UINT: "INT", 913 exp.DataType.Type.UMEDIUMINT: "MEDIUMINT", 914 exp.DataType.Type.USMALLINT: "SMALLINT", 915 exp.DataType.Type.UTINYINT: "TINYINT", 916 exp.DataType.Type.UDECIMAL: "DECIMAL", 917 exp.DataType.Type.UDOUBLE: "DOUBLE", 918 } 919 920 TIMESTAMP_TYPE_MAPPING = { 921 exp.DataType.Type.DATETIME2: "DATETIME", 922 exp.DataType.Type.SMALLDATETIME: "DATETIME", 923 exp.DataType.Type.TIMESTAMP: "DATETIME", 924 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 925 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 926 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 927 } 928 929 TYPE_MAPPING 
= { 930 **generator.Generator.TYPE_MAPPING, 931 **UNSIGNED_TYPE_MAPPING, 932 **TIMESTAMP_TYPE_MAPPING, 933 } 934 935 TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT) 936 TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT) 937 TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT) 938 TYPE_MAPPING.pop(exp.DataType.Type.BLOB) 939 TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB) 940 TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB) 941 TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB) 942 943 PROPERTIES_LOCATION = { 944 **generator.Generator.PROPERTIES_LOCATION, 945 exp.TransientProperty: exp.Properties.Location.UNSUPPORTED, 946 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 947 exp.PartitionedByProperty: exp.Properties.Location.UNSUPPORTED, 948 exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA, 949 exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA, 950 } 951 952 LIMIT_FETCH = "LIMIT" 953 954 LIMIT_ONLY_LITERALS = True 955 956 CHAR_CAST_MAPPING = dict.fromkeys( 957 ( 958 exp.DataType.Type.LONGTEXT, 959 exp.DataType.Type.LONGBLOB, 960 exp.DataType.Type.MEDIUMBLOB, 961 exp.DataType.Type.MEDIUMTEXT, 962 exp.DataType.Type.TEXT, 963 exp.DataType.Type.TINYBLOB, 964 exp.DataType.Type.TINYTEXT, 965 exp.DataType.Type.VARCHAR, 966 ), 967 "CHAR", 968 ) 969 SIGNED_CAST_MAPPING = dict.fromkeys( 970 ( 971 exp.DataType.Type.BIGINT, 972 exp.DataType.Type.BOOLEAN, 973 exp.DataType.Type.INT, 974 exp.DataType.Type.SMALLINT, 975 exp.DataType.Type.TINYINT, 976 exp.DataType.Type.MEDIUMINT, 977 ), 978 "SIGNED", 979 ) 980 981 # MySQL doesn't support many datatypes in cast. 
982 # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast 983 CAST_MAPPING = { 984 **CHAR_CAST_MAPPING, 985 **SIGNED_CAST_MAPPING, 986 exp.DataType.Type.UBIGINT: "UNSIGNED", 987 } 988 989 TIMESTAMP_FUNC_TYPES = { 990 exp.DataType.Type.TIMESTAMPTZ, 991 exp.DataType.Type.TIMESTAMPLTZ, 992 } 993 994 # https://dev.mysql.com/doc/refman/8.0/en/keywords.html 995 RESERVED_KEYWORDS = { 996 "accessible", 997 "add", 998 "all", 999 "alter", 1000 "analyze", 1001 "and", 1002 "as", 1003 "asc", 1004 "asensitive", 1005 "before", 1006 "between", 1007 "bigint", 1008 "binary", 1009 "blob", 1010 "both", 1011 "by", 1012 "call", 1013 "cascade", 1014 "case", 1015 "change", 1016 "char", 1017 "character", 1018 "check", 1019 "collate", 1020 "column", 1021 "condition", 1022 "constraint", 1023 "continue", 1024 "convert", 1025 "create", 1026 "cross", 1027 "cube", 1028 "cume_dist", 1029 "current_date", 1030 "current_time", 1031 "current_timestamp", 1032 "current_user", 1033 "cursor", 1034 "database", 1035 "databases", 1036 "day_hour", 1037 "day_microsecond", 1038 "day_minute", 1039 "day_second", 1040 "dec", 1041 "decimal", 1042 "declare", 1043 "default", 1044 "delayed", 1045 "delete", 1046 "dense_rank", 1047 "desc", 1048 "describe", 1049 "deterministic", 1050 "distinct", 1051 "distinctrow", 1052 "div", 1053 "double", 1054 "drop", 1055 "dual", 1056 "each", 1057 "else", 1058 "elseif", 1059 "empty", 1060 "enclosed", 1061 "escaped", 1062 "except", 1063 "exists", 1064 "exit", 1065 "explain", 1066 "false", 1067 "fetch", 1068 "first_value", 1069 "float", 1070 "float4", 1071 "float8", 1072 "for", 1073 "force", 1074 "foreign", 1075 "from", 1076 "fulltext", 1077 "function", 1078 "generated", 1079 "get", 1080 "grant", 1081 "group", 1082 "grouping", 1083 "groups", 1084 "having", 1085 "high_priority", 1086 "hour_microsecond", 1087 "hour_minute", 1088 "hour_second", 1089 "if", 1090 "ignore", 1091 "in", 1092 "index", 1093 "infile", 1094 "inner", 1095 "inout", 1096 "insensitive", 1097 
"insert", 1098 "int", 1099 "int1", 1100 "int2", 1101 "int3", 1102 "int4", 1103 "int8", 1104 "integer", 1105 "intersect", 1106 "interval", 1107 "into", 1108 "io_after_gtids", 1109 "io_before_gtids", 1110 "is", 1111 "iterate", 1112 "join", 1113 "json_table", 1114 "key", 1115 "keys", 1116 "kill", 1117 "lag", 1118 "last_value", 1119 "lateral", 1120 "lead", 1121 "leading", 1122 "leave", 1123 "left", 1124 "like", 1125 "limit", 1126 "linear", 1127 "lines", 1128 "load", 1129 "localtime", 1130 "localtimestamp", 1131 "lock", 1132 "long", 1133 "longblob", 1134 "longtext", 1135 "loop", 1136 "low_priority", 1137 "master_bind", 1138 "master_ssl_verify_server_cert", 1139 "match", 1140 "maxvalue", 1141 "mediumblob", 1142 "mediumint", 1143 "mediumtext", 1144 "middleint", 1145 "minute_microsecond", 1146 "minute_second", 1147 "mod", 1148 "modifies", 1149 "natural", 1150 "not", 1151 "no_write_to_binlog", 1152 "nth_value", 1153 "ntile", 1154 "null", 1155 "numeric", 1156 "of", 1157 "on", 1158 "optimize", 1159 "optimizer_costs", 1160 "option", 1161 "optionally", 1162 "or", 1163 "order", 1164 "out", 1165 "outer", 1166 "outfile", 1167 "over", 1168 "partition", 1169 "percent_rank", 1170 "precision", 1171 "primary", 1172 "procedure", 1173 "purge", 1174 "range", 1175 "rank", 1176 "read", 1177 "reads", 1178 "read_write", 1179 "real", 1180 "recursive", 1181 "references", 1182 "regexp", 1183 "release", 1184 "rename", 1185 "repeat", 1186 "replace", 1187 "require", 1188 "resignal", 1189 "restrict", 1190 "return", 1191 "revoke", 1192 "right", 1193 "rlike", 1194 "row", 1195 "rows", 1196 "row_number", 1197 "schema", 1198 "schemas", 1199 "second_microsecond", 1200 "select", 1201 "sensitive", 1202 "separator", 1203 "set", 1204 "show", 1205 "signal", 1206 "smallint", 1207 "spatial", 1208 "specific", 1209 "sql", 1210 "sqlexception", 1211 "sqlstate", 1212 "sqlwarning", 1213 "sql_big_result", 1214 "sql_calc_found_rows", 1215 "sql_small_result", 1216 "ssl", 1217 "starting", 1218 "stored", 1219 
"straight_join", 1220 "system", 1221 "table", 1222 "terminated", 1223 "then", 1224 "tinyblob", 1225 "tinyint", 1226 "tinytext", 1227 "to", 1228 "trailing", 1229 "trigger", 1230 "true", 1231 "undo", 1232 "union", 1233 "unique", 1234 "unlock", 1235 "unsigned", 1236 "update", 1237 "usage", 1238 "use", 1239 "using", 1240 "utc_date", 1241 "utc_time", 1242 "utc_timestamp", 1243 "values", 1244 "varbinary", 1245 "varchar", 1246 "varcharacter", 1247 "varying", 1248 "virtual", 1249 "when", 1250 "where", 1251 "while", 1252 "window", 1253 "with", 1254 "write", 1255 "xor", 1256 "year_month", 1257 "zerofill", 1258 } 1259 1260 def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str: 1261 persisted = "STORED" if expression.args.get("persisted") else "VIRTUAL" 1262 return f"GENERATED ALWAYS AS ({self.sql(expression.this.unnest())}) {persisted}" 1263 1264 def array_sql(self, expression: exp.Array) -> str: 1265 self.unsupported("Arrays are not supported by MySQL") 1266 return self.function_fallback_sql(expression) 1267 1268 def arraycontainsall_sql(self, expression: exp.ArrayContainsAll) -> str: 1269 self.unsupported("Array operations are not supported by MySQL") 1270 return self.function_fallback_sql(expression) 1271 1272 def dpipe_sql(self, expression: exp.DPipe) -> str: 1273 return self.func("CONCAT", *expression.flatten()) 1274 1275 def extract_sql(self, expression: exp.Extract) -> str: 1276 unit = expression.name 1277 if unit and unit.lower() == "epoch": 1278 return self.func("UNIX_TIMESTAMP", expression.expression) 1279 1280 return super().extract_sql(expression) 1281 1282 def datatype_sql(self, expression: exp.DataType) -> str: 1283 if ( 1284 self.VARCHAR_REQUIRES_SIZE 1285 and expression.is_type(exp.DataType.Type.VARCHAR) 1286 and not expression.expressions 1287 ): 1288 # `VARCHAR` must always have a size - if it doesn't, we always generate `TEXT` 1289 return "TEXT" 1290 1291 # https://dev.mysql.com/doc/refman/8.0/en/numeric-type-syntax.html 
1292 result = super().datatype_sql(expression) 1293 if expression.this in self.UNSIGNED_TYPE_MAPPING: 1294 result = f"{result} UNSIGNED" 1295 1296 return result 1297 1298 def jsonarraycontains_sql(self, expression: exp.JSONArrayContains) -> str: 1299 return f"{self.sql(expression, 'this')} MEMBER OF({self.sql(expression, 'expression')})" 1300 1301 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1302 if expression.to.this in self.TIMESTAMP_FUNC_TYPES: 1303 return self.func("TIMESTAMP", expression.this) 1304 1305 to = self.CAST_MAPPING.get(expression.to.this) 1306 1307 if to: 1308 expression.to.set("this", to) 1309 return super().cast_sql(expression) 1310 1311 def show_sql(self, expression: exp.Show) -> str: 1312 this = f" {expression.name}" 1313 full = " FULL" if expression.args.get("full") else "" 1314 global_ = " GLOBAL" if expression.args.get("global_") else "" 1315 1316 target = self.sql(expression, "target") 1317 target = f" {target}" if target else "" 1318 if expression.name in ("COLUMNS", "INDEX"): 1319 target = f" FROM{target}" 1320 elif expression.name == "GRANTS": 1321 target = f" FOR{target}" 1322 elif expression.name in ("LINKS", "PARTITIONS"): 1323 target = f" ON{target}" if target else "" 1324 elif expression.name == "PROJECTIONS": 1325 target = f" ON TABLE{target}" if target else "" 1326 1327 db = self._prefixed_sql("FROM", expression, "db") 1328 1329 like = self._prefixed_sql("LIKE", expression, "like") 1330 where = self.sql(expression, "where") 1331 1332 types = self.expressions(expression, key="types") 1333 types = f" {types}" if types else types 1334 query = self._prefixed_sql("FOR QUERY", expression, "query") 1335 1336 if expression.name == "PROFILE": 1337 offset = self._prefixed_sql("OFFSET", expression, "offset") 1338 limit = self._prefixed_sql("LIMIT", expression, "limit") 1339 else: 1340 offset = "" 1341 limit = self._oldstyle_limit_sql(expression) 1342 1343 log = self._prefixed_sql("IN", expression, 
"log") 1344 position = self._prefixed_sql("FROM", expression, "position") 1345 1346 channel = self._prefixed_sql("FOR CHANNEL", expression, "channel") 1347 1348 if expression.name == "ENGINE": 1349 mutex_or_status = " MUTEX" if expression.args.get("mutex") else " STATUS" 1350 else: 1351 mutex_or_status = "" 1352 1353 for_table = self._prefixed_sql("FOR TABLE", expression, "for_table") 1354 for_group = self._prefixed_sql("FOR GROUP", expression, "for_group") 1355 for_user = self._prefixed_sql("FOR USER", expression, "for_user") 1356 for_role = self._prefixed_sql("FOR ROLE", expression, "for_role") 1357 into_outfile = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile") 1358 json = " JSON" if expression.args.get("json") else "" 1359 1360 return f"SHOW{full}{global_}{this}{json}{target}{for_table}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}{for_group}{for_user}{for_role}{into_outfile}" 1361 1362 def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str: 1363 """To avoid TO keyword in ALTER ... RENAME statements. 1364 It's moved from Doris, because it's the same for all MySQL, Doris, and StarRocks. 
1365 """ 1366 return super().alterrename_sql(expression, include_to=False) 1367 1368 def altercolumn_sql(self, expression: exp.AlterColumn) -> str: 1369 dtype = self.sql(expression, "dtype") 1370 if not dtype: 1371 return super().altercolumn_sql(expression) 1372 1373 this = self.sql(expression, "this") 1374 return f"MODIFY COLUMN {this} {dtype}" 1375 1376 def _prefixed_sql(self, prefix: str, expression: exp.Expression, arg: str) -> str: 1377 sql = self.sql(expression, arg) 1378 return f" {prefix} {sql}" if sql else "" 1379 1380 def _oldstyle_limit_sql(self, expression: exp.Show) -> str: 1381 limit = self.sql(expression, "limit") 1382 offset = self.sql(expression, "offset") 1383 if limit: 1384 limit_offset = f"{offset}, {limit}" if offset else limit 1385 return f" LIMIT {limit_offset}" 1386 return "" 1387 1388 def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str: 1389 unit = expression.args.get("unit") 1390 1391 # Pick an old-enough date to avoid negative timestamp diffs 1392 start_ts = "'0000-01-01 00:00:00'" 1393 1394 # Source: https://stackoverflow.com/a/32955740 1395 timestamp_diff = build_date_delta(exp.TimestampDiff)([unit, start_ts, expression.this]) 1396 interval = exp.Interval(this=timestamp_diff, unit=unit) 1397 dateadd = build_date_delta_with_interval(exp.DateAdd)([start_ts, interval]) 1398 1399 return self.sql(dateadd) 1400 1401 def converttimezone_sql(self, expression: exp.ConvertTimezone) -> str: 1402 from_tz = expression.args.get("source_tz") 1403 to_tz = expression.args.get("target_tz") 1404 dt = expression.args.get("timestamp") 1405 1406 return self.func("CONVERT_TZ", dt, from_tz, to_tz) 1407 1408 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1409 self.unsupported("AT TIME ZONE is not supported by MySQL") 1410 return self.sql(expression.this) 1411 1412 def isascii_sql(self, expression: exp.IsAscii) -> str: 1413 return f"REGEXP_LIKE({self.sql(expression.this)}, '^[[:ascii:]]*$')" 1414 1415 def ignorenulls_sql(self, 
expression: exp.IgnoreNulls) -> str: 1416 # https://dev.mysql.com/doc/refman/8.4/en/window-function-descriptions.html 1417 self.unsupported("MySQL does not support IGNORE NULLS.") 1418 return self.sql(expression.this) 1419 1420 @unsupported_args("this") 1421 def currentschema_sql(self, expression: exp.CurrentSchema) -> str: 1422 return self.func("SCHEMA") 1423 1424 def partition_sql(self, expression: exp.Partition) -> str: 1425 parent = expression.parent 1426 if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)): 1427 return self.expressions(expression, flat=True) 1428 return super().partition_sql(expression) 1429 1430 def _partition_by_sql( 1431 self, expression: exp.PartitionByRangeProperty | exp.PartitionByListProperty, kind: str 1432 ) -> str: 1433 partitions = self.expressions(expression, key="partition_expressions", flat=True) 1434 create = self.expressions(expression, key="create_expressions", flat=True) 1435 return f"PARTITION BY {kind} ({partitions}) ({create})" 1436 1437 def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str: 1438 return self._partition_by_sql(expression, "RANGE") 1439 1440 def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str: 1441 return self._partition_by_sql(expression, "LIST") 1442 1443 def partitionlist_sql(self, expression: exp.PartitionList) -> str: 1444 name = self.sql(expression, "this") 1445 values = self.expressions(expression, flat=True) 1446 return f"PARTITION {name} VALUES IN ({values})" 1447 1448 def partitionrange_sql(self, expression: exp.PartitionRange) -> str: 1449 name = self.sql(expression, "this") 1450 values = self.expressions(expression, flat=True) 1451 return f"PARTITION {name} VALUES LESS THAN ({values})"
This flag is used in the optimizer's canonicalize rule and determines whether x will be promoted to the literal's type in x::DATE < '2020-01-01 12:05:03' (i.e., DATETIME). When false, the literal is cast to x's type to match it instead.
Specifies the strategy according to which identifiers should be normalized.
Whether LEAST/GREATEST functions ignore NULL values, e.g.:
- BigQuery, Snowflake, MySQL, Presto/Trino: LEAST(1, NULL, 2) -> NULL
- Spark, Postgres, DuckDB, TSQL: LEAST(1, NULL, 2) -> 1
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an escaped sequence (e.g. \n) to its unescaped version (the corresponding literal character, e.g. a newline).
class Tokenizer(tokens.Tokenizer):
    """Tokenizer for the MySQL dialect.

    Configures MySQL-specific lexing: backtick identifiers, single- or
    double-quoted strings, `#` line comments, bit/hex literals, and the
    character-set introducer prefixes (e.g. _utf8mb4'...').
    """

    # MySQL accepts both single and double quotes for string literals.
    QUOTES = ["'", '"']
    COMMENTS = ["--", "#", ("/*", "*/")]
    IDENTIFIERS = ["`"]
    STRING_ESCAPES = ["'", '"', "\\"]
    # Bit literals: b'1010', B'1010', 0b1010.
    BIT_STRINGS = [("b'", "'"), ("B'", "'"), ("0b", "")]
    # Hex literals: x'4D', X'4D', 0x4D.
    HEX_STRINGS = [("x'", "'"), ("X'", "'"), ("0x", "")]
    # Characters that keep their backslash when escaped,
    # https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
    ESCAPE_FOLLOW_CHARS = ["0", "b", "n", "r", "t", "Z", "%", "_"]

    # MySQL does not support nesting /* ... */ comments.
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BLOB": TokenType.BLOB,
        "CHARSET": TokenType.CHARACTER_SET,
        "DISTINCTROW": TokenType.DISTINCT,
        "EXPLAIN": TokenType.DESCRIBE,
        "FORCE": TokenType.FORCE,
        "IGNORE": TokenType.IGNORE,
        "KEY": TokenType.KEY,
        "LOCK TABLES": TokenType.COMMAND,
        "LONGBLOB": TokenType.LONGBLOB,
        "LONGTEXT": TokenType.LONGTEXT,
        "MEDIUMBLOB": TokenType.MEDIUMBLOB,
        "MEDIUMINT": TokenType.MEDIUMINT,
        "MEDIUMTEXT": TokenType.MEDIUMTEXT,
        "MEMBER OF": TokenType.MEMBER_OF,
        "MOD": TokenType.MOD,
        "SEPARATOR": TokenType.SEPARATOR,
        "SERIAL": TokenType.SERIAL,
        # CAST targets: SIGNED [INTEGER] / UNSIGNED [INTEGER] map to 64-bit ints.
        "SIGNED": TokenType.BIGINT,
        "SIGNED INTEGER": TokenType.BIGINT,
        "SOUNDS LIKE": TokenType.SOUNDS_LIKE,
        "START": TokenType.BEGIN,
        # MySQL's TIMESTAMP column type is timezone-aware, unlike DATETIME.
        "TIMESTAMP": TokenType.TIMESTAMPTZ,
        "TINYBLOB": TokenType.TINYBLOB,
        "TINYTEXT": TokenType.TINYTEXT,
        "UNLOCK TABLES": TokenType.COMMAND,
        "UNSIGNED": TokenType.UBIGINT,
        "UNSIGNED INTEGER": TokenType.UBIGINT,
        "YEAR": TokenType.YEAR,
        # Character-set introducers, e.g. _latin1'abc'.
        "_ARMSCII8": TokenType.INTRODUCER,
        "_ASCII": TokenType.INTRODUCER,
        "_BIG5": TokenType.INTRODUCER,
        "_BINARY": TokenType.INTRODUCER,
        "_CP1250": TokenType.INTRODUCER,
        "_CP1251": TokenType.INTRODUCER,
        "_CP1256": TokenType.INTRODUCER,
        "_CP1257": TokenType.INTRODUCER,
        "_CP850": TokenType.INTRODUCER,
        "_CP852": TokenType.INTRODUCER,
        "_CP866": TokenType.INTRODUCER,
        "_CP932": TokenType.INTRODUCER,
        "_DEC8": TokenType.INTRODUCER,
        "_EUCJPMS": TokenType.INTRODUCER,
        "_EUCKR": TokenType.INTRODUCER,
        "_GB18030": TokenType.INTRODUCER,
        "_GB2312": TokenType.INTRODUCER,
        "_GBK": TokenType.INTRODUCER,
        "_GEOSTD8": TokenType.INTRODUCER,
        "_GREEK": TokenType.INTRODUCER,
        "_HEBREW": TokenType.INTRODUCER,
        "_HP8": TokenType.INTRODUCER,
        "_KEYBCS2": TokenType.INTRODUCER,
        "_KOI8R": TokenType.INTRODUCER,
        "_KOI8U": TokenType.INTRODUCER,
        "_LATIN1": TokenType.INTRODUCER,
        "_LATIN2": TokenType.INTRODUCER,
        "_LATIN5": TokenType.INTRODUCER,
        "_LATIN7": TokenType.INTRODUCER,
        "_MACCE": TokenType.INTRODUCER,
        "_MACROMAN": TokenType.INTRODUCER,
        "_SJIS": TokenType.INTRODUCER,
        "_SWE7": TokenType.INTRODUCER,
        "_TIS620": TokenType.INTRODUCER,
        "_UCS2": TokenType.INTRODUCER,
        "_UJIS": TokenType.INTRODUCER,
        # https://dev.mysql.com/doc/refman/8.0/en/string-literals.html
        "_UTF8": TokenType.INTRODUCER,
        "_UTF16": TokenType.INTRODUCER,
        "_UTF16LE": TokenType.INTRODUCER,
        "_UTF32": TokenType.INTRODUCER,
        "_UTF8MB3": TokenType.INTRODUCER,
        "_UTF8MB4": TokenType.INTRODUCER,
        "@@": TokenType.SESSION_PARAMETER,
    }

    # REPLACE is tokenized as a command; SHOW is parsed properly instead.
    COMMANDS = {*tokens.Tokenizer.COMMANDS, TokenType.REPLACE} - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- tokenize
- sql
- size
- tokens
class Parser(parser.Parser):
    """MySQL-specific parser.

    Extends the base :class:`sqlglot.parser.Parser` with MySQL syntax:
    ``SHOW`` statements, inline ``INDEX``/``KEY``/``FULLTEXT``/``SPATIAL``
    constraints, old-style ``LIMIT offset, count``, ``SET NAMES``/charset
    items, and ``PARTITION BY RANGE/LIST`` definitions.
    """

    # Tokens that can also be parsed as function names, e.g. DATABASE(), MOD(), VALUES().
    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.DATABASE,
        TokenType.MOD,
        TokenType.SCHEMA,
        TokenType.VALUES,
        TokenType.CHARACTER_SET,
    }

    # MySQL supports && as AND and a logical XOR operator.
    CONJUNCTION = {
        **parser.Parser.CONJUNCTION,
        TokenType.DAMP: exp.And,
        TokenType.XOR: exp.Xor,
    }

    # In MySQL, || is logical OR (not string concatenation by default).
    DISJUNCTION = {
        **parser.Parser.DISJUNCTION,
        TokenType.DPIPE: exp.Or,
    }

    # Index-hint keywords (USE/FORCE/IGNORE) must not be consumed as table aliases.
    TABLE_ALIAS_TOKENS = (
        parser.Parser.TABLE_ALIAS_TOKENS - parser.Parser.TABLE_INDEX_HINT_TOKENS
    )

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        # a SOUNDS LIKE b  is parsed as  SOUNDEX(a) = SOUNDEX(b)
        TokenType.SOUNDS_LIKE: lambda self, this: self.expression(
            exp.EQ,
            this=self.expression(exp.Soundex, this=this),
            expression=self.expression(exp.Soundex, this=self._parse_term()),
        ),
        # value MEMBER OF(json_array)
        TokenType.MEMBER_OF: lambda self, this: self.expression(
            exp.JSONArrayContains,
            this=this,
            expression=self._parse_wrapped(self._parse_expression),
        ),
    }

    # Function-name overrides mapping MySQL builtins onto canonical sqlglot
    # expressions. Date/time part extractors are wrapped in TsOrDsToDate since
    # MySQL accepts both date strings and date values for them.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BIT_AND": exp.BitwiseAndAgg.from_arg_list,
        "BIT_OR": exp.BitwiseOrAgg.from_arg_list,
        "BIT_XOR": exp.BitwiseXorAgg.from_arg_list,
        "BIT_COUNT": exp.BitwiseCount.from_arg_list,
        "CONVERT_TZ": lambda args: exp.ConvertTimezone(
            source_tz=seq_get(args, 1), target_tz=seq_get(args, 2), timestamp=seq_get(args, 0)
        ),
        "CURDATE": exp.CurrentDate.from_arg_list,
        "CURTIME": exp.CurrentTime.from_arg_list,
        "DATE": lambda args: exp.TsOrDsToDate(this=seq_get(args, 0)),
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "mysql"),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFMONTH": lambda args: exp.DayOfMonth(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFWEEK": lambda args: exp.DayOfWeek(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "DAYOFYEAR": lambda args: exp.DayOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FORMAT": exp.NumberToStr.from_arg_list,
        "FROM_UNIXTIME": build_formatted_time(exp.UnixToTime, "mysql"),
        "ISNULL": isnull_to_is_null,
        # MySQL's LENGTH() counts bytes, hence binary=True.
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "MAKETIME": exp.TimeFromParts.from_arg_list,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "MONTHNAME": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            format=exp.Literal.string("%B"),
        ),
        "SCHEMA": exp.CurrentSchema.from_arg_list,
        "DATABASE": exp.CurrentSchema.from_arg_list,
        "STR_TO_DATE": _str_to_date,
        "TIMESTAMPDIFF": build_date_delta(exp.TimestampDiff),
        # TO_DAYS(d) == days since year 0, i.e. DATEDIFF(d, '0000-01-01') + 1.
        "TO_DAYS": lambda args: exp.paren(
            exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=exp.Literal.string("0000-01-01")),
                unit=exp.var("DAY"),
            )
            + 1
        ),
        "VERSION": exp.CurrentVersion.from_arg_list,
        "WEEK": lambda args: exp.Week(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)), mode=seq_get(args, 1)
        ),
        "WEEKOFYEAR": lambda args: exp.WeekOfYear(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "GROUP_CONCAT": lambda self: self._parse_group_concat(),
        # https://dev.mysql.com/doc/refman/5.7/en/miscellaneous-functions.html#function_values
        "VALUES": lambda self: self.expression(
            exp.Anonymous, this="VALUES", expressions=[self._parse_id_var()]
        ),
        "JSON_VALUE": lambda self: self._parse_json_value(),
        "SUBSTR": lambda self: self._parse_substring(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    # Dispatch table mapping the keyword(s) after SHOW to a configured
    # _parse_show_mysql call. Synonyms map to the same canonical name
    # (e.g. MASTER LOGS -> BINARY LOGS, SCHEMAS -> DATABASES).
    SHOW_PARSERS = {
        "BINARY LOGS": _show_parser("BINARY LOGS"),
        "MASTER LOGS": _show_parser("BINARY LOGS"),
        "BINLOG EVENTS": _show_parser("BINLOG EVENTS"),
        "CHARACTER SET": _show_parser("CHARACTER SET"),
        "CHARSET": _show_parser("CHARACTER SET"),
        "COLLATION": _show_parser("COLLATION"),
        "FULL COLUMNS": _show_parser("COLUMNS", target="FROM", full=True),
        "COLUMNS": _show_parser("COLUMNS", target="FROM"),
        "CREATE DATABASE": _show_parser("CREATE DATABASE", target=True),
        "CREATE EVENT": _show_parser("CREATE EVENT", target=True),
        "CREATE FUNCTION": _show_parser("CREATE FUNCTION", target=True),
        "CREATE PROCEDURE": _show_parser("CREATE PROCEDURE", target=True),
        "CREATE TABLE": _show_parser("CREATE TABLE", target=True),
        "CREATE TRIGGER": _show_parser("CREATE TRIGGER", target=True),
        "CREATE VIEW": _show_parser("CREATE VIEW", target=True),
        "DATABASES": _show_parser("DATABASES"),
        "SCHEMAS": _show_parser("DATABASES"),
        "ENGINE": _show_parser("ENGINE", target=True),
        "STORAGE ENGINES": _show_parser("ENGINES"),
        "ENGINES": _show_parser("ENGINES"),
        "ERRORS": _show_parser("ERRORS"),
        "EVENTS": _show_parser("EVENTS"),
        "FUNCTION CODE": _show_parser("FUNCTION CODE", target=True),
        "FUNCTION STATUS": _show_parser("FUNCTION STATUS"),
        "GRANTS": _show_parser("GRANTS", target="FOR"),
        "INDEX": _show_parser("INDEX", target="FROM"),
        "MASTER STATUS": _show_parser("MASTER STATUS"),
        "OPEN TABLES": _show_parser("OPEN TABLES"),
        "PLUGINS": _show_parser("PLUGINS"),
        "PROCEDURE CODE": _show_parser("PROCEDURE CODE", target=True),
        "PROCEDURE STATUS": _show_parser("PROCEDURE STATUS"),
        "PRIVILEGES": _show_parser("PRIVILEGES"),
        "FULL PROCESSLIST": _show_parser("PROCESSLIST", full=True),
        "PROCESSLIST": _show_parser("PROCESSLIST"),
        "PROFILE": _show_parser("PROFILE"),
        "PROFILES": _show_parser("PROFILES"),
        "RELAYLOG EVENTS": _show_parser("RELAYLOG EVENTS"),
        "REPLICAS": _show_parser("REPLICAS"),
        "SLAVE HOSTS": _show_parser("REPLICAS"),
        "REPLICA STATUS": _show_parser("REPLICA STATUS"),
        "SLAVE STATUS": _show_parser("REPLICA STATUS"),
        "GLOBAL STATUS": _show_parser("STATUS", global_=True),
        "SESSION STATUS": _show_parser("STATUS"),
        "STATUS": _show_parser("STATUS"),
        "TABLE STATUS": _show_parser("TABLE STATUS"),
        "FULL TABLES": _show_parser("TABLES", full=True),
        "TABLES": _show_parser("TABLES"),
        "TRIGGERS": _show_parser("TRIGGERS"),
        "GLOBAL VARIABLES": _show_parser("VARIABLES", global_=True),
        "SESSION VARIABLES": _show_parser("VARIABLES"),
        "VARIABLES": _show_parser("VARIABLES"),
        "WARNINGS": _show_parser("WARNINGS"),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCK": lambda self: self._parse_property_assignment(exp.LockProperty),
        "PARTITION BY": lambda self: self._parse_partition_property(),
    }

    SET_PARSERS = {
        **parser.Parser.SET_PARSERS,
        "PERSIST": lambda self: self._parse_set_item_assignment("PERSIST"),
        "PERSIST_ONLY": lambda self: self._parse_set_item_assignment("PERSIST_ONLY"),
        "CHARACTER SET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
        "CHARSET": lambda self: self._parse_set_item_charset("CHARACTER SET"),
        "NAMES": lambda self: self._parse_set_item_names(),
    }

    # MySQL allows inline index definitions inside CREATE TABLE column lists.
    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "FULLTEXT": lambda self: self._parse_index_constraint(kind="FULLTEXT"),
        "INDEX": lambda self: self._parse_index_constraint(),
        "KEY": lambda self: self._parse_index_constraint(),
        "SPATIAL": lambda self: self._parse_index_constraint(kind="SPATIAL"),
        "ZEROFILL": lambda self: self.expression(exp.ZeroFillColumnConstraint),
    }

    # ALTER TABLE ... MODIFY is MySQL's spelling of ALTER COLUMN.
    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "MODIFY": lambda self: self._parse_alter_table_alter(),
    }

    ALTER_ALTER_PARSERS = {
        **parser.Parser.ALTER_ALTER_PARSERS,
        "INDEX": lambda self: self._parse_alter_table_alter_index(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "FULLTEXT",
        "INDEX",
        "KEY",
        "SPATIAL",
    }

    # Valid type arguments for SHOW PROFILE, including two-word forms.
    PROFILE_TYPES: parser.OPTIONS_TYPE = {
        **dict.fromkeys(("ALL", "CPU", "IPC", "MEMORY", "SOURCE", "SWAPS"), tuple()),
        "BLOCK": ("IO",),
        "CONTEXT": ("SWITCHES",),
        "PAGE": ("FAULTS",),
    }

    # SET is both a statement keyword and a column data type in MySQL.
    TYPE_TOKENS = {
        *parser.Parser.TYPE_TOKENS,
        TokenType.SET,
    }

    ENUM_TYPE_TOKENS = {
        *parser.Parser.ENUM_TYPE_TOKENS,
        TokenType.SET,
    }

    # SELECT [ ALL | DISTINCT | DISTINCTROW ] [ <OPERATION_MODIFIERS> ]
    OPERATION_MODIFIERS = {
        "HIGH_PRIORITY",
        "STRAIGHT_JOIN",
        "SQL_SMALL_RESULT",
        "SQL_BIG_RESULT",
        "SQL_BUFFER_RESULT",
        "SQL_NO_CACHE",
        "SQL_CALC_FOUND_ROWS",
    }

    LOG_DEFAULTS_TO_LN = True
    STRING_ALIASES = True
    VALUES_FOLLOWED_BY_PAREN = False
    SUPPORTS_PARTITION_SELECTION = True

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse a generated-column constraint, honoring MySQL's optional
        trailing STORED / VIRTUAL keyword that controls persistence."""
        this = super()._parse_generated_as_identity()

        if self._match_texts(("STORED", "VIRTUAL")):
            persisted = self._prev.text.upper() == "STORED"

            if isinstance(this, exp.ComputedColumnConstraint):
                this.set("persisted", persisted)
            elif isinstance(this, exp.GeneratedAsIdentityColumnConstraint):
                # Re-wrap as a computed column so the persistence flag can be carried.
                this = self.expression(
                    exp.ComputedColumnConstraint, this=this.expression, persisted=persisted
                )

        return this

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one key part, supporting MySQL column-prefix lengths,
        e.g. ``PRIMARY KEY (name(10))``."""
        this = self._parse_id_var()
        if not self._match(TokenType.L_PAREN):
            return this

        expression = self._parse_number()
        self._match_r_paren()
        return self.expression(exp.ColumnPrefix, this=this, expression=expression)

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        """Parse an inline INDEX/KEY constraint (optionally FULLTEXT or
        SPATIAL via ``kind``), including its trailing index options."""
        if kind:
            # FULLTEXT/SPATIAL may be followed by an optional INDEX or KEY keyword.
            self._match_texts(("INDEX", "KEY"))

        this = self._parse_id_var(any_token=False)
        index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
        expressions = self._parse_wrapped_csv(self._parse_ordered)

        # Consume index options in any order until none match.
        options = []
        while True:
            if self._match_text_seq("KEY_BLOCK_SIZE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(key_block_size=self._parse_number())
            elif self._match(TokenType.USING):
                opt = exp.IndexConstraintOption(using=self._advance_any() and self._prev.text)
            elif self._match_text_seq("WITH", "PARSER"):
                opt = exp.IndexConstraintOption(parser=self._parse_var(any_token=True))
            elif self._match(TokenType.COMMENT):
                opt = exp.IndexConstraintOption(comment=self._parse_string())
            elif self._match_text_seq("VISIBLE"):
                opt = exp.IndexConstraintOption(visible=True)
            elif self._match_text_seq("INVISIBLE"):
                opt = exp.IndexConstraintOption(visible=False)
            elif self._match_text_seq("ENGINE_ATTRIBUTE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(engine_attr=self._parse_string())
            elif self._match_text_seq("SECONDARY_ENGINE_ATTRIBUTE"):
                self._match(TokenType.EQ)
                opt = exp.IndexConstraintOption(secondary_engine_attr=self._parse_string())
            else:
                opt = None

            if not opt:
                break

            options.append(opt)

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expressions=expressions,
            kind=kind,
            index_type=index_type,
            options=options,
        )

    def _parse_show_mysql(
        self,
        this: str,
        target: bool | str = False,
        full: t.Optional[bool] = None,
        global_: t.Optional[bool] = None,
    ) -> exp.Show:
        """Parse the remainder of a SHOW statement into an exp.Show.

        Args:
            this: Canonical SHOW kind, e.g. "COLUMNS" or "BINLOG EVENTS".
            target: When truthy, a target identifier follows; a string value
                is the keyword sequence preceding it (e.g. "FROM", "FOR").
            full: Whether the FULL modifier was present.
            global_: Whether the GLOBAL modifier was present.
        """
        json = self._match_text_seq("JSON")

        if target:
            if isinstance(target, str):
                self._match_text_seq(*target.split(" "))
            target_id = self._parse_id_var()
        else:
            target_id = None

        log = self._parse_string() if self._match_text_seq("IN") else None

        if this in ("BINLOG EVENTS", "RELAYLOG EVENTS"):
            position = self._parse_number() if self._match_text_seq("FROM") else None
            db = None
        else:
            position = None
            db = None

            if self._match(TokenType.FROM):
                db = self._parse_id_var()
            elif self._match(TokenType.DOT):
                # db.table form: what we parsed as the target was actually the db.
                db = target_id
                target_id = self._parse_id_var()

        channel = self._parse_id_var() if self._match_text_seq("FOR", "CHANNEL") else None

        like = self._parse_string() if self._match_text_seq("LIKE") else None
        where = self._parse_where()

        if this == "PROFILE":
            types = self._parse_csv(lambda: self._parse_var_from_options(self.PROFILE_TYPES))
            query = self._parse_number() if self._match_text_seq("FOR", "QUERY") else None
            offset = self._parse_number() if self._match_text_seq("OFFSET") else None
            limit = self._parse_number() if self._match_text_seq("LIMIT") else None
        else:
            types, query = None, None
            offset, limit = self._parse_oldstyle_limit()

        # SHOW ENGINE ... {MUTEX | STATUS}: True for MUTEX, False for STATUS.
        mutex = True if self._match_text_seq("MUTEX") else None
        mutex = False if self._match_text_seq("STATUS") else mutex

        for_table = self._parse_id_var() if self._match_text_seq("FOR", "TABLE") else None
        for_group = self._parse_string() if self._match_text_seq("FOR", "GROUP") else None
        for_user = self._parse_string() if self._match_text_seq("FOR", "USER") else None
        for_role = self._parse_string() if self._match_text_seq("FOR", "ROLE") else None
        into_outfile = self._parse_string() if self._match_text_seq("INTO", "OUTFILE") else None

        return self.expression(
            exp.Show,
            this=this,
            target=target_id,
            full=full,
            log=log,
            position=position,
            db=db,
            channel=channel,
            like=like,
            where=where,
            types=types,
            query=query,
            offset=offset,
            limit=limit,
            mutex=mutex,
            for_table=for_table,
            for_group=for_group,
            for_user=for_user,
            for_role=for_role,
            into_outfile=into_outfile,
            json=json,
            global_=global_,
        )

    def _parse_oldstyle_limit(
        self,
    ) -> t.Tuple[t.Optional[exp.Expression], t.Optional[exp.Expression]]:
        """Parse MySQL's ``LIMIT [offset,] count`` form.

        Returns an (offset, limit) pair; both are None when no LIMIT follows.
        """
        limit = None
        offset = None
        if self._match_text_seq("LIMIT"):
            parts = self._parse_csv(self._parse_number)
            if len(parts) == 1:
                limit = parts[0]
            elif len(parts) == 2:
                # Two numbers means "LIMIT offset, count".
                limit = parts[1]
                offset = parts[0]

        return offset, limit

    def _parse_set_item_charset(self, kind: str) -> exp.Expression:
        """Parse the charset value of ``SET CHARACTER SET ...`` / ``SET CHARSET ...``."""
        this = self._parse_string() or self._parse_unquoted_field()
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_item_names(self) -> exp.Expression:
        """Parse ``SET NAMES charset [COLLATE collation]``."""
        charset = self._parse_string() or self._parse_unquoted_field()
        if self._match_text_seq("COLLATE"):
            collate = self._parse_string() or self._parse_unquoted_field()
        else:
            collate = None

        return self.expression(exp.SetItem, this=charset, collate=collate, kind="NAMES")

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        # mysql binary is special and can work anywhere, even in order by operations
        # it operates like a no paren func
        if self._match(TokenType.BINARY, advance=False):
            data_type = self._parse_types(check_func=True, allow_identifiers=False)

            if isinstance(data_type, exp.DataType):
                # BINARY expr -> CAST(expr AS BINARY)
                return self.expression(exp.Cast, this=self._parse_column(), to=data_type)

        return super()._parse_type(
            parse_interval=parse_interval, fallback_to_identifier=fallback_to_identifier
        )

    def _parse_alter_table_alter_index(self) -> exp.AlterIndex:
        """Parse ``ALTER TABLE ... ALTER INDEX name {VISIBLE | INVISIBLE}``."""
        index = self._parse_field(any_token=True)

        if self._match_text_seq("VISIBLE"):
            visible = True
        elif self._match_text_seq("INVISIBLE"):
            visible = False
        else:
            visible = None

        return self.expression(exp.AlterIndex, this=index, visible=visible)

    def _parse_partition_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse ``PARTITION BY RANGE/LIST (...) (PARTITION ...)`` properties.

        Returns None when the keyword after PARTITION BY is neither RANGE nor
        LIST, and just the partition expressions when no per-partition
        definitions follow (Doris/StarRocks style).
        """
        partition_cls: t.Optional[t.Type[exp.Expression]] = None
        value_parser = None

        if self._match_text_seq("RANGE"):
            partition_cls = exp.PartitionByRangeProperty
            value_parser = self._parse_partition_range_value
        elif self._match_text_seq("LIST"):
            partition_cls = exp.PartitionByListProperty
            value_parser = self._parse_partition_list_value

        if not partition_cls or not value_parser:
            return None

        partition_expressions = self._parse_wrapped_csv(self._parse_assignment)

        # For Doris and Starrocks
        if not self._match_text_seq("(", "PARTITION", advance=False):
            return partition_expressions

        create_expressions = self._parse_wrapped_csv(value_parser)

        return self.expression(
            partition_cls,
            partition_expressions=partition_expressions,
            create_expressions=create_expressions,
        )

    def _parse_partition_range_value(self) -> t.Optional[exp.Expression]:
        """Parse one ``PARTITION name VALUES LESS THAN (...)`` definition."""
        self._match_text_seq("PARTITION")
        name = self._parse_id_var()

        if not self._match_text_seq("VALUES", "LESS", "THAN"):
            return name

        values = self._parse_wrapped_csv(self._parse_expression)

        # Normalize the MAXVALUE keyword, which would otherwise parse as a column.
        if (
            len(values) == 1
            and isinstance(values[0], exp.Column)
            and values[0].name.upper() == "MAXVALUE"
        ):
            values = [exp.var("MAXVALUE")]

        part_range = self.expression(exp.PartitionRange, this=name, expressions=values)
        return self.expression(exp.Partition, expressions=[part_range])

    def _parse_partition_list_value(self) -> exp.Partition:
        """Parse one ``PARTITION name VALUES IN (...)`` definition."""
        self._match_text_seq("PARTITION")
        name = self._parse_id_var()
        self._match_text_seq("VALUES", "IN")
        values = self._parse_wrapped_csv(self._parse_expression)
        part_list = self.expression(exp.PartitionList, this=name, expressions=values)
        return self.expression(exp.Partition, expressions=[part_list])

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # MySQL primary keys may be followed by an index name, so always
        # allow the named form regardless of the caller's flag.
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional, in_props=in_props, named_primary_key=True
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- TRIGGER_EVENTS
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- ASSIGNMENT
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- TRIGGER_TIMING
- TRIGGER_DEFERRABLE
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- SET_ASSIGNMENT_DELIMITERS
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ALTER_TABLE_PARTITIONS
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- ADD_JOIN_ON_TRUE
- SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
- raise_error
- validate_expression
- reset
- errors
- error_level
- error_message_context
- max_errors
- dialect
- sql
- parse
- parse_into
- check_errors
- expression
- parse_set_operation
- build_cast
class Generator(generator.Generator):
    """MySQL-specific SQL generator.

    Renders the canonical sqlglot AST back into MySQL syntax: interval/date
    function spellings, the restricted CAST type set, SHOW statements, and
    RANGE/LIST partition properties.
    """

    INTERVAL_ALLOWS_PLURAL_FORM = False
    LOCKING_READS_SUPPORTED = True
    NULL_ORDERING_SUPPORTED: t.Optional[bool] = None
    JOIN_HINTS = False
    TABLE_HINTS = True
    DUPLICATE_KEY_UPDATE_WITH_SET = False
    QUERY_HINT_SEP = " "
    VALUES_AS_TABLE = False
    NVL2_SUPPORTED = False
    LAST_DAY_SUPPORTS_DATE_PART = False
    JSON_TYPE_REQUIRED_FOR_EXTRACTION = True
    JSON_PATH_BRACKETED_KEY_SUPPORTED = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    SUPPORTS_TO_NUMBER = False
    PARSE_JSON_NAME: t.Optional[str] = None
    PAD_FILL_PATTERN_IS_REQUIRED = True
    WRAP_DERIVED_VALUES = False
    VARCHAR_REQUIRES_SIZE = True
    SUPPORTS_MEDIAN = False
    UPDATE_STATEMENT_SUPPORTS_FROM = False

    # Per-expression SQL renderers overriding the base generator.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ArrayAgg: rename_func("GROUP_CONCAT"),
        exp.BitwiseAndAgg: rename_func("BIT_AND"),
        exp.BitwiseOrAgg: rename_func("BIT_OR"),
        exp.BitwiseXorAgg: rename_func("BIT_XOR"),
        exp.BitwiseCount: rename_func("BIT_COUNT"),
        exp.Chr: lambda self, e: self.chr_sql(e, "CHAR"),
        exp.CurrentDate: no_paren_current_date_sql,
        exp.CurrentVersion: rename_func("VERSION"),
        exp.DateDiff: _remove_ts_or_ds_to_date(
            lambda self, e: self.func("DATEDIFF", e.this, e.expression), ("this", "expression")
        ),
        exp.DateAdd: _remove_ts_or_ds_to_date(date_add_sql("ADD")),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: _remove_ts_or_ds_to_date(date_add_sql("SUB")),
        exp.DateTrunc: _date_trunc_sql,
        exp.Day: _remove_ts_or_ds_to_date(),
        exp.DayOfMonth: _remove_ts_or_ds_to_date(rename_func("DAYOFMONTH")),
        exp.DayOfWeek: _remove_ts_or_ds_to_date(rename_func("DAYOFWEEK")),
        exp.DayOfYear: _remove_ts_or_ds_to_date(rename_func("DAYOFYEAR")),
        exp.GroupConcat: lambda self,
        e: f"""GROUP_CONCAT({self.sql(e, "this")} SEPARATOR {self.sql(e, "separator") or "','"})""",
        exp.ILike: no_ilike_sql,
        exp.JSONExtractScalar: arrow_json_extract_sql,
        exp.Length: length_or_char_length_sql,
        exp.LogicalOr: rename_func("MAX"),
        exp.LogicalAnd: rename_func("MIN"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.Month: _remove_ts_or_ds_to_date(),
        exp.NullSafeEQ: lambda self, e: self.binary(e, "<=>"),
        exp.NullSafeNEQ: lambda self, e: f"NOT {self.binary(e, '<=>')}",
        exp.NumberToStr: rename_func("FORMAT"),
        exp.Pivot: no_pivot_sql,
        # Rewrite SELECT features MySQL lacks into supported equivalents.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.eliminate_semi_and_anti_joins,
                transforms.eliminate_qualify,
                transforms.eliminate_full_outer_join,
                transforms.unnest_generate_date_array_using_recursive_cte,
            ]
        ),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="LOCATE", supports_position=True
        ),
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_date_sql,
        exp.Stuff: rename_func("INSERT"),
        exp.SessionUser: lambda *_: "SESSION_USER()",
        exp.TableSample: no_tablesample_sql,
        exp.TimeFromParts: rename_func("MAKETIME"),
        exp.TimestampAdd: date_add_interval_sql("DATE", "ADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", unit_to_var(e), e.expression, e.this
        ),
        exp.TimestampSub: date_add_interval_sql("DATE", "SUB"),
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeStrToTime: lambda self, e: timestrtotime_sql(
            self,
            e,
            include_precision=not e.args.get("zone"),
        ),
        exp.TimeToStr: _remove_ts_or_ds_to_date(
            lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e))
        ),
        exp.Trim: trim_sql,
        exp.Trunc: rename_func("TRUNCATE"),
        exp.TryCast: no_trycast_sql,
        exp.TsOrDsAdd: date_add_sql("ADD"),
        exp.TsOrDsDiff: lambda self, e: self.func("DATEDIFF", e.this, e.expression),
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unicode: lambda self, e: f"ORD(CONVERT({self.sql(e.this)} USING utf32))",
        exp.UnixToTime: _unix_to_time_sql,
        exp.Week: _remove_ts_or_ds_to_date(),
        exp.WeekOfYear: _remove_ts_or_ds_to_date(rename_func("WEEKOFYEAR")),
        exp.Year: _remove_ts_or_ds_to_date(),
        exp.UtcTimestamp: rename_func("UTC_TIMESTAMP"),
        exp.UtcTime: rename_func("UTC_TIME"),
    }

    # Unsigned integer/decimal types render as the signed base type; the
    # UNSIGNED modifier is appended in datatype_sql.
    UNSIGNED_TYPE_MAPPING = {
        exp.DataType.Type.UBIGINT: "BIGINT",
        exp.DataType.Type.UINT: "INT",
        exp.DataType.Type.UMEDIUMINT: "MEDIUMINT",
        exp.DataType.Type.USMALLINT: "SMALLINT",
        exp.DataType.Type.UTINYINT: "TINYINT",
        exp.DataType.Type.UDECIMAL: "DECIMAL",
        exp.DataType.Type.UDOUBLE: "DOUBLE",
    }

    # Timezone-naive timestamps map to DATETIME; tz-aware ones to TIMESTAMP.
    TIMESTAMP_TYPE_MAPPING = {
        exp.DataType.Type.DATETIME2: "DATETIME",
        exp.DataType.Type.SMALLDATETIME: "DATETIME",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **UNSIGNED_TYPE_MAPPING,
        **TIMESTAMP_TYPE_MAPPING,
    }

    # These are native MySQL types, so the base-generator remapping is undone.
    TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMTEXT)
    TYPE_MAPPING.pop(exp.DataType.Type.LONGTEXT)
    TYPE_MAPPING.pop(exp.DataType.Type.TINYTEXT)
    TYPE_MAPPING.pop(exp.DataType.Type.BLOB)
    TYPE_MAPPING.pop(exp.DataType.Type.MEDIUMBLOB)
    TYPE_MAPPING.pop(exp.DataType.Type.LONGBLOB)
    TYPE_MAPPING.pop(exp.DataType.Type.TINYBLOB)

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.TransientProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionByRangeProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionByListProperty: exp.Properties.Location.POST_SCHEMA,
    }

    LIMIT_FETCH = "LIMIT"

    LIMIT_ONLY_LITERALS = True

    # Text-like types cast via CHAR (see CAST_MAPPING below).
    CHAR_CAST_MAPPING = dict.fromkeys(
        (
            exp.DataType.Type.LONGTEXT,
            exp.DataType.Type.LONGBLOB,
            exp.DataType.Type.MEDIUMBLOB,
            exp.DataType.Type.MEDIUMTEXT,
            exp.DataType.Type.TEXT,
            exp.DataType.Type.TINYBLOB,
            exp.DataType.Type.TINYTEXT,
            exp.DataType.Type.VARCHAR,
        ),
        "CHAR",
    )
    # Integer-like types cast via SIGNED.
    SIGNED_CAST_MAPPING = dict.fromkeys(
        (
            exp.DataType.Type.BIGINT,
            exp.DataType.Type.BOOLEAN,
            exp.DataType.Type.INT,
            exp.DataType.Type.SMALLINT,
            exp.DataType.Type.TINYINT,
            exp.DataType.Type.MEDIUMINT,
        ),
        "SIGNED",
    )

    # MySQL doesn't support many datatypes in cast.
    # https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast
    CAST_MAPPING = {
        **CHAR_CAST_MAPPING,
        **SIGNED_CAST_MAPPING,
        exp.DataType.Type.UBIGINT: "UNSIGNED",
    }

    # Casting to these types is rendered via the TIMESTAMP() function instead.
    TIMESTAMP_FUNC_TYPES = {
        exp.DataType.Type.TIMESTAMPTZ,
        exp.DataType.Type.TIMESTAMPLTZ,
    }

    # https://dev.mysql.com/doc/refman/8.0/en/keywords.html
    RESERVED_KEYWORDS = {
        "accessible",
        "add",
        "all",
        "alter",
        "analyze",
        "and",
        "as",
        "asc",
        "asensitive",
        "before",
        "between",
        "bigint",
        "binary",
        "blob",
        "both",
        "by",
        "call",
        "cascade",
        "case",
        "change",
        "char",
        "character",
        "check",
        "collate",
        "column",
        "condition",
        "constraint",
        "continue",
        "convert",
        "create",
        "cross",
        "cube",
        "cume_dist",
        "current_date",
        "current_time",
        "current_timestamp",
        "current_user",
        "cursor",
        "database",
        "databases",
        "day_hour",
        "day_microsecond",
        "day_minute",
        "day_second",
        "dec",
        "decimal",
        "declare",
        "default",
        "delayed",
        "delete",
        "dense_rank",
        "desc",
        "describe",
        "deterministic",
        "distinct",
        "distinctrow",
        "div",
        "double",
        "drop",
        "dual",
        "each",
        "else",
        "elseif",
        "empty",
        "enclosed",
        "escaped",
        "except",
        "exists",
        "exit",
        "explain",
        "false",
        "fetch",
        "first_value",
        "float",
        "float4",
        "float8",
        "for",
        "force",
        "foreign",
        "from",
        "fulltext",
        "function",
        "generated",
        "get",
        "grant",
        "group",
        "grouping",
        "groups",
        "having",
        "high_priority",
        "hour_microsecond",
        "hour_minute",
        "hour_second",
        "if",
        "ignore",
        "in",
        "index",
        "infile",
        "inner",
        "inout",
        "insensitive",
        "insert",
        "int",
        "int1",
        "int2",
        "int3",
        "int4",
        "int8",
        "integer",
        "intersect",
        "interval",
        "into",
        "io_after_gtids",
        "io_before_gtids",
        "is",
        "iterate",
        "join",
        "json_table",
        "key",
        "keys",
        "kill",
        "lag",
        "last_value",
        "lateral",
        "lead",
        "leading",
        "leave",
        "left",
        "like",
        "limit",
        "linear",
        "lines",
        "load",
        "localtime",
        "localtimestamp",
        "lock",
        "long",
        "longblob",
        "longtext",
        "loop",
        "low_priority",
        "master_bind",
        "master_ssl_verify_server_cert",
        "match",
        "maxvalue",
        "mediumblob",
        "mediumint",
        "mediumtext",
        "middleint",
        "minute_microsecond",
        "minute_second",
        "mod",
        "modifies",
        "natural",
        "not",
        "no_write_to_binlog",
        "nth_value",
        "ntile",
        "null",
        "numeric",
        "of",
        "on",
        "optimize",
        "optimizer_costs",
        "option",
        "optionally",
        "or",
        "order",
        "out",
        "outer",
        "outfile",
        "over",
        "partition",
        "percent_rank",
        "precision",
        "primary",
        "procedure",
        "purge",
        "range",
        "rank",
        "read",
        "reads",
        "read_write",
        "real",
        "recursive",
        "references",
        "regexp",
        "release",
        "rename",
        "repeat",
        "replace",
        "require",
        "resignal",
        "restrict",
        "return",
        "revoke",
        "right",
        "rlike",
        "row",
        "rows",
        "row_number",
        "schema",
        "schemas",
        "second_microsecond",
        "select",
        "sensitive",
        "separator",
        "set",
        "show",
        "signal",
        "smallint",
        "spatial",
        "specific",
        "sql",
        "sqlexception",
        "sqlstate",
        "sqlwarning",
        "sql_big_result",
        "sql_calc_found_rows",
        "sql_small_result",
        "ssl",
        "starting",
        "stored",
        "straight_join",
        "system",
        "table",
        "terminated",
        "then",
        "tinyblob",
        "tinyint",
        "tinytext",
        "to",
        "trailing",
        "trigger",
        "true",
        "undo",
        "union",
        "unique",
        "unlock",
        "unsigned",
        "update",
        "usage",
        "use",
        "using",
        "utc_date",
        "utc_time",
        "utc_timestamp",
        "values",
        "varbinary",
        "varchar",
        "varcharacter",
        "varying",
        "virtual",
        "when",
        "where",
        "while",
        "window",
        "with",
        "write",
        "xor",
        "year_month",
        "zerofill",
    }

    def computedcolumnconstraint_sql(self, expression: exp.ComputedColumnConstraint) -> str:
        """Render a generated column: GENERATED ALWAYS AS (...) STORED|VIRTUAL."""
        persisted = "STORED" if expression.args.get("persisted") else "VIRTUAL"
        return f"GENERATED ALWAYS AS ({self.sql(expression.this.unnest())}) {persisted}"

    def array_sql(self, expression: exp.Array) -> str:
        # Best effort: emit a function-style ARRAY(...) and warn.
        self.unsupported("Arrays are not supported by MySQL")
        return self.function_fallback_sql(expression)

    def arraycontainsall_sql(self, expression: exp.ArrayContainsAll) -> str:
        # Best effort: emit a function-style call and warn.
        self.unsupported("Array operations are not supported by MySQL")
        return self.function_fallback_sql(expression)

    def dpipe_sql(self, expression: exp.DPipe) -> str:
        # String || concatenation becomes CONCAT(...) with the chain flattened.
        return self.func("CONCAT", *expression.flatten())

    def extract_sql(self, expression: exp.Extract) -> str:
        """Render EXTRACT, mapping the epoch unit to UNIX_TIMESTAMP."""
        unit = expression.name
        if unit and unit.lower() == "epoch":
            return self.func("UNIX_TIMESTAMP", expression.expression)

        return super().extract_sql(expression)

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render a data type, forcing a size on VARCHAR and appending
        UNSIGNED for unsigned integer/decimal types."""
        if (
            self.VARCHAR_REQUIRES_SIZE
            and expression.is_type(exp.DataType.Type.VARCHAR)
            and not expression.expressions
        ):
            # `VARCHAR` must always have a size - if it doesn't, we always generate `TEXT`
            return "TEXT"

        # https://dev.mysql.com/doc/refman/8.0/en/numeric-type-syntax.html
        result = super().datatype_sql(expression)
        if expression.this in self.UNSIGNED_TYPE_MAPPING:
            result = f"{result} UNSIGNED"

        return result

    def jsonarraycontains_sql(self, expression: exp.JSONArrayContains) -> str:
        """Render value MEMBER OF(json_array)."""
        return f"{self.sql(expression, 'this')} MEMBER OF({self.sql(expression, 'expression')})"

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        """Render CAST within MySQL's restricted castable type set.

        Timezone-aware timestamp targets use the TIMESTAMP() function; other
        types are remapped via CAST_MAPPING (e.g. VARCHAR -> CHAR).
        """
        if expression.to.this in self.TIMESTAMP_FUNC_TYPES:
            return self.func("TIMESTAMP", expression.this)

        to = self.CAST_MAPPING.get(expression.to.this)

        if to:
            expression.to.set("this", to)
        return super().cast_sql(expression)

    def show_sql(self, expression: exp.Show) -> str:
        """Render a SHOW statement from the args collected by the parser.

        The clause order in the final f-string mirrors MySQL's grammar; each
        piece is pre-padded with a leading space when present.
        """
        this = f" {expression.name}"
        full = " FULL" if expression.args.get("full") else ""
        global_ = " GLOBAL" if expression.args.get("global_") else ""

        target = self.sql(expression, "target")
        target = f" {target}" if target else ""
        if expression.name in ("COLUMNS", "INDEX"):
            target = f" FROM{target}"
        elif expression.name == "GRANTS":
            target = f" FOR{target}"
        elif expression.name in ("LINKS", "PARTITIONS"):
            target = f" ON{target}" if target else ""
        elif expression.name == "PROJECTIONS":
            target = f" ON TABLE{target}" if target else ""

        db = self._prefixed_sql("FROM", expression, "db")

        like = self._prefixed_sql("LIKE", expression, "like")
        where = self.sql(expression, "where")

        types = self.expressions(expression, key="types")
        types = f" {types}" if types else types
        query = self._prefixed_sql("FOR QUERY", expression, "query")

        if expression.name == "PROFILE":
            offset = self._prefixed_sql("OFFSET", expression, "offset")
            limit = self._prefixed_sql("LIMIT", expression, "limit")
        else:
            offset = ""
            limit = self._oldstyle_limit_sql(expression)

        log = self._prefixed_sql("IN", expression, "log")
        position = self._prefixed_sql("FROM", expression, "position")

        channel = self._prefixed_sql("FOR CHANNEL", expression, "channel")

        if expression.name == "ENGINE":
            mutex_or_status = " MUTEX" if expression.args.get("mutex") else " STATUS"
        else:
            mutex_or_status = ""

        for_table = self._prefixed_sql("FOR TABLE", expression, "for_table")
        for_group = self._prefixed_sql("FOR GROUP", expression, "for_group")
        for_user = self._prefixed_sql("FOR USER", expression, "for_user")
        for_role = self._prefixed_sql("FOR ROLE", expression, "for_role")
        into_outfile = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile")
        json = " JSON" if expression.args.get("json") else ""

        return f"SHOW{full}{global_}{this}{json}{target}{for_table}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}{for_group}{for_user}{for_role}{into_outfile}"

    def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str:
        """To avoid TO keyword in ALTER ... RENAME statements.

        It's moved from Doris, because it's the same for all MySQL, Doris,
        and StarRocks.
        """
        return super().alterrename_sql(expression, include_to=False)

    def altercolumn_sql(self, expression: exp.AlterColumn) -> str:
        """Render type changes as MODIFY COLUMN; defer everything else."""
        dtype = self.sql(expression, "dtype")
        if not dtype:
            return super().altercolumn_sql(expression)

        this = self.sql(expression, "this")
        return f"MODIFY COLUMN {this} {dtype}"

    def _prefixed_sql(self, prefix: str, expression: exp.Expression, arg: str) -> str:
        # Helper: render " PREFIX sql" or "" when the arg is absent.
        sql = self.sql(expression, arg)
        return f" {prefix} {sql}" if sql else ""

    def _oldstyle_limit_sql(self, expression: exp.Show) -> str:
        # Render MySQL's "LIMIT offset, count" form for SHOW statements.
        limit = self.sql(expression, "limit")
        offset = self.sql(expression, "offset")
        if limit:
            limit_offset = f"{offset}, {limit}" if offset else limit
            return f" LIMIT {limit_offset}"
        return ""

    def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str:
        """Emulate TIMESTAMP_TRUNC via TIMESTAMPDIFF + DATE_ADD from a fixed epoch."""
        unit = expression.args.get("unit")

        # Pick an old-enough date to avoid negative timestamp diffs
        start_ts = "'0000-01-01 00:00:00'"

        # Source: https://stackoverflow.com/a/32955740
        timestamp_diff = build_date_delta(exp.TimestampDiff)([unit, start_ts, expression.this])
        interval = exp.Interval(this=timestamp_diff, unit=unit)
        dateadd = build_date_delta_with_interval(exp.DateAdd)([start_ts, interval])

        return self.sql(dateadd)

    def converttimezone_sql(self, expression: exp.ConvertTimezone) -> str:
        """Render CONVERT_TZ(timestamp, from_tz, to_tz)."""
        from_tz = expression.args.get("source_tz")
        to_tz = expression.args.get("target_tz")
        dt = expression.args.get("timestamp")

        return self.func("CONVERT_TZ", dt, from_tz, to_tz)

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        # Drop the timezone clause and warn; only the operand is emitted.
        self.unsupported("AT TIME ZONE is not supported by MySQL")
        return self.sql(expression.this)

    def isascii_sql(self, expression: exp.IsAscii) -> str:
        # Emulated with a POSIX character-class regex match.
        return f"REGEXP_LIKE({self.sql(expression.this)}, '^[[:ascii:]]*$')"

    def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
        # https://dev.mysql.com/doc/refman/8.4/en/window-function-descriptions.html
        self.unsupported("MySQL does not support IGNORE NULLS.")
        return self.sql(expression.this)

    @unsupported_args("this")
    def currentschema_sql(self, expression: exp.CurrentSchema) -> str:
        """Render CURRENT_SCHEMA as SCHEMA(); any argument is unsupported."""
        return self.func("SCHEMA")

    def partition_sql(self, expression: exp.Partition) -> str:
        """Render a partition; inside RANGE/LIST properties the PARTITION
        keyword is emitted by the child, so only the expressions appear here."""
        parent = expression.parent
        if isinstance(parent, (exp.PartitionByRangeProperty, exp.PartitionByListProperty)):
            return self.expressions(expression, flat=True)
        return super().partition_sql(expression)

    def _partition_by_sql(
        self, expression: exp.PartitionByRangeProperty | exp.PartitionByListProperty, kind: str
    ) -> str:
        # Shared renderer for PARTITION BY RANGE (...) (...) and ... LIST (...) (...).
        partitions = self.expressions(expression, key="partition_expressions", flat=True)
        create = self.expressions(expression, key="create_expressions", flat=True)
        return f"PARTITION BY {kind} ({partitions}) ({create})"

    def partitionbyrangeproperty_sql(self, expression: exp.PartitionByRangeProperty) -> str:
        return self._partition_by_sql(expression, "RANGE")

    def partitionbylistproperty_sql(self, expression: exp.PartitionByListProperty) -> str:
        return self._partition_by_sql(expression, "LIST")

    def partitionlist_sql(self, expression: exp.PartitionList) -> str:
        """Render PARTITION name VALUES IN (...)."""
        name = self.sql(expression, "this")
        values = self.expressions(expression, flat=True)
        return f"PARTITION {name} VALUES IN ({values})"

    def partitionrange_sql(self, expression: exp.PartitionRange) -> str:
        """Render PARTITION name VALUES LESS THAN (...)."""
        name = self.sql(expression, "this")
        values = self.expressions(expression, flat=True)
        return f"PARTITION {name} VALUES LESS THAN ({values})"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True: Always quote except for special cases. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1282 def datatype_sql(self, expression: exp.DataType) -> str: 1283 if ( 1284 self.VARCHAR_REQUIRES_SIZE 1285 and expression.is_type(exp.DataType.Type.VARCHAR) 1286 and not expression.expressions 1287 ): 1288 # `VARCHAR` must always have a size - if it doesn't, we always generate `TEXT` 1289 return "TEXT" 1290 1291 # https://dev.mysql.com/doc/refman/8.0/en/numeric-type-syntax.html 1292 result = super().datatype_sql(expression) 1293 if expression.this in self.UNSIGNED_TYPE_MAPPING: 1294 result = f"{result} UNSIGNED" 1295 1296 return result
1301 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1302 if expression.to.this in self.TIMESTAMP_FUNC_TYPES: 1303 return self.func("TIMESTAMP", expression.this) 1304 1305 to = self.CAST_MAPPING.get(expression.to.this) 1306 1307 if to: 1308 expression.to.set("this", to) 1309 return super().cast_sql(expression)
1311 def show_sql(self, expression: exp.Show) -> str: 1312 this = f" {expression.name}" 1313 full = " FULL" if expression.args.get("full") else "" 1314 global_ = " GLOBAL" if expression.args.get("global_") else "" 1315 1316 target = self.sql(expression, "target") 1317 target = f" {target}" if target else "" 1318 if expression.name in ("COLUMNS", "INDEX"): 1319 target = f" FROM{target}" 1320 elif expression.name == "GRANTS": 1321 target = f" FOR{target}" 1322 elif expression.name in ("LINKS", "PARTITIONS"): 1323 target = f" ON{target}" if target else "" 1324 elif expression.name == "PROJECTIONS": 1325 target = f" ON TABLE{target}" if target else "" 1326 1327 db = self._prefixed_sql("FROM", expression, "db") 1328 1329 like = self._prefixed_sql("LIKE", expression, "like") 1330 where = self.sql(expression, "where") 1331 1332 types = self.expressions(expression, key="types") 1333 types = f" {types}" if types else types 1334 query = self._prefixed_sql("FOR QUERY", expression, "query") 1335 1336 if expression.name == "PROFILE": 1337 offset = self._prefixed_sql("OFFSET", expression, "offset") 1338 limit = self._prefixed_sql("LIMIT", expression, "limit") 1339 else: 1340 offset = "" 1341 limit = self._oldstyle_limit_sql(expression) 1342 1343 log = self._prefixed_sql("IN", expression, "log") 1344 position = self._prefixed_sql("FROM", expression, "position") 1345 1346 channel = self._prefixed_sql("FOR CHANNEL", expression, "channel") 1347 1348 if expression.name == "ENGINE": 1349 mutex_or_status = " MUTEX" if expression.args.get("mutex") else " STATUS" 1350 else: 1351 mutex_or_status = "" 1352 1353 for_table = self._prefixed_sql("FOR TABLE", expression, "for_table") 1354 for_group = self._prefixed_sql("FOR GROUP", expression, "for_group") 1355 for_user = self._prefixed_sql("FOR USER", expression, "for_user") 1356 for_role = self._prefixed_sql("FOR ROLE", expression, "for_role") 1357 into_outfile = self._prefixed_sql("INTO OUTFILE", expression, "into_outfile") 1358 json = " 
JSON" if expression.args.get("json") else "" 1359 1360 return f"SHOW{full}{global_}{this}{json}{target}{for_table}{types}{db}{query}{log}{position}{channel}{mutex_or_status}{like}{where}{offset}{limit}{for_group}{for_user}{for_role}{into_outfile}"
1362 def alterrename_sql(self, expression: exp.AlterRename, include_to: bool = True) -> str: 1363 """To avoid TO keyword in ALTER ... RENAME statements. 1364 It's moved from Doris, because it's the same for all MySQL, Doris, and StarRocks. 1365 """ 1366 return super().alterrename_sql(expression, include_to=False)
To avoid TO keyword in ALTER ... RENAME statements. It's moved from Doris, because it's the same for all MySQL, Doris, and StarRocks.
1388 def timestamptrunc_sql(self, expression: exp.TimestampTrunc) -> str: 1389 unit = expression.args.get("unit") 1390 1391 # Pick an old-enough date to avoid negative timestamp diffs 1392 start_ts = "'0000-01-01 00:00:00'" 1393 1394 # Source: https://stackoverflow.com/a/32955740 1395 timestamp_diff = build_date_delta(exp.TimestampDiff)([unit, start_ts, expression.this]) 1396 interval = exp.Interval(this=timestamp_diff, unit=unit) 1397 dateadd = build_date_delta_with_interval(exp.DateAdd)([start_ts, interval]) 1398 1399 return self.sql(dateadd)
Inherited Members
- sqlglot.generator.Generator
- Generator
- IGNORE_NULLS_IN_FUNC
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- INOUT_SEPARATOR
- DIRECTED_JOINS
- QUERY_HINTS
- IS_BOOL_ALLOWED
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- SET_ASSIGNMENT_REQUIRES_VARIABLE_KEYWORD
- DECLARE_DEFAULT_ASSIGNMENT
- STAR_EXCLUDE_REQUIRES_DERIVED_TABLE
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- pseudocolumn_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- inoutcolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- triggerproperties_sql
- triggerreferencing_sql
- triggerevent_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- rollupindex_sql
- rollupproperty_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- booland_sql
- boolor_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- strtotime_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- match_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- slice_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql
- modelattribute_sql
- directorystage_sql
- uuid_sql
- initcap_sql
- localtime_sql
- localtimestamp_sql
- weekstart_sql
- chr_sql
- block_sql
- storedprocedure_sql
- ifblock_sql
- whileblock_sql
- execute_sql
- executesql_sql