sqlglot.optimizer.normalize_identifiers
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp 6from sqlglot.dialects.dialect import Dialect, DialectType 7 8if t.TYPE_CHECKING: 9 from sqlglot._typing import E 10 11 12@t.overload 13def normalize_identifiers( 14 expression: E, dialect: DialectType = None, store_original_column_identifiers: bool = False 15) -> E: ... 16 17 18@t.overload 19def normalize_identifiers( 20 expression: str, dialect: DialectType = None, store_original_column_identifiers: bool = False 21) -> exp.Identifier: ... 22 23 24def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False): 25 """ 26 Normalize identifiers by converting them to either lower or upper case, 27 ensuring the semantics are preserved in each case (e.g. by respecting 28 case-sensitivity). 29 30 This transformation reflects how identifiers would be resolved by the engine corresponding 31 to each SQL dialect, and plays a very important role in the standardization of the AST. 32 33 It's possible to make this a no-op by adding a special comment next to the 34 identifier of interest: 35 36 SELECT a /* sqlglot.meta case_sensitive */ FROM table 37 38 In this example, the identifier `a` will not be normalized. 39 40 Note: 41 Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even 42 when they're quoted, so in these cases all identifiers are normalized. 43 44 Example: 45 >>> import sqlglot 46 >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar') 47 >>> normalize_identifiers(expression).sql() 48 'SELECT bar.a AS a FROM "Foo".bar' 49 >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake") 50 'FOO' 51 52 Args: 53 expression: The expression to transform. 54 dialect: The dialect to use in order to decide how to normalize identifiers. 55 store_original_column_identifiers: Whether to store the original column identifiers in 56 the meta data of the expression in case we want to undo the normalization at a later point. 57 58 Returns: 59 The transformed expression. 60 """ 61 dialect = Dialect.get_or_raise(dialect) 62 63 if isinstance(expression, str): 64 expression = exp.parse_identifier(expression, dialect=dialect) 65 66 for node in expression.walk(prune=lambda n: n.meta.get("case_sensitive")): 67 if not node.meta.get("case_sensitive"): 68 if store_original_column_identifiers and isinstance(node, exp.Column): 69 # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key 70 parent = node 71 while parent and isinstance(parent.parent, exp.Dot): 72 parent = parent.parent 73 74 node.meta["dot_parts"] = [p.name for p in parent.parts] 75 76 dialect.normalize_identifier(node) 77 78 return expression
def
normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
25def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False): 26 """ 27 Normalize identifiers by converting them to either lower or upper case, 28 ensuring the semantics are preserved in each case (e.g. by respecting 29 case-sensitivity). 30 31 This transformation reflects how identifiers would be resolved by the engine corresponding 32 to each SQL dialect, and plays a very important role in the standardization of the AST. 33 34 It's possible to make this a no-op by adding a special comment next to the 35 identifier of interest: 36 37 SELECT a /* sqlglot.meta case_sensitive */ FROM table 38 39 In this example, the identifier `a` will not be normalized. 40 41 Note: 42 Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even 43 when they're quoted, so in these cases all identifiers are normalized. 44 45 Example: 46 >>> import sqlglot 47 >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar') 48 >>> normalize_identifiers(expression).sql() 49 'SELECT bar.a AS a FROM "Foo".bar' 50 >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake") 51 'FOO' 52 53 Args: 54 expression: The expression to transform. 55 dialect: The dialect to use in order to decide how to normalize identifiers. 56 store_original_column_identifiers: Whether to store the original column identifiers in 57 the meta data of the expression in case we want to undo the normalization at a later point. 58 59 Returns: 60 The transformed expression. 61 """ 62 dialect = Dialect.get_or_raise(dialect) 63 64 if isinstance(expression, str): 65 expression = exp.parse_identifier(expression, dialect=dialect) 66 67 for node in expression.walk(prune=lambda n: n.meta.get("case_sensitive")): 68 if not node.meta.get("case_sensitive"): 69 if store_original_column_identifiers and isinstance(node, exp.Column): 70 # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key 71 parent = node 72 while parent and isinstance(parent.parent, exp.Dot): 73 parent = parent.parent 74 75 node.meta["dot_parts"] = [p.name for p in parent.parts] 76 77 dialect.normalize_identifier(node) 78 79 return expression
Normalize identifiers by converting them to either lower or upper case, ensuring the semantics are preserved in each case (e.g. by respecting case-sensitivity).
This transformation reflects how identifiers would be resolved by the engine corresponding to each SQL dialect, and plays a very important role in the standardization of the AST.
It's possible to make this a no-op by adding a special comment next to the identifier of interest:
SELECT a /* sqlglot.meta case_sensitive */ FROM table
In this example, the identifier a will not be normalized.
Note:
Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even when they're quoted, so in these cases all identifiers are normalized.
Example:
>>> import sqlglot >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar') >>> normalize_identifiers(expression).sql() 'SELECT bar.a AS a FROM "Foo".bar' >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake") 'FOO'
Arguments:
- expression: The expression to transform.
- dialect: The dialect to use in order to decide how to normalize identifiers.
- store_original_column_identifiers: Whether to store the original column identifiers in the meta data of the expression in case we want to undo the normalization at a later point.
Returns:
The transformed expression.