Edit on GitHub

sqlglot.optimizer.normalize_identifiers

 1from __future__ import annotations
 2
 3import typing as t
 4
 5from sqlglot import exp
 6from sqlglot.dialects.dialect import Dialect, DialectType
 7
 8if t.TYPE_CHECKING:
 9    from sqlglot._typing import E
10
11
12@t.overload
13def normalize_identifiers(
14    expression: E, dialect: DialectType = None, store_original_column_identifiers: bool = False
15) -> E: ...
16
17
18@t.overload
19def normalize_identifiers(
20    expression: str, dialect: DialectType = None, store_original_column_identifiers: bool = False
21) -> exp.Identifier: ...
22
23
24def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
25    """
26    Normalize identifiers by converting them to either lower or upper case,
27    ensuring the semantics are preserved in each case (e.g. by respecting
28    case-sensitivity).
29
30    This transformation reflects how identifiers would be resolved by the engine corresponding
31    to each SQL dialect, and plays a very important role in the standardization of the AST.
32
33    It's possible to make this a no-op by adding a special comment next to the
34    identifier of interest:
35
36        SELECT a /* sqlglot.meta case_sensitive */ FROM table
37
38    In this example, the identifier `a` will not be normalized.
39
40    Note:
41        Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even
42        when they're quoted, so in these cases all identifiers are normalized.
43
44    Example:
45        >>> import sqlglot
46        >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar')
47        >>> normalize_identifiers(expression).sql()
48        'SELECT bar.a AS a FROM "Foo".bar'
49        >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake")
50        'FOO'
51
52    Args:
53        expression: The expression to transform.
54        dialect: The dialect to use in order to decide how to normalize identifiers.
55        store_original_column_identifiers: Whether to store the original column identifiers in
56            the meta data of the expression in case we want to undo the normalization at a later point.
57
58    Returns:
59        The transformed expression.
60    """
61    dialect = Dialect.get_or_raise(dialect)
62
63    if isinstance(expression, str):
64        expression = exp.parse_identifier(expression, dialect=dialect)
65
66    for node in expression.walk(prune=lambda n: n.meta.get("case_sensitive")):
67        if not node.meta.get("case_sensitive"):
68            if store_original_column_identifiers and isinstance(node, exp.Column):
69                # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key
70                parent = node
71                while parent and isinstance(parent.parent, exp.Dot):
72                    parent = parent.parent
73
74                node.meta["dot_parts"] = [p.name for p in parent.parts]
75
76            dialect.normalize_identifier(node)
77
78    return expression
def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
25def normalize_identifiers(expression, dialect=None, store_original_column_identifiers=False):
26    """
27    Normalize identifiers by converting them to either lower or upper case,
28    ensuring the semantics are preserved in each case (e.g. by respecting
29    case-sensitivity).
30
31    This transformation reflects how identifiers would be resolved by the engine corresponding
32    to each SQL dialect, and plays a very important role in the standardization of the AST.
33
34    It's possible to make this a no-op by adding a special comment next to the
35    identifier of interest:
36
37        SELECT a /* sqlglot.meta case_sensitive */ FROM table
38
39    In this example, the identifier `a` will not be normalized.
40
41    Note:
42        Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even
43        when they're quoted, so in these cases all identifiers are normalized.
44
45    Example:
46        >>> import sqlglot
47        >>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar')
48        >>> normalize_identifiers(expression).sql()
49        'SELECT bar.a AS a FROM "Foo".bar'
50        >>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake")
51        'FOO'
52
53    Args:
54        expression: The expression to transform.
55        dialect: The dialect to use in order to decide how to normalize identifiers.
56        store_original_column_identifiers: Whether to store the original column identifiers in
57            the meta data of the expression in case we want to undo the normalization at a later point.
58
59    Returns:
60        The transformed expression.
61    """
62    dialect = Dialect.get_or_raise(dialect)
63
64    if isinstance(expression, str):
65        expression = exp.parse_identifier(expression, dialect=dialect)
66
67    for node in expression.walk(prune=lambda n: n.meta.get("case_sensitive")):
68        if not node.meta.get("case_sensitive"):
69            if store_original_column_identifiers and isinstance(node, exp.Column):
70                # TODO: This does not handle non-column cases, e.g PARSE_JSON(...).key
71                parent = node
72                while parent and isinstance(parent.parent, exp.Dot):
73                    parent = parent.parent
74
75                node.meta["dot_parts"] = [p.name for p in parent.parts]
76
77            dialect.normalize_identifier(node)
78
79    return expression

Normalize identifiers by converting them to either lower or upper case, ensuring the semantics are preserved in each case (e.g. by respecting case-sensitivity).

This transformation reflects how identifiers would be resolved by the engine corresponding to each SQL dialect, and plays a very important role in the standardization of the AST.

It's possible to make this a no-op by adding a special comment next to the identifier of interest:

SELECT a /* sqlglot.meta case_sensitive */ FROM table

In this example, the identifier a will not be normalized.

Note:

Some dialects (e.g. DuckDB) treat all identifiers as case-insensitive even when they're quoted, so in these cases all identifiers are normalized.

Example:
>>> import sqlglot
>>> expression = sqlglot.parse_one('SELECT Bar.A AS A FROM "Foo".Bar')
>>> normalize_identifiers(expression).sql()
'SELECT bar.a AS a FROM "Foo".bar'
>>> normalize_identifiers("foo", dialect="snowflake").sql(dialect="snowflake")
'FOO'
Arguments:
  • expression: The expression to transform.
  • dialect: The dialect to use in order to decide how to normalize identifiers.
  • store_original_column_identifiers: Whether to store the original column identifiers in the meta data of the expression in case we want to undo the normalization at a later point.
Returns:

The transformed expression.