Edit on GitHub

sqlglot.optimizer.qualify

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp
  6from sqlglot.dialects.dialect import Dialect, DialectType
  7from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
  8from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
  9from sqlglot.optimizer.qualify_columns import (
 10    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
 11    qualify_columns as qualify_columns_func,
 12    quote_identifiers as quote_identifiers_func,
 13    validate_qualify_columns as validate_qualify_columns_func,
 14)
 15from sqlglot.optimizer.qualify_tables import qualify_tables
 16from sqlglot.schema import Schema, ensure_schema
 17
 18
def qualify(
    expression: exp.Expression,
    dialect: DialectType = None,
    db: t.Optional[str] = None,
    catalog: t.Optional[str] = None,
    schema: t.Optional[dict | Schema] = None,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    isolate_tables: bool = False,
    qualify_columns: bool = True,
    allow_partial_qualification: bool = False,
    validate_qualify_columns: bool = True,
    quote_identifiers: bool = True,
    identify: bool = True,
    infer_csv_schemas: bool = False,
) -> exp.Expression:
    """
    Rewrite sqlglot AST to have normalized and qualified tables and columns.

    This step is necessary for all further SQLGlot optimizations.

    Example:
        >>> import sqlglot
        >>> schema = {"tbl": {"col": "INT"}}
        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
        >>> qualify(expression, schema=schema).sql()
        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'

    Args:
        expression: Expression to qualify.
        dialect: The SQL dialect used to normalize and quote identifiers and to
            resolve dialect-specific behavior (e.g. CTE alias column pushdown).
        db: Default database name for tables.
        catalog: Default catalog name for tables.
        schema: Schema to infer column names and types.
        expand_alias_refs: Whether to expand references to aliases.
        expand_stars: Whether to expand star queries. This is a necessary step
            for most of the optimizer's rules to work; do not set to False unless you
            know what you're doing!
        infer_schema: Whether to infer the schema if missing.
        isolate_tables: Whether to isolate table selects.
        qualify_columns: Whether to qualify columns.
        allow_partial_qualification: Whether to allow partial qualification.
        validate_qualify_columns: Whether to validate columns.
        quote_identifiers: Whether to run the quote_identifiers step.
            This step is necessary to ensure correctness for case sensitive queries.
            But this flag is provided in case this step is performed at a later time.
        identify: If True, quote all identifiers, else only necessary ones.
        infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.

    Returns:
        The qualified expression.
    """
    schema = ensure_schema(schema, dialect=dialect)
    # Normalize identifier casing first so table/column resolution below is
    # consistent with the dialect's case-sensitivity rules.
    expression = normalize_identifiers(expression, dialect=dialect)
    expression = qualify_tables(
        expression,
        db=db,
        catalog=catalog,
        schema=schema,
        dialect=dialect,
        infer_csv_schemas=infer_csv_schemas,
    )

    if isolate_tables:
        expression = isolate_table_selects(expression, schema=schema)

    # Some dialects give CTE alias columns precedence over projection aliases;
    # push them down before qualifying columns in that case.
    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
        expression = pushdown_cte_alias_columns_func(expression)

    if qualify_columns:
        expression = qualify_columns_func(
            expression,
            schema,
            expand_alias_refs=expand_alias_refs,
            expand_stars=expand_stars,
            infer_schema=infer_schema,
            allow_partial_qualification=allow_partial_qualification,
        )

    if quote_identifiers:
        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)

    # Validation runs last so it sees the fully qualified/quoted tree.
    if validate_qualify_columns:
        validate_qualify_columns_func(expression)

    return expression
def qualify( expression: sqlglot.expressions.Expression, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = None, db: Optional[str] = None, catalog: Optional[str] = None, schema: Union[dict, sqlglot.schema.Schema, NoneType] = None, expand_alias_refs: bool = True, expand_stars: bool = True, infer_schema: Optional[bool] = None, isolate_tables: bool = False, qualify_columns: bool = True, allow_partial_qualification: bool = False, validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, infer_csv_schemas: bool = False) -> sqlglot.expressions.Expression:
def qualify(
    expression: exp.Expression,
    dialect: DialectType = None,
    db: t.Optional[str] = None,
    catalog: t.Optional[str] = None,
    schema: t.Optional[dict | Schema] = None,
    expand_alias_refs: bool = True,
    expand_stars: bool = True,
    infer_schema: t.Optional[bool] = None,
    isolate_tables: bool = False,
    qualify_columns: bool = True,
    allow_partial_qualification: bool = False,
    validate_qualify_columns: bool = True,
    quote_identifiers: bool = True,
    identify: bool = True,
    infer_csv_schemas: bool = False,
) -> exp.Expression:
    """
    Rewrite sqlglot AST to have normalized and qualified tables and columns.

    This step is necessary for all further SQLGlot optimizations.

    Example:
        >>> import sqlglot
        >>> schema = {"tbl": {"col": "INT"}}
        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
        >>> qualify(expression, schema=schema).sql()
        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'

    Args:
        expression: Expression to qualify.
        dialect: The SQL dialect used to normalize and quote identifiers and to
            resolve dialect-specific behavior (e.g. CTE alias column pushdown).
        db: Default database name for tables.
        catalog: Default catalog name for tables.
        schema: Schema to infer column names and types.
        expand_alias_refs: Whether to expand references to aliases.
        expand_stars: Whether to expand star queries. This is a necessary step
            for most of the optimizer's rules to work; do not set to False unless you
            know what you're doing!
        infer_schema: Whether to infer the schema if missing.
        isolate_tables: Whether to isolate table selects.
        qualify_columns: Whether to qualify columns.
        allow_partial_qualification: Whether to allow partial qualification.
        validate_qualify_columns: Whether to validate columns.
        quote_identifiers: Whether to run the quote_identifiers step.
            This step is necessary to ensure correctness for case sensitive queries.
            But this flag is provided in case this step is performed at a later time.
        identify: If True, quote all identifiers, else only necessary ones.
        infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.

    Returns:
        The qualified expression.
    """
    schema = ensure_schema(schema, dialect=dialect)
    # Normalize identifier casing first so table/column resolution below is
    # consistent with the dialect's case-sensitivity rules.
    expression = normalize_identifiers(expression, dialect=dialect)
    expression = qualify_tables(
        expression,
        db=db,
        catalog=catalog,
        schema=schema,
        dialect=dialect,
        infer_csv_schemas=infer_csv_schemas,
    )

    if isolate_tables:
        expression = isolate_table_selects(expression, schema=schema)

    # Some dialects give CTE alias columns precedence over projection aliases;
    # push them down before qualifying columns in that case.
    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
        expression = pushdown_cte_alias_columns_func(expression)

    if qualify_columns:
        expression = qualify_columns_func(
            expression,
            schema,
            expand_alias_refs=expand_alias_refs,
            expand_stars=expand_stars,
            infer_schema=infer_schema,
            allow_partial_qualification=allow_partial_qualification,
        )

    if quote_identifiers:
        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)

    # Validation runs last so it sees the fully qualified/quoted tree.
    if validate_qualify_columns:
        validate_qualify_columns_func(expression)

    return expression

Rewrite sqlglot AST to have normalized and qualified tables and columns.

This step is necessary for all further SQLGlot optimizations.

Example:
>>> import sqlglot
>>> schema = {"tbl": {"col": "INT"}}
>>> expression = sqlglot.parse_one("SELECT col FROM tbl")
>>> qualify(expression, schema=schema).sql()
'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
Arguments:
  • expression: Expression to qualify.
  • dialect: The SQL dialect used to normalize and quote identifiers and to resolve dialect-specific behavior (e.g. CTE alias column pushdown).
  • db: Default database name for tables.
  • catalog: Default catalog name for tables.
  • schema: Schema to infer column names and types.
  • expand_alias_refs: Whether to expand references to aliases.
  • expand_stars: Whether to expand star queries. This is a necessary step for most of the optimizer's rules to work; do not set to False unless you know what you're doing!
  • infer_schema: Whether to infer the schema if missing.
  • isolate_tables: Whether to isolate table selects.
  • qualify_columns: Whether to qualify columns.
  • allow_partial_qualification: Whether to allow partial qualification.
  • validate_qualify_columns: Whether to validate columns.
  • quote_identifiers: Whether to run the quote_identifiers step. This step is necessary to ensure correctness for case sensitive queries. But this flag is provided in case this step is performed at a later time.
  • identify: If True, quote all identifiers, else only necessary ones.
  • infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
Returns:

The qualified expression.