Edit on GitHub

sqlglot.optimizer.qualify

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp
  6from sqlglot.dialects.dialect import Dialect, DialectType
  7from sqlglot.optimizer.isolate_table_selects import isolate_table_selects
  8from sqlglot.optimizer.normalize_identifiers import normalize_identifiers
  9from sqlglot.optimizer.qualify_columns import (
 10    pushdown_cte_alias_columns as pushdown_cte_alias_columns_func,
 11    qualify_columns as qualify_columns_func,
 12    quote_identifiers as quote_identifiers_func,
 13    validate_qualify_columns as validate_qualify_columns_func,
 14)
 15from sqlglot.optimizer.qualify_tables import qualify_tables
 16from sqlglot.schema import Schema, ensure_schema
 17
 18
 19def qualify(
 20    expression: exp.Expression,
 21    dialect: DialectType = None,
 22    db: t.Optional[str] = None,
 23    catalog: t.Optional[str] = None,
 24    schema: t.Optional[dict | Schema] = None,
 25    expand_alias_refs: bool = True,
 26    expand_stars: bool = True,
 27    infer_schema: t.Optional[bool] = None,
 28    isolate_tables: bool = False,
 29    qualify_columns: bool = True,
 30    validate_qualify_columns: bool = True,
 31    quote_identifiers: bool = True,
 32    identify: bool = True,
 33    infer_csv_schemas: bool = False,
 34) -> exp.Expression:
 35    """
 36    Rewrite sqlglot AST to have normalized and qualified tables and columns.
 37
 38    This step is necessary for all further SQLGlot optimizations.
 39
 40    Example:
 41        >>> import sqlglot
 42        >>> schema = {"tbl": {"col": "INT"}}
 43        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
 44        >>> qualify(expression, schema=schema).sql()
 45        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
 46
 47    Args:
 48        expression: Expression to qualify.
 49        db: Default database name for tables.
 50        catalog: Default catalog name for tables.
 51        schema: Schema to infer column names and types.
 52        expand_alias_refs: Whether to expand references to aliases.
 53        expand_stars: Whether to expand star queries. This is a necessary step
 54            for most of the optimizer's rules to work; do not set to False unless you
 55            know what you're doing!
 56        infer_schema: Whether to infer the schema if missing.
 57        isolate_tables: Whether to isolate table selects.
 58        qualify_columns: Whether to qualify columns.
 59        validate_qualify_columns: Whether to validate columns.
 60        quote_identifiers: Whether to run the quote_identifiers step.
 61            This step is necessary to ensure correctness for case sensitive queries.
 62            But this flag is provided in case this step is performed at a later time.
 63        identify: If True, quote all identifiers, else only necessary ones.
 64        infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
 65
 66    Returns:
 67        The qualified expression.
 68    """
 69    schema = ensure_schema(schema, dialect=dialect)
 70    expression = normalize_identifiers(expression, dialect=dialect)
 71    expression = qualify_tables(
 72        expression,
 73        db=db,
 74        catalog=catalog,
 75        schema=schema,
 76        dialect=dialect,
 77        infer_csv_schemas=infer_csv_schemas,
 78    )
 79
 80    if isolate_tables:
 81        expression = isolate_table_selects(expression, schema=schema)
 82
 83    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
 84        expression = pushdown_cte_alias_columns_func(expression)
 85
 86    if qualify_columns:
 87        expression = qualify_columns_func(
 88            expression,
 89            schema,
 90            expand_alias_refs=expand_alias_refs,
 91            expand_stars=expand_stars,
 92            infer_schema=infer_schema,
 93        )
 94
 95    if quote_identifiers:
 96        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)
 97
 98    if validate_qualify_columns:
 99        validate_qualify_columns_func(expression)
100
101    return expression
def qualify( expression: sqlglot.expressions.Expression, dialect: Union[str, sqlglot.dialects.dialect.Dialect, Type[sqlglot.dialects.dialect.Dialect], NoneType] = None, db: Optional[str] = None, catalog: Optional[str] = None, schema: Union[dict, sqlglot.schema.Schema, NoneType] = None, expand_alias_refs: bool = True, expand_stars: bool = True, infer_schema: Optional[bool] = None, isolate_tables: bool = False, qualify_columns: bool = True, validate_qualify_columns: bool = True, quote_identifiers: bool = True, identify: bool = True, infer_csv_schemas: bool = False) -> sqlglot.expressions.Expression:
 20def qualify(
 21    expression: exp.Expression,
 22    dialect: DialectType = None,
 23    db: t.Optional[str] = None,
 24    catalog: t.Optional[str] = None,
 25    schema: t.Optional[dict | Schema] = None,
 26    expand_alias_refs: bool = True,
 27    expand_stars: bool = True,
 28    infer_schema: t.Optional[bool] = None,
 29    isolate_tables: bool = False,
 30    qualify_columns: bool = True,
 31    validate_qualify_columns: bool = True,
 32    quote_identifiers: bool = True,
 33    identify: bool = True,
 34    infer_csv_schemas: bool = False,
 35) -> exp.Expression:
 36    """
 37    Rewrite sqlglot AST to have normalized and qualified tables and columns.
 38
 39    This step is necessary for all further SQLGlot optimizations.
 40
 41    Example:
 42        >>> import sqlglot
 43        >>> schema = {"tbl": {"col": "INT"}}
 44        >>> expression = sqlglot.parse_one("SELECT col FROM tbl")
 45        >>> qualify(expression, schema=schema).sql()
 46        'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
 47
 48    Args:
 49        expression: Expression to qualify.
 50        db: Default database name for tables.
 51        catalog: Default catalog name for tables.
 52        schema: Schema to infer column names and types.
 53        expand_alias_refs: Whether to expand references to aliases.
 54        expand_stars: Whether to expand star queries. This is a necessary step
 55            for most of the optimizer's rules to work; do not set to False unless you
 56            know what you're doing!
 57        infer_schema: Whether to infer the schema if missing.
 58        isolate_tables: Whether to isolate table selects.
 59        qualify_columns: Whether to qualify columns.
 60        validate_qualify_columns: Whether to validate columns.
 61        quote_identifiers: Whether to run the quote_identifiers step.
 62            This step is necessary to ensure correctness for case sensitive queries.
 63            But this flag is provided in case this step is performed at a later time.
 64        identify: If True, quote all identifiers, else only necessary ones.
 65        infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
 66
 67    Returns:
 68        The qualified expression.
 69    """
 70    schema = ensure_schema(schema, dialect=dialect)
 71    expression = normalize_identifiers(expression, dialect=dialect)
 72    expression = qualify_tables(
 73        expression,
 74        db=db,
 75        catalog=catalog,
 76        schema=schema,
 77        dialect=dialect,
 78        infer_csv_schemas=infer_csv_schemas,
 79    )
 80
 81    if isolate_tables:
 82        expression = isolate_table_selects(expression, schema=schema)
 83
 84    if Dialect.get_or_raise(dialect).PREFER_CTE_ALIAS_COLUMN:
 85        expression = pushdown_cte_alias_columns_func(expression)
 86
 87    if qualify_columns:
 88        expression = qualify_columns_func(
 89            expression,
 90            schema,
 91            expand_alias_refs=expand_alias_refs,
 92            expand_stars=expand_stars,
 93            infer_schema=infer_schema,
 94        )
 95
 96    if quote_identifiers:
 97        expression = quote_identifiers_func(expression, dialect=dialect, identify=identify)
 98
 99    if validate_qualify_columns:
100        validate_qualify_columns_func(expression)
101
102    return expression

Rewrite sqlglot AST to have normalized and qualified tables and columns.

This step is necessary for all further SQLGlot optimizations.

Example:
>>> import sqlglot
>>> schema = {"tbl": {"col": "INT"}}
>>> expression = sqlglot.parse_one("SELECT col FROM tbl")
>>> qualify(expression, schema=schema).sql()
'SELECT "tbl"."col" AS "col" FROM "tbl" AS "tbl"'
Arguments:
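  • expression: Expression to qualify.
  • dialect: Dialect passed to schema construction, identifier normalization, table qualification and identifier quoting; it also determines whether CTE alias columns are pushed down.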
  • db: Default database name for tables.
  • catalog: Default catalog name for tables.
  • schema: Schema to infer column names and types.
  • expand_alias_refs: Whether to expand references to aliases.
  • expand_stars: Whether to expand star queries. This is a necessary step for most of the optimizer's rules to work; do not set to False unless you know what you're doing!
  • infer_schema: Whether to infer the schema if missing.
  • isolate_tables: Whether to isolate table selects.
  • qualify_columns: Whether to qualify columns.
  • validate_qualify_columns: Whether to validate columns.
  • quote_identifiers: Whether to run the quote_identifiers step. This step is necessary to ensure correctness for case-sensitive queries; the flag is provided so that the step can instead be performed at a later time.
  • identify: If True, quote all identifiers, else only necessary ones.
  • infer_csv_schemas: Whether to scan READ_CSV calls in order to infer the CSVs' schemas.
Returns:

The qualified expression.