Edit on GitHub

sqlglot.optimizer.scope

  1from __future__ import annotations
  2
  3import itertools
  4import logging
  5import typing as t
  6from collections import defaultdict
  7from enum import Enum, auto
  8
  9from sqlglot import exp
 10from sqlglot.errors import OptimizeError
 11from sqlglot.helper import ensure_collection, find_new_name, seq_get
 12
 13logger = logging.getLogger("sqlglot")
 14
 15TRAVERSABLES = (exp.Query, exp.DDL, exp.DML)
 16
 17
 18class ScopeType(Enum):
 19    ROOT = auto()
 20    SUBQUERY = auto()
 21    DERIVED_TABLE = auto()
 22    CTE = auto()
 23    UNION = auto()
 24    UDTF = auto()
 25
 26
 27class Scope:
 28    """
 29    Selection scope.
 30
 31    Attributes:
 32        expression (exp.Select|exp.SetOperation): Root expression of this scope
 33        sources (dict[str, exp.Table|Scope]): Mapping of source name to either
 34            a Table expression or another Scope instance. For example:
 35                SELECT * FROM x                     {"x": Table(this="x")}
 36                SELECT * FROM x AS y                {"y": Table(this="x")}
 37                SELECT * FROM (SELECT ...) AS y     {"y": Scope(...)}
 38        lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals
 39            For example:
 40                SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c;
 41            The LATERAL VIEW EXPLODE gets x as a source.
 42        cte_sources (dict[str, Scope]): Sources from CTES
 43        outer_columns (list[str]): If this is a derived table or CTE, and the outer query
 44            defines a column list for the alias of this scope, this is that list of columns.
 45            For example:
 46                SELECT * FROM (SELECT ...) AS y(col1, col2)
 47            The inner query would have `["col1", "col2"]` for its `outer_columns`
 48        parent (Scope): Parent scope
 49        scope_type (ScopeType): Type of this scope, relative to it's parent
 50        subquery_scopes (list[Scope]): List of all child scopes for subqueries
 51        cte_scopes (list[Scope]): List of all child scopes for CTEs
 52        derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
 53        udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
 54        table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
 55        union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be
 56            a list of the left and right child scopes.
 57    """
 58
 59    def __init__(
 60        self,
 61        expression,
 62        sources=None,
 63        outer_columns=None,
 64        parent=None,
 65        scope_type=ScopeType.ROOT,
 66        lateral_sources=None,
 67        cte_sources=None,
 68    ):
 69        self.expression = expression
 70        self.sources = sources or {}
 71        self.lateral_sources = lateral_sources or {}
 72        self.cte_sources = cte_sources or {}
 73        self.sources.update(self.lateral_sources)
 74        self.sources.update(self.cte_sources)
 75        self.outer_columns = outer_columns or []
 76        self.parent = parent
 77        self.scope_type = scope_type
 78        self.subquery_scopes = []
 79        self.derived_table_scopes = []
 80        self.table_scopes = []
 81        self.cte_scopes = []
 82        self.union_scopes = []
 83        self.udtf_scopes = []
 84        self.clear_cache()
 85
 86    def clear_cache(self):
 87        self._collected = False
 88        self._raw_columns = None
 89        self._stars = None
 90        self._derived_tables = None
 91        self._udtfs = None
 92        self._tables = None
 93        self._ctes = None
 94        self._subqueries = None
 95        self._selected_sources = None
 96        self._columns = None
 97        self._external_columns = None
 98        self._join_hints = None
 99        self._pivots = None
100        self._references = None
101
102    def branch(
103        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
104    ):
105        """Branch from the current scope to a new, inner scope"""
106        return Scope(
107            expression=expression.unnest(),
108            sources=sources.copy() if sources else None,
109            parent=self,
110            scope_type=scope_type,
111            cte_sources={**self.cte_sources, **(cte_sources or {})},
112            lateral_sources=lateral_sources.copy() if lateral_sources else None,
113            **kwargs,
114        )
115
116    def _collect(self):
117        self._tables = []
118        self._ctes = []
119        self._subqueries = []
120        self._derived_tables = []
121        self._udtfs = []
122        self._raw_columns = []
123        self._stars = []
124        self._join_hints = []
125
126        for node in self.walk(bfs=False):
127            if node is self.expression:
128                continue
129
130            if isinstance(node, exp.Dot) and node.is_star:
131                self._stars.append(node)
132            elif isinstance(node, exp.Column):
133                if isinstance(node.this, exp.Star):
134                    self._stars.append(node)
135                else:
136                    self._raw_columns.append(node)
137            elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
138                self._tables.append(node)
139            elif isinstance(node, exp.JoinHint):
140                self._join_hints.append(node)
141            elif isinstance(node, exp.UDTF):
142                self._udtfs.append(node)
143            elif isinstance(node, exp.CTE):
144                self._ctes.append(node)
145            elif _is_derived_table(node) and isinstance(
146                node.parent, (exp.From, exp.Join, exp.Subquery)
147            ):
148                self._derived_tables.append(node)
149            elif isinstance(node, exp.UNWRAPPED_QUERIES):
150                self._subqueries.append(node)
151
152        self._collected = True
153
154    def _ensure_collected(self):
155        if not self._collected:
156            self._collect()
157
158    def walk(self, bfs=True, prune=None):
159        return walk_in_scope(self.expression, bfs=bfs, prune=None)
160
161    def find(self, *expression_types, bfs=True):
162        return find_in_scope(self.expression, expression_types, bfs=bfs)
163
164    def find_all(self, *expression_types, bfs=True):
165        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
166
167    def replace(self, old, new):
168        """
169        Replace `old` with `new`.
170
171        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
172
173        Args:
174            old (exp.Expression): old node
175            new (exp.Expression): new node
176        """
177        old.replace(new)
178        self.clear_cache()
179
180    @property
181    def tables(self):
182        """
183        List of tables in this scope.
184
185        Returns:
186            list[exp.Table]: tables
187        """
188        self._ensure_collected()
189        return self._tables
190
191    @property
192    def ctes(self):
193        """
194        List of CTEs in this scope.
195
196        Returns:
197            list[exp.CTE]: ctes
198        """
199        self._ensure_collected()
200        return self._ctes
201
202    @property
203    def derived_tables(self):
204        """
205        List of derived tables in this scope.
206
207        For example:
208            SELECT * FROM (SELECT ...) <- that's a derived table
209
210        Returns:
211            list[exp.Subquery]: derived tables
212        """
213        self._ensure_collected()
214        return self._derived_tables
215
216    @property
217    def udtfs(self):
218        """
219        List of "User Defined Tabular Functions" in this scope.
220
221        Returns:
222            list[exp.UDTF]: UDTFs
223        """
224        self._ensure_collected()
225        return self._udtfs
226
227    @property
228    def subqueries(self):
229        """
230        List of subqueries in this scope.
231
232        For example:
233            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
234
235        Returns:
236            list[exp.Select | exp.SetOperation]: subqueries
237        """
238        self._ensure_collected()
239        return self._subqueries
240
241    @property
242    def stars(self) -> t.List[exp.Column | exp.Dot]:
243        """
244        List of star expressions (columns or dots) in this scope.
245        """
246        self._ensure_collected()
247        return self._stars
248
249    @property
250    def columns(self):
251        """
252        List of columns in this scope.
253
254        Returns:
255            list[exp.Column]: Column instances in this scope, plus any
256                Columns that reference this scope from correlated subqueries.
257        """
258        if self._columns is None:
259            self._ensure_collected()
260            columns = self._raw_columns
261
262            external_columns = [
263                column
264                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
265                for column in scope.external_columns
266            ]
267
268            named_selects = set(self.expression.named_selects)
269
270            self._columns = []
271            for column in columns + external_columns:
272                ancestor = column.find_ancestor(
273                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
274                )
275                if (
276                    not ancestor
277                    or column.table
278                    or isinstance(ancestor, exp.Select)
279                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
280                    or (
281                        isinstance(ancestor, exp.Order)
282                        and (
283                            isinstance(ancestor.parent, exp.Window)
284                            or column.name not in named_selects
285                        )
286                    )
287                ):
288                    self._columns.append(column)
289
290        return self._columns
291
292    @property
293    def selected_sources(self):
294        """
295        Mapping of nodes and sources that are actually selected from in this scope.
296
297        That is, all tables in a schema are selectable at any point. But a
298        table only becomes a selected source if it's included in a FROM or JOIN clause.
299
300        Returns:
301            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
302        """
303        if self._selected_sources is None:
304            result = {}
305
306            for name, node in self.references:
307                if name in result:
308                    raise OptimizeError(f"Alias already used: {name}")
309                if name in self.sources:
310                    result[name] = (node, self.sources[name])
311
312            self._selected_sources = result
313        return self._selected_sources
314
315    @property
316    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
317        if self._references is None:
318            self._references = []
319
320            for table in self.tables:
321                self._references.append((table.alias_or_name, table))
322            for expression in itertools.chain(self.derived_tables, self.udtfs):
323                self._references.append(
324                    (
325                        expression.alias,
326                        expression if expression.args.get("pivots") else expression.unnest(),
327                    )
328                )
329
330        return self._references
331
332    @property
333    def external_columns(self):
334        """
335        Columns that appear to reference sources in outer scopes.
336
337        Returns:
338            list[exp.Column]: Column instances that don't reference
339                sources in the current scope.
340        """
341        if self._external_columns is None:
342            if isinstance(self.expression, exp.SetOperation):
343                left, right = self.union_scopes
344                self._external_columns = left.external_columns + right.external_columns
345            else:
346                self._external_columns = [
347                    c for c in self.columns if c.table not in self.selected_sources
348                ]
349
350        return self._external_columns
351
352    @property
353    def unqualified_columns(self):
354        """
355        Unqualified columns in the current scope.
356
357        Returns:
358             list[exp.Column]: Unqualified columns
359        """
360        return [c for c in self.columns if not c.table]
361
362    @property
363    def join_hints(self):
364        """
365        Hints that exist in the scope that reference tables
366
367        Returns:
368            list[exp.JoinHint]: Join hints that are referenced within the scope
369        """
370        if self._join_hints is None:
371            return []
372        return self._join_hints
373
374    @property
375    def pivots(self):
376        if not self._pivots:
377            self._pivots = [
378                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
379            ]
380
381        return self._pivots
382
383    def source_columns(self, source_name):
384        """
385        Get all columns in the current scope for a particular source.
386
387        Args:
388            source_name (str): Name of the source
389        Returns:
390            list[exp.Column]: Column instances that reference `source_name`
391        """
392        return [column for column in self.columns if column.table == source_name]
393
394    @property
395    def is_subquery(self):
396        """Determine if this scope is a subquery"""
397        return self.scope_type == ScopeType.SUBQUERY
398
399    @property
400    def is_derived_table(self):
401        """Determine if this scope is a derived table"""
402        return self.scope_type == ScopeType.DERIVED_TABLE
403
404    @property
405    def is_union(self):
406        """Determine if this scope is a union"""
407        return self.scope_type == ScopeType.UNION
408
409    @property
410    def is_cte(self):
411        """Determine if this scope is a common table expression"""
412        return self.scope_type == ScopeType.CTE
413
414    @property
415    def is_root(self):
416        """Determine if this is the root scope"""
417        return self.scope_type == ScopeType.ROOT
418
419    @property
420    def is_udtf(self):
421        """Determine if this scope is a UDTF (User Defined Table Function)"""
422        return self.scope_type == ScopeType.UDTF
423
424    @property
425    def is_correlated_subquery(self):
426        """Determine if this scope is a correlated subquery"""
427        return bool(
428            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
429            and self.external_columns
430        )
431
432    def rename_source(self, old_name, new_name):
433        """Rename a source in this scope"""
434        columns = self.sources.pop(old_name or "", [])
435        self.sources[new_name] = columns
436
437    def add_source(self, name, source):
438        """Add a source to this scope"""
439        self.sources[name] = source
440        self.clear_cache()
441
442    def remove_source(self, name):
443        """Remove a source from this scope"""
444        self.sources.pop(name, None)
445        self.clear_cache()
446
447    def __repr__(self):
448        return f"Scope<{self.expression.sql()}>"
449
450    def traverse(self):
451        """
452        Traverse the scope tree from this node.
453
454        Yields:
455            Scope: scope instances in depth-first-search post-order
456        """
457        stack = [self]
458        result = []
459        while stack:
460            scope = stack.pop()
461            result.append(scope)
462            stack.extend(
463                itertools.chain(
464                    scope.cte_scopes,
465                    scope.union_scopes,
466                    scope.table_scopes,
467                    scope.subquery_scopes,
468                )
469            )
470
471        yield from reversed(result)
472
473    def ref_count(self):
474        """
475        Count the number of times each scope in this tree is referenced.
476
477        Returns:
478            dict[int, int]: Mapping of Scope instance ID to reference count
479        """
480        scope_ref_count = defaultdict(lambda: 0)
481
482        for scope in self.traverse():
483            for _, source in scope.selected_sources.values():
484                scope_ref_count[id(source)] += 1
485
486        return scope_ref_count
487
488
489def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
490    """
491    Traverse an expression by its "scopes".
492
493    "Scope" represents the current context of a Select statement.
494
495    This is helpful for optimizing queries, where we need more information than
496    the expression tree itself. For example, we might care about the source
497    names within a subquery. Returns a list because a generator could result in
498    incomplete properties which is confusing.
499
500    Examples:
501        >>> import sqlglot
502        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
503        >>> scopes = traverse_scope(expression)
504        >>> scopes[0].expression.sql(), list(scopes[0].sources)
505        ('SELECT a FROM x', ['x'])
506        >>> scopes[1].expression.sql(), list(scopes[1].sources)
507        ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
508
509    Args:
510        expression: Expression to traverse
511
512    Returns:
513        A list of the created scope instances
514    """
515    if isinstance(expression, TRAVERSABLES):
516        return list(_traverse_scope(Scope(expression)))
517    return []
518
519
520def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
521    """
522    Build a scope tree.
523
524    Args:
525        expression: Expression to build the scope tree for.
526
527    Returns:
528        The root scope
529    """
530    return seq_get(traverse_scope(expression), -1)
531
532
533def _traverse_scope(scope):
534    expression = scope.expression
535
536    if isinstance(expression, exp.Select):
537        yield from _traverse_select(scope)
538    elif isinstance(expression, exp.SetOperation):
539        yield from _traverse_ctes(scope)
540        yield from _traverse_union(scope)
541        return
542    elif isinstance(expression, exp.Subquery):
543        if scope.is_root:
544            yield from _traverse_select(scope)
545        else:
546            yield from _traverse_subqueries(scope)
547    elif isinstance(expression, exp.Table):
548        yield from _traverse_tables(scope)
549    elif isinstance(expression, exp.UDTF):
550        yield from _traverse_udtfs(scope)
551    elif isinstance(expression, exp.DDL):
552        if isinstance(expression.expression, exp.Query):
553            yield from _traverse_ctes(scope)
554            yield from _traverse_scope(Scope(expression.expression, cte_sources=scope.cte_sources))
555        return
556    elif isinstance(expression, exp.DML):
557        yield from _traverse_ctes(scope)
558        for query in find_all_in_scope(expression, exp.Query):
559            # This check ensures we don't yield the CTE queries twice
560            if not isinstance(query.parent, exp.CTE):
561                yield from _traverse_scope(Scope(query, cte_sources=scope.cte_sources))
562        return
563    else:
564        logger.warning("Cannot traverse scope %s with type '%s'", expression, type(expression))
565        return
566
567    yield scope
568
569
570def _traverse_select(scope):
571    yield from _traverse_ctes(scope)
572    yield from _traverse_tables(scope)
573    yield from _traverse_subqueries(scope)
574
575
576def _traverse_union(scope):
577    prev_scope = None
578    union_scope_stack = [scope]
579    expression_stack = [scope.expression.right, scope.expression.left]
580
581    while expression_stack:
582        expression = expression_stack.pop()
583        union_scope = union_scope_stack[-1]
584
585        new_scope = union_scope.branch(
586            expression,
587            outer_columns=union_scope.outer_columns,
588            scope_type=ScopeType.UNION,
589        )
590
591        if isinstance(expression, exp.SetOperation):
592            yield from _traverse_ctes(new_scope)
593
594            union_scope_stack.append(new_scope)
595            expression_stack.extend([expression.right, expression.left])
596            continue
597
598        for scope in _traverse_scope(new_scope):
599            yield scope
600
601        if prev_scope:
602            union_scope_stack.pop()
603            union_scope.union_scopes = [prev_scope, scope]
604            prev_scope = union_scope
605
606            yield union_scope
607        else:
608            prev_scope = scope
609
610
611def _traverse_ctes(scope):
612    sources = {}
613
614    for cte in scope.ctes:
615        cte_name = cte.alias
616
617        # if the scope is a recursive cte, it must be in the form of base_case UNION recursive.
618        # thus the recursive scope is the first section of the union.
619        with_ = scope.expression.args.get("with")
620        if with_ and with_.recursive:
621            union = cte.this
622
623            if isinstance(union, exp.SetOperation):
624                sources[cte_name] = scope.branch(union.this, scope_type=ScopeType.CTE)
625
626        child_scope = None
627
628        for child_scope in _traverse_scope(
629            scope.branch(
630                cte.this,
631                cte_sources=sources,
632                outer_columns=cte.alias_column_names,
633                scope_type=ScopeType.CTE,
634            )
635        ):
636            yield child_scope
637
638        # append the final child_scope yielded
639        if child_scope:
640            sources[cte_name] = child_scope
641            scope.cte_scopes.append(child_scope)
642
643    scope.sources.update(sources)
644    scope.cte_sources.update(sources)
645
646
647def _is_derived_table(expression: exp.Subquery) -> bool:
648    """
649    We represent (tbl1 JOIN tbl2) as a Subquery, but it's not really a "derived table",
650    as it doesn't introduce a new scope. If an alias is present, it shadows all names
651    under the Subquery, so that's one exception to this rule.
652    """
653    return isinstance(expression, exp.Subquery) and bool(
654        expression.alias or isinstance(expression.this, exp.UNWRAPPED_QUERIES)
655    )
656
657
658def _traverse_tables(scope):
659    sources = {}
660
661    # Traverse FROMs, JOINs, and LATERALs in the order they are defined
662    expressions = []
663    from_ = scope.expression.args.get("from")
664    if from_:
665        expressions.append(from_.this)
666
667    for join in scope.expression.args.get("joins") or []:
668        expressions.append(join.this)
669
670    if isinstance(scope.expression, exp.Table):
671        expressions.append(scope.expression)
672
673    expressions.extend(scope.expression.args.get("laterals") or [])
674
675    for expression in expressions:
676        if isinstance(expression, exp.Table):
677            table_name = expression.name
678            source_name = expression.alias_or_name
679
680            if table_name in scope.sources and not expression.db:
681                # This is a reference to a parent source (e.g. a CTE), not an actual table, unless
682                # it is pivoted, because then we get back a new table and hence a new source.
683                pivots = expression.args.get("pivots")
684                if pivots:
685                    sources[pivots[0].alias] = expression
686                else:
687                    sources[source_name] = scope.sources[table_name]
688            elif source_name in sources:
689                sources[find_new_name(sources, table_name)] = expression
690            else:
691                sources[source_name] = expression
692
693            # Make sure to not include the joins twice
694            if expression is not scope.expression:
695                expressions.extend(join.this for join in expression.args.get("joins") or [])
696
697            continue
698
699        if not isinstance(expression, exp.DerivedTable):
700            continue
701
702        if isinstance(expression, exp.UDTF):
703            lateral_sources = sources
704            scope_type = ScopeType.UDTF
705            scopes = scope.udtf_scopes
706        elif _is_derived_table(expression):
707            lateral_sources = None
708            scope_type = ScopeType.DERIVED_TABLE
709            scopes = scope.derived_table_scopes
710            expressions.extend(join.this for join in expression.args.get("joins") or [])
711        else:
712            # Makes sure we check for possible sources in nested table constructs
713            expressions.append(expression.this)
714            expressions.extend(join.this for join in expression.args.get("joins") or [])
715            continue
716
717        for child_scope in _traverse_scope(
718            scope.branch(
719                expression,
720                lateral_sources=lateral_sources,
721                outer_columns=expression.alias_column_names,
722                scope_type=scope_type,
723            )
724        ):
725            yield child_scope
726
727            # Tables without aliases will be set as ""
728            # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
729            # Until then, this means that only a single, unaliased derived table is allowed (rather,
730            # the latest one wins.
731            sources[expression.alias] = child_scope
732
733        # append the final child_scope yielded
734        scopes.append(child_scope)
735        scope.table_scopes.append(child_scope)
736
737    scope.sources.update(sources)
738
739
740def _traverse_subqueries(scope):
741    for subquery in scope.subqueries:
742        top = None
743        for child_scope in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)):
744            yield child_scope
745            top = child_scope
746        scope.subquery_scopes.append(top)
747
748
749def _traverse_udtfs(scope):
750    if isinstance(scope.expression, exp.Unnest):
751        expressions = scope.expression.expressions
752    elif isinstance(scope.expression, exp.Lateral):
753        expressions = [scope.expression.this]
754    else:
755        expressions = []
756
757    sources = {}
758    for expression in expressions:
759        if _is_derived_table(expression):
760            top = None
761            for child_scope in _traverse_scope(
762                scope.branch(
763                    expression,
764                    scope_type=ScopeType.SUBQUERY,
765                    outer_columns=expression.alias_column_names,
766                )
767            ):
768                yield child_scope
769                top = child_scope
770                sources[expression.alias] = child_scope
771
772            scope.subquery_scopes.append(top)
773
774    scope.sources.update(sources)
775
776
777def walk_in_scope(expression, bfs=True, prune=None):
778    """
779    Returns a generator object which visits all nodes in the syntrax tree, stopping at
780    nodes that start child scopes.
781
782    Args:
783        expression (exp.Expression):
784        bfs (bool): if set to True the BFS traversal order will be applied,
785            otherwise the DFS traversal will be used instead.
786        prune ((node, parent, arg_key) -> bool): callable that returns True if
787            the generator should stop traversing this branch of the tree.
788
789    Yields:
790        tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
791    """
792    # We'll use this variable to pass state into the dfs generator.
793    # Whenever we set it to True, we exclude a subtree from traversal.
794    crossed_scope_boundary = False
795
796    for node in expression.walk(
797        bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n))
798    ):
799        crossed_scope_boundary = False
800
801        yield node
802
803        if node is expression:
804            continue
805        if (
806            isinstance(node, exp.CTE)
807            or (
808                isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
809                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
810            )
811            or isinstance(node, exp.UNWRAPPED_QUERIES)
812        ):
813            crossed_scope_boundary = True
814
815            if isinstance(node, (exp.Subquery, exp.UDTF)):
816                # The following args are not actually in the inner scope, so we should visit them
817                for key in ("joins", "laterals", "pivots"):
818                    for arg in node.args.get(key) or []:
819                        yield from walk_in_scope(arg, bfs=bfs)
820
821
822def find_all_in_scope(expression, expression_types, bfs=True):
823    """
824    Returns a generator object which visits all nodes in this scope and only yields those that
825    match at least one of the specified expression types.
826
827    This does NOT traverse into subscopes.
828
829    Args:
830        expression (exp.Expression):
831        expression_types (tuple[type]|type): the expression type(s) to match.
832        bfs (bool): True to use breadth-first search, False to use depth-first.
833
834    Yields:
835        exp.Expression: nodes
836    """
837    for expression in walk_in_scope(expression, bfs=bfs):
838        if isinstance(expression, tuple(ensure_collection(expression_types))):
839            yield expression
840
841
842def find_in_scope(expression, expression_types, bfs=True):
843    """
844    Returns the first node in this scope which matches at least one of the specified types.
845
846    This does NOT traverse into subscopes.
847
848    Args:
849        expression (exp.Expression):
850        expression_types (tuple[type]|type): the expression type(s) to match.
851        bfs (bool): True to use breadth-first search, False to use depth-first.
852
853    Returns:
854        exp.Expression: the node which matches the criteria or None if no node matching
855        the criteria was found.
856    """
857    return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
logger = <Logger sqlglot (WARNING)>
TRAVERSABLES = (<class 'sqlglot.expressions.Query'>, <class 'sqlglot.expressions.DDL'>, <class 'sqlglot.expressions.DML'>)
class ScopeType(enum.Enum):
19class ScopeType(Enum):
20    ROOT = auto()
21    SUBQUERY = auto()
22    DERIVED_TABLE = auto()
23    CTE = auto()
24    UNION = auto()
25    UDTF = auto()

An enumeration.

ROOT = <ScopeType.ROOT: 1>
SUBQUERY = <ScopeType.SUBQUERY: 2>
DERIVED_TABLE = <ScopeType.DERIVED_TABLE: 3>
CTE = <ScopeType.CTE: 4>
UNION = <ScopeType.UNION: 5>
UDTF = <ScopeType.UDTF: 6>
Inherited Members
enum.Enum
name
value
class Scope:
 28class Scope:
 29    """
 30    Selection scope.
 31
 32    Attributes:
 33        expression (exp.Select|exp.SetOperation): Root expression of this scope
 34        sources (dict[str, exp.Table|Scope]): Mapping of source name to either
 35            a Table expression or another Scope instance. For example:
 36                SELECT * FROM x                     {"x": Table(this="x")}
 37                SELECT * FROM x AS y                {"y": Table(this="x")}
 38                SELECT * FROM (SELECT ...) AS y     {"y": Scope(...)}
 39        lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals
 40            For example:
 41                SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c;
 42            The LATERAL VIEW EXPLODE gets x as a source.
 43        cte_sources (dict[str, Scope]): Sources from CTES
 44        outer_columns (list[str]): If this is a derived table or CTE, and the outer query
 45            defines a column list for the alias of this scope, this is that list of columns.
 46            For example:
 47                SELECT * FROM (SELECT ...) AS y(col1, col2)
 48            The inner query would have `["col1", "col2"]` for its `outer_columns`
 49        parent (Scope): Parent scope
 50        scope_type (ScopeType): Type of this scope, relative to it's parent
 51        subquery_scopes (list[Scope]): List of all child scopes for subqueries
 52        cte_scopes (list[Scope]): List of all child scopes for CTEs
 53        derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
 54        udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
 55        table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
 56        union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be
 57            a list of the left and right child scopes.
 58    """
 59
 60    def __init__(
 61        self,
 62        expression,
 63        sources=None,
 64        outer_columns=None,
 65        parent=None,
 66        scope_type=ScopeType.ROOT,
 67        lateral_sources=None,
 68        cte_sources=None,
 69    ):
 70        self.expression = expression
 71        self.sources = sources or {}
 72        self.lateral_sources = lateral_sources or {}
 73        self.cte_sources = cte_sources or {}
 74        self.sources.update(self.lateral_sources)
 75        self.sources.update(self.cte_sources)
 76        self.outer_columns = outer_columns or []
 77        self.parent = parent
 78        self.scope_type = scope_type
 79        self.subquery_scopes = []
 80        self.derived_table_scopes = []
 81        self.table_scopes = []
 82        self.cte_scopes = []
 83        self.union_scopes = []
 84        self.udtf_scopes = []
 85        self.clear_cache()
 86
 87    def clear_cache(self):
 88        self._collected = False
 89        self._raw_columns = None
 90        self._stars = None
 91        self._derived_tables = None
 92        self._udtfs = None
 93        self._tables = None
 94        self._ctes = None
 95        self._subqueries = None
 96        self._selected_sources = None
 97        self._columns = None
 98        self._external_columns = None
 99        self._join_hints = None
100        self._pivots = None
101        self._references = None
102
103    def branch(
104        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
105    ):
106        """Branch from the current scope to a new, inner scope"""
107        return Scope(
108            expression=expression.unnest(),
109            sources=sources.copy() if sources else None,
110            parent=self,
111            scope_type=scope_type,
112            cte_sources={**self.cte_sources, **(cte_sources or {})},
113            lateral_sources=lateral_sources.copy() if lateral_sources else None,
114            **kwargs,
115        )
116
117    def _collect(self):
118        self._tables = []
119        self._ctes = []
120        self._subqueries = []
121        self._derived_tables = []
122        self._udtfs = []
123        self._raw_columns = []
124        self._stars = []
125        self._join_hints = []
126
127        for node in self.walk(bfs=False):
128            if node is self.expression:
129                continue
130
131            if isinstance(node, exp.Dot) and node.is_star:
132                self._stars.append(node)
133            elif isinstance(node, exp.Column):
134                if isinstance(node.this, exp.Star):
135                    self._stars.append(node)
136                else:
137                    self._raw_columns.append(node)
138            elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
139                self._tables.append(node)
140            elif isinstance(node, exp.JoinHint):
141                self._join_hints.append(node)
142            elif isinstance(node, exp.UDTF):
143                self._udtfs.append(node)
144            elif isinstance(node, exp.CTE):
145                self._ctes.append(node)
146            elif _is_derived_table(node) and isinstance(
147                node.parent, (exp.From, exp.Join, exp.Subquery)
148            ):
149                self._derived_tables.append(node)
150            elif isinstance(node, exp.UNWRAPPED_QUERIES):
151                self._subqueries.append(node)
152
153        self._collected = True
154
155    def _ensure_collected(self):
156        if not self._collected:
157            self._collect()
158
159    def walk(self, bfs=True, prune=None):
160        return walk_in_scope(self.expression, bfs=bfs, prune=None)
161
162    def find(self, *expression_types, bfs=True):
163        return find_in_scope(self.expression, expression_types, bfs=bfs)
164
165    def find_all(self, *expression_types, bfs=True):
166        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
167
168    def replace(self, old, new):
169        """
170        Replace `old` with `new`.
171
172        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
173
174        Args:
175            old (exp.Expression): old node
176            new (exp.Expression): new node
177        """
178        old.replace(new)
179        self.clear_cache()
180
181    @property
182    def tables(self):
183        """
184        List of tables in this scope.
185
186        Returns:
187            list[exp.Table]: tables
188        """
189        self._ensure_collected()
190        return self._tables
191
192    @property
193    def ctes(self):
194        """
195        List of CTEs in this scope.
196
197        Returns:
198            list[exp.CTE]: ctes
199        """
200        self._ensure_collected()
201        return self._ctes
202
203    @property
204    def derived_tables(self):
205        """
206        List of derived tables in this scope.
207
208        For example:
209            SELECT * FROM (SELECT ...) <- that's a derived table
210
211        Returns:
212            list[exp.Subquery]: derived tables
213        """
214        self._ensure_collected()
215        return self._derived_tables
216
217    @property
218    def udtfs(self):
219        """
220        List of "User Defined Tabular Functions" in this scope.
221
222        Returns:
223            list[exp.UDTF]: UDTFs
224        """
225        self._ensure_collected()
226        return self._udtfs
227
228    @property
229    def subqueries(self):
230        """
231        List of subqueries in this scope.
232
233        For example:
234            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
235
236        Returns:
237            list[exp.Select | exp.SetOperation]: subqueries
238        """
239        self._ensure_collected()
240        return self._subqueries
241
242    @property
243    def stars(self) -> t.List[exp.Column | exp.Dot]:
244        """
245        List of star expressions (columns or dots) in this scope.
246        """
247        self._ensure_collected()
248        return self._stars
249
250    @property
251    def columns(self):
252        """
253        List of columns in this scope.
254
255        Returns:
256            list[exp.Column]: Column instances in this scope, plus any
257                Columns that reference this scope from correlated subqueries.
258        """
259        if self._columns is None:
260            self._ensure_collected()
261            columns = self._raw_columns
262
263            external_columns = [
264                column
265                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
266                for column in scope.external_columns
267            ]
268
269            named_selects = set(self.expression.named_selects)
270
271            self._columns = []
272            for column in columns + external_columns:
273                ancestor = column.find_ancestor(
274                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
275                )
276                if (
277                    not ancestor
278                    or column.table
279                    or isinstance(ancestor, exp.Select)
280                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
281                    or (
282                        isinstance(ancestor, exp.Order)
283                        and (
284                            isinstance(ancestor.parent, exp.Window)
285                            or column.name not in named_selects
286                        )
287                    )
288                ):
289                    self._columns.append(column)
290
291        return self._columns
292
293    @property
294    def selected_sources(self):
295        """
296        Mapping of nodes and sources that are actually selected from in this scope.
297
298        That is, all tables in a schema are selectable at any point. But a
299        table only becomes a selected source if it's included in a FROM or JOIN clause.
300
301        Returns:
302            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
303        """
304        if self._selected_sources is None:
305            result = {}
306
307            for name, node in self.references:
308                if name in result:
309                    raise OptimizeError(f"Alias already used: {name}")
310                if name in self.sources:
311                    result[name] = (node, self.sources[name])
312
313            self._selected_sources = result
314        return self._selected_sources
315
316    @property
317    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
318        if self._references is None:
319            self._references = []
320
321            for table in self.tables:
322                self._references.append((table.alias_or_name, table))
323            for expression in itertools.chain(self.derived_tables, self.udtfs):
324                self._references.append(
325                    (
326                        expression.alias,
327                        expression if expression.args.get("pivots") else expression.unnest(),
328                    )
329                )
330
331        return self._references
332
333    @property
334    def external_columns(self):
335        """
336        Columns that appear to reference sources in outer scopes.
337
338        Returns:
339            list[exp.Column]: Column instances that don't reference
340                sources in the current scope.
341        """
342        if self._external_columns is None:
343            if isinstance(self.expression, exp.SetOperation):
344                left, right = self.union_scopes
345                self._external_columns = left.external_columns + right.external_columns
346            else:
347                self._external_columns = [
348                    c for c in self.columns if c.table not in self.selected_sources
349                ]
350
351        return self._external_columns
352
353    @property
354    def unqualified_columns(self):
355        """
356        Unqualified columns in the current scope.
357
358        Returns:
359             list[exp.Column]: Unqualified columns
360        """
361        return [c for c in self.columns if not c.table]
362
363    @property
364    def join_hints(self):
365        """
366        Hints that exist in the scope that reference tables
367
368        Returns:
369            list[exp.JoinHint]: Join hints that are referenced within the scope
370        """
371        if self._join_hints is None:
372            return []
373        return self._join_hints
374
375    @property
376    def pivots(self):
377        if not self._pivots:
378            self._pivots = [
379                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
380            ]
381
382        return self._pivots
383
384    def source_columns(self, source_name):
385        """
386        Get all columns in the current scope for a particular source.
387
388        Args:
389            source_name (str): Name of the source
390        Returns:
391            list[exp.Column]: Column instances that reference `source_name`
392        """
393        return [column for column in self.columns if column.table == source_name]
394
395    @property
396    def is_subquery(self):
397        """Determine if this scope is a subquery"""
398        return self.scope_type == ScopeType.SUBQUERY
399
400    @property
401    def is_derived_table(self):
402        """Determine if this scope is a derived table"""
403        return self.scope_type == ScopeType.DERIVED_TABLE
404
405    @property
406    def is_union(self):
407        """Determine if this scope is a union"""
408        return self.scope_type == ScopeType.UNION
409
410    @property
411    def is_cte(self):
412        """Determine if this scope is a common table expression"""
413        return self.scope_type == ScopeType.CTE
414
415    @property
416    def is_root(self):
417        """Determine if this is the root scope"""
418        return self.scope_type == ScopeType.ROOT
419
420    @property
421    def is_udtf(self):
422        """Determine if this scope is a UDTF (User Defined Table Function)"""
423        return self.scope_type == ScopeType.UDTF
424
425    @property
426    def is_correlated_subquery(self):
427        """Determine if this scope is a correlated subquery"""
428        return bool(
429            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
430            and self.external_columns
431        )
432
433    def rename_source(self, old_name, new_name):
434        """Rename a source in this scope"""
435        columns = self.sources.pop(old_name or "", [])
436        self.sources[new_name] = columns
437
438    def add_source(self, name, source):
439        """Add a source to this scope"""
440        self.sources[name] = source
441        self.clear_cache()
442
443    def remove_source(self, name):
444        """Remove a source from this scope"""
445        self.sources.pop(name, None)
446        self.clear_cache()
447
448    def __repr__(self):
449        return f"Scope<{self.expression.sql()}>"
450
451    def traverse(self):
452        """
453        Traverse the scope tree from this node.
454
455        Yields:
456            Scope: scope instances in depth-first-search post-order
457        """
458        stack = [self]
459        result = []
460        while stack:
461            scope = stack.pop()
462            result.append(scope)
463            stack.extend(
464                itertools.chain(
465                    scope.cte_scopes,
466                    scope.union_scopes,
467                    scope.table_scopes,
468                    scope.subquery_scopes,
469                )
470            )
471
472        yield from reversed(result)
473
474    def ref_count(self):
475        """
476        Count the number of times each scope in this tree is referenced.
477
478        Returns:
479            dict[int, int]: Mapping of Scope instance ID to reference count
480        """
481        scope_ref_count = defaultdict(lambda: 0)
482
483        for scope in self.traverse():
484            for _, source in scope.selected_sources.values():
485                scope_ref_count[id(source)] += 1
486
487        return scope_ref_count

Selection scope.

Attributes:
  • expression (exp.Select|exp.SetOperation): Root expression of this scope
  • sources (dict[str, exp.Table|Scope]): Mapping of source name to either a Table expression or another Scope instance. For example: SELECT * FROM x {"x": Table(this="x")} SELECT * FROM x AS y {"y": Table(this="x")} SELECT * FROM (SELECT ...) AS y {"y": Scope(...)}
  • lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals For example: SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; The LATERAL VIEW EXPLODE gets x as a source.
  • cte_sources (dict[str, Scope]): Sources from CTES
  • outer_columns (list[str]): If this is a derived table or CTE, and the outer query defines a column list for the alias of this scope, this is that list of columns. For example: SELECT * FROM (SELECT ...) AS y(col1, col2) The inner query would have ["col1", "col2"] for its outer_columns
  • parent (Scope): Parent scope
  • scope_type (ScopeType): Type of this scope, relative to it's parent
  • subquery_scopes (list[Scope]): List of all child scopes for subqueries
  • cte_scopes (list[Scope]): List of all child scopes for CTEs
  • derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
  • udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
  • table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
  • union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be a list of the left and right child scopes.
Scope( expression, sources=None, outer_columns=None, parent=None, scope_type=<ScopeType.ROOT: 1>, lateral_sources=None, cte_sources=None)
60    def __init__(
61        self,
62        expression,
63        sources=None,
64        outer_columns=None,
65        parent=None,
66        scope_type=ScopeType.ROOT,
67        lateral_sources=None,
68        cte_sources=None,
69    ):
70        self.expression = expression
71        self.sources = sources or {}
72        self.lateral_sources = lateral_sources or {}
73        self.cte_sources = cte_sources or {}
74        self.sources.update(self.lateral_sources)
75        self.sources.update(self.cte_sources)
76        self.outer_columns = outer_columns or []
77        self.parent = parent
78        self.scope_type = scope_type
79        self.subquery_scopes = []
80        self.derived_table_scopes = []
81        self.table_scopes = []
82        self.cte_scopes = []
83        self.union_scopes = []
84        self.udtf_scopes = []
85        self.clear_cache()
expression
sources
lateral_sources
cte_sources
outer_columns
parent
scope_type
subquery_scopes
derived_table_scopes
table_scopes
cte_scopes
union_scopes
udtf_scopes
def clear_cache(self):
 87    def clear_cache(self):
 88        self._collected = False
 89        self._raw_columns = None
 90        self._stars = None
 91        self._derived_tables = None
 92        self._udtfs = None
 93        self._tables = None
 94        self._ctes = None
 95        self._subqueries = None
 96        self._selected_sources = None
 97        self._columns = None
 98        self._external_columns = None
 99        self._join_hints = None
100        self._pivots = None
101        self._references = None
def branch( self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs):
103    def branch(
104        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
105    ):
106        """Branch from the current scope to a new, inner scope"""
107        return Scope(
108            expression=expression.unnest(),
109            sources=sources.copy() if sources else None,
110            parent=self,
111            scope_type=scope_type,
112            cte_sources={**self.cte_sources, **(cte_sources or {})},
113            lateral_sources=lateral_sources.copy() if lateral_sources else None,
114            **kwargs,
115        )

Branch from the current scope to a new, inner scope

def walk(self, bfs=True, prune=None):
159    def walk(self, bfs=True, prune=None):
160        return walk_in_scope(self.expression, bfs=bfs, prune=None)
def find(self, *expression_types, bfs=True):
162    def find(self, *expression_types, bfs=True):
163        return find_in_scope(self.expression, expression_types, bfs=bfs)
def find_all(self, *expression_types, bfs=True):
165    def find_all(self, *expression_types, bfs=True):
166        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
def replace(self, old, new):
168    def replace(self, old, new):
169        """
170        Replace `old` with `new`.
171
172        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
173
174        Args:
175            old (exp.Expression): old node
176            new (exp.Expression): new node
177        """
178        old.replace(new)
179        self.clear_cache()

Replace old with new.

This can be used instead of exp.Expression.replace to ensure the Scope is kept up-to-date.

Arguments:
  • old (exp.Expression): old node
  • new (exp.Expression): new node
tables
181    @property
182    def tables(self):
183        """
184        List of tables in this scope.
185
186        Returns:
187            list[exp.Table]: tables
188        """
189        self._ensure_collected()
190        return self._tables

List of tables in this scope.

Returns:

list[exp.Table]: tables

ctes
192    @property
193    def ctes(self):
194        """
195        List of CTEs in this scope.
196
197        Returns:
198            list[exp.CTE]: ctes
199        """
200        self._ensure_collected()
201        return self._ctes

List of CTEs in this scope.

Returns:

list[exp.CTE]: ctes

derived_tables
203    @property
204    def derived_tables(self):
205        """
206        List of derived tables in this scope.
207
208        For example:
209            SELECT * FROM (SELECT ...) <- that's a derived table
210
211        Returns:
212            list[exp.Subquery]: derived tables
213        """
214        self._ensure_collected()
215        return self._derived_tables

List of derived tables in this scope.

For example:

SELECT * FROM (SELECT ...) <- that's a derived table

Returns:

list[exp.Subquery]: derived tables

udtfs
217    @property
218    def udtfs(self):
219        """
220        List of "User Defined Tabular Functions" in this scope.
221
222        Returns:
223            list[exp.UDTF]: UDTFs
224        """
225        self._ensure_collected()
226        return self._udtfs

List of "User Defined Tabular Functions" in this scope.

Returns:

list[exp.UDTF]: UDTFs

subqueries
228    @property
229    def subqueries(self):
230        """
231        List of subqueries in this scope.
232
233        For example:
234            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
235
236        Returns:
237            list[exp.Select | exp.SetOperation]: subqueries
238        """
239        self._ensure_collected()
240        return self._subqueries

List of subqueries in this scope.

For example:

SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery

Returns:

list[exp.Select | exp.SetOperation]: subqueries

242    @property
243    def stars(self) -> t.List[exp.Column | exp.Dot]:
244        """
245        List of star expressions (columns or dots) in this scope.
246        """
247        self._ensure_collected()
248        return self._stars

List of star expressions (columns or dots) in this scope.

columns
250    @property
251    def columns(self):
252        """
253        List of columns in this scope.
254
255        Returns:
256            list[exp.Column]: Column instances in this scope, plus any
257                Columns that reference this scope from correlated subqueries.
258        """
259        if self._columns is None:
260            self._ensure_collected()
261            columns = self._raw_columns
262
263            external_columns = [
264                column
265                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
266                for column in scope.external_columns
267            ]
268
269            named_selects = set(self.expression.named_selects)
270
271            self._columns = []
272            for column in columns + external_columns:
273                ancestor = column.find_ancestor(
274                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
275                )
276                if (
277                    not ancestor
278                    or column.table
279                    or isinstance(ancestor, exp.Select)
280                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
281                    or (
282                        isinstance(ancestor, exp.Order)
283                        and (
284                            isinstance(ancestor.parent, exp.Window)
285                            or column.name not in named_selects
286                        )
287                    )
288                ):
289                    self._columns.append(column)
290
291        return self._columns

List of columns in this scope.

Returns:

list[exp.Column]: Column instances in this scope, plus any Columns that reference this scope from correlated subqueries.

selected_sources
293    @property
294    def selected_sources(self):
295        """
296        Mapping of nodes and sources that are actually selected from in this scope.
297
298        That is, all tables in a schema are selectable at any point. But a
299        table only becomes a selected source if it's included in a FROM or JOIN clause.
300
301        Returns:
302            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
303        """
304        if self._selected_sources is None:
305            result = {}
306
307            for name, node in self.references:
308                if name in result:
309                    raise OptimizeError(f"Alias already used: {name}")
310                if name in self.sources:
311                    result[name] = (node, self.sources[name])
312
313            self._selected_sources = result
314        return self._selected_sources

Mapping of nodes and sources that are actually selected from in this scope.

That is, all tables in a schema are selectable at any point. But a table only becomes a selected source if it's included in a FROM or JOIN clause.

Returns:

dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes

references: List[Tuple[str, sqlglot.expressions.Expression]]
316    @property
317    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
318        if self._references is None:
319            self._references = []
320
321            for table in self.tables:
322                self._references.append((table.alias_or_name, table))
323            for expression in itertools.chain(self.derived_tables, self.udtfs):
324                self._references.append(
325                    (
326                        expression.alias,
327                        expression if expression.args.get("pivots") else expression.unnest(),
328                    )
329                )
330
331        return self._references
external_columns
333    @property
334    def external_columns(self):
335        """
336        Columns that appear to reference sources in outer scopes.
337
338        Returns:
339            list[exp.Column]: Column instances that don't reference
340                sources in the current scope.
341        """
342        if self._external_columns is None:
343            if isinstance(self.expression, exp.SetOperation):
344                left, right = self.union_scopes
345                self._external_columns = left.external_columns + right.external_columns
346            else:
347                self._external_columns = [
348                    c for c in self.columns if c.table not in self.selected_sources
349                ]
350
351        return self._external_columns

Columns that appear to reference sources in outer scopes.

Returns:

list[exp.Column]: Column instances that don't reference sources in the current scope.

unqualified_columns
353    @property
354    def unqualified_columns(self):
355        """
356        Unqualified columns in the current scope.
357
358        Returns:
359             list[exp.Column]: Unqualified columns
360        """
361        return [c for c in self.columns if not c.table]

Unqualified columns in the current scope.

Returns:

list[exp.Column]: Unqualified columns

join_hints
363    @property
364    def join_hints(self):
365        """
366        Hints that exist in the scope that reference tables
367
368        Returns:
369            list[exp.JoinHint]: Join hints that are referenced within the scope
370        """
371        if self._join_hints is None:
372            return []
373        return self._join_hints

Hints that exist in the scope that reference tables

Returns:

list[exp.JoinHint]: Join hints that are referenced within the scope

pivots
375    @property
376    def pivots(self):
377        if not self._pivots:
378            self._pivots = [
379                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
380            ]
381
382        return self._pivots
def source_columns(self, source_name):
384    def source_columns(self, source_name):
385        """
386        Get all columns in the current scope for a particular source.
387
388        Args:
389            source_name (str): Name of the source
390        Returns:
391            list[exp.Column]: Column instances that reference `source_name`
392        """
393        return [column for column in self.columns if column.table == source_name]

Get all columns in the current scope for a particular source.

Arguments:
  • source_name (str): Name of the source
Returns:

list[exp.Column]: Column instances that reference source_name

is_subquery
395    @property
396    def is_subquery(self):
397        """Determine if this scope is a subquery"""
398        return self.scope_type == ScopeType.SUBQUERY

Determine if this scope is a subquery

is_derived_table
400    @property
401    def is_derived_table(self):
402        """Determine if this scope is a derived table"""
403        return self.scope_type == ScopeType.DERIVED_TABLE

Determine if this scope is a derived table

is_union
405    @property
406    def is_union(self):
407        """Determine if this scope is a union"""
408        return self.scope_type == ScopeType.UNION

Determine if this scope is a union

is_cte
410    @property
411    def is_cte(self):
412        """Determine if this scope is a common table expression"""
413        return self.scope_type == ScopeType.CTE

Determine if this scope is a common table expression

is_root
415    @property
416    def is_root(self):
417        """Determine if this is the root scope"""
418        return self.scope_type == ScopeType.ROOT

Determine if this is the root scope

is_udtf
420    @property
421    def is_udtf(self):
422        """Determine if this scope is a UDTF (User Defined Table Function)"""
423        return self.scope_type == ScopeType.UDTF

Determine if this scope is a UDTF (User Defined Table Function)

is_correlated_subquery
425    @property
426    def is_correlated_subquery(self):
427        """Determine if this scope is a correlated subquery"""
428        return bool(
429            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
430            and self.external_columns
431        )

Determine if this scope is a correlated subquery

def rename_source(self, old_name, new_name):
433    def rename_source(self, old_name, new_name):
434        """Rename a source in this scope"""
435        columns = self.sources.pop(old_name or "", [])
436        self.sources[new_name] = columns

Rename a source in this scope

def add_source(self, name, source):
438    def add_source(self, name, source):
439        """Add a source to this scope"""
440        self.sources[name] = source
441        self.clear_cache()

Add a source to this scope

def remove_source(self, name):
443    def remove_source(self, name):
444        """Remove a source from this scope"""
445        self.sources.pop(name, None)
446        self.clear_cache()

Remove a source from this scope

def traverse(self):
451    def traverse(self):
452        """
453        Traverse the scope tree from this node.
454
455        Yields:
456            Scope: scope instances in depth-first-search post-order
457        """
458        stack = [self]
459        result = []
460        while stack:
461            scope = stack.pop()
462            result.append(scope)
463            stack.extend(
464                itertools.chain(
465                    scope.cte_scopes,
466                    scope.union_scopes,
467                    scope.table_scopes,
468                    scope.subquery_scopes,
469                )
470            )
471
472        yield from reversed(result)

Traverse the scope tree from this node.

Yields:

Scope: scope instances in depth-first-search post-order

def ref_count(self):
474    def ref_count(self):
475        """
476        Count the number of times each scope in this tree is referenced.
477
478        Returns:
479            dict[int, int]: Mapping of Scope instance ID to reference count
480        """
481        scope_ref_count = defaultdict(lambda: 0)
482
483        for scope in self.traverse():
484            for _, source in scope.selected_sources.values():
485                scope_ref_count[id(source)] += 1
486
487        return scope_ref_count

Count the number of times each scope in this tree is referenced.

Returns:

dict[int, int]: Mapping of Scope instance ID to reference count

def traverse_scope( expression: sqlglot.expressions.Expression) -> List[Scope]:
490def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
491    """
492    Traverse an expression by its "scopes".
493
494    "Scope" represents the current context of a Select statement.
495
496    This is helpful for optimizing queries, where we need more information than
497    the expression tree itself. For example, we might care about the source
498    names within a subquery. Returns a list because a generator could result in
499    incomplete properties which is confusing.
500
501    Examples:
502        >>> import sqlglot
503        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
504        >>> scopes = traverse_scope(expression)
505        >>> scopes[0].expression.sql(), list(scopes[0].sources)
506        ('SELECT a FROM x', ['x'])
507        >>> scopes[1].expression.sql(), list(scopes[1].sources)
508        ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
509
510    Args:
511        expression: Expression to traverse
512
513    Returns:
514        A list of the created scope instances
515    """
516    if isinstance(expression, TRAVERSABLES):
517        return list(_traverse_scope(Scope(expression)))
518    return []

Traverse an expression by its "scopes".

"Scope" represents the current context of a Select statement.

This is helpful for optimizing queries, where we need more information than the expression tree itself. For example, we might care about the source names within a subquery. Returns a list because a generator could result in incomplete properties which is confusing.

Examples:
>>> import sqlglot
>>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
>>> scopes = traverse_scope(expression)
>>> scopes[0].expression.sql(), list(scopes[0].sources)
('SELECT a FROM x', ['x'])
>>> scopes[1].expression.sql(), list(scopes[1].sources)
('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
Arguments:
  • expression: Expression to traverse
Returns:

A list of the created scope instances

def build_scope( expression: sqlglot.expressions.Expression) -> Optional[Scope]:
521def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
522    """
523    Build a scope tree.
524
525    Args:
526        expression: Expression to build the scope tree for.
527
528    Returns:
529        The root scope
530    """
531    return seq_get(traverse_scope(expression), -1)

Build a scope tree.

Arguments:
  • expression: Expression to build the scope tree for.
Returns:

The root scope

def walk_in_scope(expression, bfs=True, prune=None):
778def walk_in_scope(expression, bfs=True, prune=None):
779    """
780    Returns a generator object which visits all nodes in the syntrax tree, stopping at
781    nodes that start child scopes.
782
783    Args:
784        expression (exp.Expression):
785        bfs (bool): if set to True the BFS traversal order will be applied,
786            otherwise the DFS traversal will be used instead.
787        prune ((node, parent, arg_key) -> bool): callable that returns True if
788            the generator should stop traversing this branch of the tree.
789
790    Yields:
791        tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
792    """
793    # We'll use this variable to pass state into the dfs generator.
794    # Whenever we set it to True, we exclude a subtree from traversal.
795    crossed_scope_boundary = False
796
797    for node in expression.walk(
798        bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n))
799    ):
800        crossed_scope_boundary = False
801
802        yield node
803
804        if node is expression:
805            continue
806        if (
807            isinstance(node, exp.CTE)
808            or (
809                isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
810                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
811            )
812            or isinstance(node, exp.UNWRAPPED_QUERIES)
813        ):
814            crossed_scope_boundary = True
815
816            if isinstance(node, (exp.Subquery, exp.UDTF)):
817                # The following args are not actually in the inner scope, so we should visit them
818                for key in ("joins", "laterals", "pivots"):
819                    for arg in node.args.get(key) or []:
820                        yield from walk_in_scope(arg, bfs=bfs)

Returns a generator object which visits all nodes in the syntrax tree, stopping at nodes that start child scopes.

Arguments:
  • expression (exp.Expression):
  • bfs (bool): if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead.
  • prune ((node, parent, arg_key) -> bool): callable that returns True if the generator should stop traversing this branch of the tree.
Yields:

tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key

def find_all_in_scope(expression, expression_types, bfs=True):
823def find_all_in_scope(expression, expression_types, bfs=True):
824    """
825    Returns a generator object which visits all nodes in this scope and only yields those that
826    match at least one of the specified expression types.
827
828    This does NOT traverse into subscopes.
829
830    Args:
831        expression (exp.Expression):
832        expression_types (tuple[type]|type): the expression type(s) to match.
833        bfs (bool): True to use breadth-first search, False to use depth-first.
834
835    Yields:
836        exp.Expression: nodes
837    """
838    for expression in walk_in_scope(expression, bfs=bfs):
839        if isinstance(expression, tuple(ensure_collection(expression_types))):
840            yield expression

Returns a generator object which visits all nodes in this scope and only yields those that match at least one of the specified expression types.

This does NOT traverse into subscopes.

Arguments:
  • expression (exp.Expression):
  • expression_types (tuple[type]|type): the expression type(s) to match.
  • bfs (bool): True to use breadth-first search, False to use depth-first.
Yields:

exp.Expression: nodes

def find_in_scope(expression, expression_types, bfs=True):
843def find_in_scope(expression, expression_types, bfs=True):
844    """
845    Returns the first node in this scope which matches at least one of the specified types.
846
847    This does NOT traverse into subscopes.
848
849    Args:
850        expression (exp.Expression):
851        expression_types (tuple[type]|type): the expression type(s) to match.
852        bfs (bool): True to use breadth-first search, False to use depth-first.
853
854    Returns:
855        exp.Expression: the node which matches the criteria or None if no node matching
856        the criteria was found.
857    """
858    return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)

Returns the first node in this scope which matches at least one of the specified types.

This does NOT traverse into subscopes.

Arguments:
  • expression (exp.Expression):
  • expression_types (tuple[type]|type): the expression type(s) to match.
  • bfs (bool): True to use breadth-first search, False to use depth-first.
Returns:

exp.Expression: the node which matches the criteria or None if no node matching the criteria was found.