Edit on GitHub

sqlglot.optimizer.scope

  1from __future__ import annotations
  2
  3import itertools
  4import logging
  5import typing as t
  6from collections import defaultdict
  7from enum import Enum, auto
  8
  9from sqlglot import exp
 10from sqlglot.errors import OptimizeError
 11from sqlglot.helper import ensure_collection, find_new_name, seq_get
 12
 13logger = logging.getLogger("sqlglot")
 14
 15TRAVERSABLES = (exp.Query, exp.DDL, exp.DML)
 16
 17
 18class ScopeType(Enum):
 19    ROOT = auto()
 20    SUBQUERY = auto()
 21    DERIVED_TABLE = auto()
 22    CTE = auto()
 23    UNION = auto()
 24    UDTF = auto()
 25
 26
 27class Scope:
 28    """
 29    Selection scope.
 30
 31    Attributes:
 32        expression (exp.Select|exp.SetOperation): Root expression of this scope
 33        sources (dict[str, exp.Table|Scope]): Mapping of source name to either
 34            a Table expression or another Scope instance. For example:
 35                SELECT * FROM x                     {"x": Table(this="x")}
 36                SELECT * FROM x AS y                {"y": Table(this="x")}
 37                SELECT * FROM (SELECT ...) AS y     {"y": Scope(...)}
 38        lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals
 39            For example:
 40                SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c;
 41            The LATERAL VIEW EXPLODE gets x as a source.
 42        cte_sources (dict[str, Scope]): Sources from CTES
 43        outer_columns (list[str]): If this is a derived table or CTE, and the outer query
 44            defines a column list for the alias of this scope, this is that list of columns.
 45            For example:
 46                SELECT * FROM (SELECT ...) AS y(col1, col2)
 47            The inner query would have `["col1", "col2"]` for its `outer_columns`
 48        parent (Scope): Parent scope
 49        scope_type (ScopeType): Type of this scope, relative to it's parent
 50        subquery_scopes (list[Scope]): List of all child scopes for subqueries
 51        cte_scopes (list[Scope]): List of all child scopes for CTEs
 52        derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
 53        udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
 54        table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
 55        union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be
 56            a list of the left and right child scopes.
 57    """
 58
 59    def __init__(
 60        self,
 61        expression,
 62        sources=None,
 63        outer_columns=None,
 64        parent=None,
 65        scope_type=ScopeType.ROOT,
 66        lateral_sources=None,
 67        cte_sources=None,
 68    ):
 69        self.expression = expression
 70        self.sources = sources or {}
 71        self.lateral_sources = lateral_sources or {}
 72        self.cte_sources = cte_sources or {}
 73        self.sources.update(self.lateral_sources)
 74        self.sources.update(self.cte_sources)
 75        self.outer_columns = outer_columns or []
 76        self.parent = parent
 77        self.scope_type = scope_type
 78        self.subquery_scopes = []
 79        self.derived_table_scopes = []
 80        self.table_scopes = []
 81        self.cte_scopes = []
 82        self.union_scopes = []
 83        self.udtf_scopes = []
 84        self.clear_cache()
 85
 86    def clear_cache(self):
 87        self._collected = False
 88        self._raw_columns = None
 89        self._stars = None
 90        self._derived_tables = None
 91        self._udtfs = None
 92        self._tables = None
 93        self._ctes = None
 94        self._subqueries = None
 95        self._selected_sources = None
 96        self._columns = None
 97        self._external_columns = None
 98        self._join_hints = None
 99        self._pivots = None
100        self._references = None
101
102    def branch(
103        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
104    ):
105        """Branch from the current scope to a new, inner scope"""
106        return Scope(
107            expression=expression.unnest(),
108            sources=sources.copy() if sources else None,
109            parent=self,
110            scope_type=scope_type,
111            cte_sources={**self.cte_sources, **(cte_sources or {})},
112            lateral_sources=lateral_sources.copy() if lateral_sources else None,
113            **kwargs,
114        )
115
116    def _collect(self):
117        self._tables = []
118        self._ctes = []
119        self._subqueries = []
120        self._derived_tables = []
121        self._udtfs = []
122        self._raw_columns = []
123        self._stars = []
124        self._join_hints = []
125
126        for node in self.walk(bfs=False):
127            if node is self.expression:
128                continue
129
130            if isinstance(node, exp.Dot) and node.is_star:
131                self._stars.append(node)
132            elif isinstance(node, exp.Column):
133                if isinstance(node.this, exp.Star):
134                    self._stars.append(node)
135                else:
136                    self._raw_columns.append(node)
137            elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
138                self._tables.append(node)
139            elif isinstance(node, exp.JoinHint):
140                self._join_hints.append(node)
141            elif isinstance(node, exp.UDTF):
142                self._udtfs.append(node)
143            elif isinstance(node, exp.CTE):
144                self._ctes.append(node)
145            elif _is_derived_table(node) and isinstance(
146                node.parent, (exp.From, exp.Join, exp.Subquery)
147            ):
148                self._derived_tables.append(node)
149            elif isinstance(node, exp.UNWRAPPED_QUERIES):
150                self._subqueries.append(node)
151
152        self._collected = True
153
154    def _ensure_collected(self):
155        if not self._collected:
156            self._collect()
157
158    def walk(self, bfs=True, prune=None):
159        return walk_in_scope(self.expression, bfs=bfs, prune=None)
160
161    def find(self, *expression_types, bfs=True):
162        return find_in_scope(self.expression, expression_types, bfs=bfs)
163
164    def find_all(self, *expression_types, bfs=True):
165        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
166
167    def replace(self, old, new):
168        """
169        Replace `old` with `new`.
170
171        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
172
173        Args:
174            old (exp.Expression): old node
175            new (exp.Expression): new node
176        """
177        old.replace(new)
178        self.clear_cache()
179
180    @property
181    def tables(self):
182        """
183        List of tables in this scope.
184
185        Returns:
186            list[exp.Table]: tables
187        """
188        self._ensure_collected()
189        return self._tables
190
191    @property
192    def ctes(self):
193        """
194        List of CTEs in this scope.
195
196        Returns:
197            list[exp.CTE]: ctes
198        """
199        self._ensure_collected()
200        return self._ctes
201
202    @property
203    def derived_tables(self):
204        """
205        List of derived tables in this scope.
206
207        For example:
208            SELECT * FROM (SELECT ...) <- that's a derived table
209
210        Returns:
211            list[exp.Subquery]: derived tables
212        """
213        self._ensure_collected()
214        return self._derived_tables
215
216    @property
217    def udtfs(self):
218        """
219        List of "User Defined Tabular Functions" in this scope.
220
221        Returns:
222            list[exp.UDTF]: UDTFs
223        """
224        self._ensure_collected()
225        return self._udtfs
226
227    @property
228    def subqueries(self):
229        """
230        List of subqueries in this scope.
231
232        For example:
233            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
234
235        Returns:
236            list[exp.Select | exp.SetOperation]: subqueries
237        """
238        self._ensure_collected()
239        return self._subqueries
240
241    @property
242    def stars(self) -> t.List[exp.Column | exp.Dot]:
243        """
244        List of star expressions (columns or dots) in this scope.
245        """
246        self._ensure_collected()
247        return self._stars
248
249    @property
250    def columns(self):
251        """
252        List of columns in this scope.
253
254        Returns:
255            list[exp.Column]: Column instances in this scope, plus any
256                Columns that reference this scope from correlated subqueries.
257        """
258        if self._columns is None:
259            self._ensure_collected()
260            columns = self._raw_columns
261
262            external_columns = [
263                column
264                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
265                for column in scope.external_columns
266            ]
267
268            named_selects = set(self.expression.named_selects)
269
270            self._columns = []
271            for column in columns + external_columns:
272                ancestor = column.find_ancestor(
273                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
274                )
275                if (
276                    not ancestor
277                    or column.table
278                    or isinstance(ancestor, exp.Select)
279                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
280                    or (
281                        isinstance(ancestor, exp.Order)
282                        and (
283                            isinstance(ancestor.parent, exp.Window)
284                            or column.name not in named_selects
285                        )
286                    )
287                    or (isinstance(ancestor, exp.Star) and not column.arg_key == "except")
288                ):
289                    self._columns.append(column)
290
291        return self._columns
292
293    @property
294    def selected_sources(self):
295        """
296        Mapping of nodes and sources that are actually selected from in this scope.
297
298        That is, all tables in a schema are selectable at any point. But a
299        table only becomes a selected source if it's included in a FROM or JOIN clause.
300
301        Returns:
302            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
303        """
304        if self._selected_sources is None:
305            result = {}
306
307            for name, node in self.references:
308                if name in result:
309                    raise OptimizeError(f"Alias already used: {name}")
310                if name in self.sources:
311                    result[name] = (node, self.sources[name])
312
313            self._selected_sources = result
314        return self._selected_sources
315
316    @property
317    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
318        if self._references is None:
319            self._references = []
320
321            for table in self.tables:
322                self._references.append((table.alias_or_name, table))
323            for expression in itertools.chain(self.derived_tables, self.udtfs):
324                self._references.append(
325                    (
326                        expression.alias,
327                        expression if expression.args.get("pivots") else expression.unnest(),
328                    )
329                )
330
331        return self._references
332
333    @property
334    def external_columns(self):
335        """
336        Columns that appear to reference sources in outer scopes.
337
338        Returns:
339            list[exp.Column]: Column instances that don't reference
340                sources in the current scope.
341        """
342        if self._external_columns is None:
343            if isinstance(self.expression, exp.SetOperation):
344                left, right = self.union_scopes
345                self._external_columns = left.external_columns + right.external_columns
346            else:
347                self._external_columns = [
348                    c for c in self.columns if c.table not in self.selected_sources
349                ]
350
351        return self._external_columns
352
353    @property
354    def unqualified_columns(self):
355        """
356        Unqualified columns in the current scope.
357
358        Returns:
359             list[exp.Column]: Unqualified columns
360        """
361        return [c for c in self.columns if not c.table]
362
363    @property
364    def join_hints(self):
365        """
366        Hints that exist in the scope that reference tables
367
368        Returns:
369            list[exp.JoinHint]: Join hints that are referenced within the scope
370        """
371        if self._join_hints is None:
372            return []
373        return self._join_hints
374
375    @property
376    def pivots(self):
377        if not self._pivots:
378            self._pivots = [
379                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
380            ]
381
382        return self._pivots
383
384    def source_columns(self, source_name):
385        """
386        Get all columns in the current scope for a particular source.
387
388        Args:
389            source_name (str): Name of the source
390        Returns:
391            list[exp.Column]: Column instances that reference `source_name`
392        """
393        return [column for column in self.columns if column.table == source_name]
394
395    @property
396    def is_subquery(self):
397        """Determine if this scope is a subquery"""
398        return self.scope_type == ScopeType.SUBQUERY
399
400    @property
401    def is_derived_table(self):
402        """Determine if this scope is a derived table"""
403        return self.scope_type == ScopeType.DERIVED_TABLE
404
405    @property
406    def is_union(self):
407        """Determine if this scope is a union"""
408        return self.scope_type == ScopeType.UNION
409
410    @property
411    def is_cte(self):
412        """Determine if this scope is a common table expression"""
413        return self.scope_type == ScopeType.CTE
414
415    @property
416    def is_root(self):
417        """Determine if this is the root scope"""
418        return self.scope_type == ScopeType.ROOT
419
420    @property
421    def is_udtf(self):
422        """Determine if this scope is a UDTF (User Defined Table Function)"""
423        return self.scope_type == ScopeType.UDTF
424
425    @property
426    def is_correlated_subquery(self):
427        """Determine if this scope is a correlated subquery"""
428        return bool(
429            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
430            and self.external_columns
431        )
432
433    def rename_source(self, old_name, new_name):
434        """Rename a source in this scope"""
435        columns = self.sources.pop(old_name or "", [])
436        self.sources[new_name] = columns
437
438    def add_source(self, name, source):
439        """Add a source to this scope"""
440        self.sources[name] = source
441        self.clear_cache()
442
443    def remove_source(self, name):
444        """Remove a source from this scope"""
445        self.sources.pop(name, None)
446        self.clear_cache()
447
448    def __repr__(self):
449        return f"Scope<{self.expression.sql()}>"
450
451    def traverse(self):
452        """
453        Traverse the scope tree from this node.
454
455        Yields:
456            Scope: scope instances in depth-first-search post-order
457        """
458        stack = [self]
459        result = []
460        while stack:
461            scope = stack.pop()
462            result.append(scope)
463            stack.extend(
464                itertools.chain(
465                    scope.cte_scopes,
466                    scope.union_scopes,
467                    scope.table_scopes,
468                    scope.subquery_scopes,
469                )
470            )
471
472        yield from reversed(result)
473
474    def ref_count(self):
475        """
476        Count the number of times each scope in this tree is referenced.
477
478        Returns:
479            dict[int, int]: Mapping of Scope instance ID to reference count
480        """
481        scope_ref_count = defaultdict(lambda: 0)
482
483        for scope in self.traverse():
484            for _, source in scope.selected_sources.values():
485                scope_ref_count[id(source)] += 1
486
487        return scope_ref_count
488
489
490def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
491    """
492    Traverse an expression by its "scopes".
493
494    "Scope" represents the current context of a Select statement.
495
496    This is helpful for optimizing queries, where we need more information than
497    the expression tree itself. For example, we might care about the source
498    names within a subquery. Returns a list because a generator could result in
499    incomplete properties which is confusing.
500
501    Examples:
502        >>> import sqlglot
503        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
504        >>> scopes = traverse_scope(expression)
505        >>> scopes[0].expression.sql(), list(scopes[0].sources)
506        ('SELECT a FROM x', ['x'])
507        >>> scopes[1].expression.sql(), list(scopes[1].sources)
508        ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
509
510    Args:
511        expression: Expression to traverse
512
513    Returns:
514        A list of the created scope instances
515    """
516    if isinstance(expression, TRAVERSABLES):
517        return list(_traverse_scope(Scope(expression)))
518    return []
519
520
521def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
522    """
523    Build a scope tree.
524
525    Args:
526        expression: Expression to build the scope tree for.
527
528    Returns:
529        The root scope
530    """
531    return seq_get(traverse_scope(expression), -1)
532
533
534def _traverse_scope(scope):
535    expression = scope.expression
536
537    if isinstance(expression, exp.Select):
538        yield from _traverse_select(scope)
539    elif isinstance(expression, exp.SetOperation):
540        yield from _traverse_ctes(scope)
541        yield from _traverse_union(scope)
542        return
543    elif isinstance(expression, exp.Subquery):
544        if scope.is_root:
545            yield from _traverse_select(scope)
546        else:
547            yield from _traverse_subqueries(scope)
548    elif isinstance(expression, exp.Table):
549        yield from _traverse_tables(scope)
550    elif isinstance(expression, exp.UDTF):
551        yield from _traverse_udtfs(scope)
552    elif isinstance(expression, exp.DDL):
553        if isinstance(expression.expression, exp.Query):
554            yield from _traverse_ctes(scope)
555            yield from _traverse_scope(Scope(expression.expression, cte_sources=scope.cte_sources))
556        return
557    elif isinstance(expression, exp.DML):
558        yield from _traverse_ctes(scope)
559        for query in find_all_in_scope(expression, exp.Query):
560            # This check ensures we don't yield the CTE queries twice
561            if not isinstance(query.parent, exp.CTE):
562                yield from _traverse_scope(Scope(query, cte_sources=scope.cte_sources))
563        return
564    else:
565        logger.warning("Cannot traverse scope %s with type '%s'", expression, type(expression))
566        return
567
568    yield scope
569
570
571def _traverse_select(scope):
572    yield from _traverse_ctes(scope)
573    yield from _traverse_tables(scope)
574    yield from _traverse_subqueries(scope)
575
576
577def _traverse_union(scope):
578    prev_scope = None
579    union_scope_stack = [scope]
580    expression_stack = [scope.expression.right, scope.expression.left]
581
582    while expression_stack:
583        expression = expression_stack.pop()
584        union_scope = union_scope_stack[-1]
585
586        new_scope = union_scope.branch(
587            expression,
588            outer_columns=union_scope.outer_columns,
589            scope_type=ScopeType.UNION,
590        )
591
592        if isinstance(expression, exp.SetOperation):
593            yield from _traverse_ctes(new_scope)
594
595            union_scope_stack.append(new_scope)
596            expression_stack.extend([expression.right, expression.left])
597            continue
598
599        for scope in _traverse_scope(new_scope):
600            yield scope
601
602        if prev_scope:
603            union_scope_stack.pop()
604            union_scope.union_scopes = [prev_scope, scope]
605            prev_scope = union_scope
606
607            yield union_scope
608        else:
609            prev_scope = scope
610
611
612def _traverse_ctes(scope):
613    sources = {}
614
615    for cte in scope.ctes:
616        cte_name = cte.alias
617
618        # if the scope is a recursive cte, it must be in the form of base_case UNION recursive.
619        # thus the recursive scope is the first section of the union.
620        with_ = scope.expression.args.get("with")
621        if with_ and with_.recursive:
622            union = cte.this
623
624            if isinstance(union, exp.SetOperation):
625                sources[cte_name] = scope.branch(union.this, scope_type=ScopeType.CTE)
626
627        child_scope = None
628
629        for child_scope in _traverse_scope(
630            scope.branch(
631                cte.this,
632                cte_sources=sources,
633                outer_columns=cte.alias_column_names,
634                scope_type=ScopeType.CTE,
635            )
636        ):
637            yield child_scope
638
639        # append the final child_scope yielded
640        if child_scope:
641            sources[cte_name] = child_scope
642            scope.cte_scopes.append(child_scope)
643
644    scope.sources.update(sources)
645    scope.cte_sources.update(sources)
646
647
648def _is_derived_table(expression: exp.Subquery) -> bool:
649    """
650    We represent (tbl1 JOIN tbl2) as a Subquery, but it's not really a "derived table",
651    as it doesn't introduce a new scope. If an alias is present, it shadows all names
652    under the Subquery, so that's one exception to this rule.
653    """
654    return isinstance(expression, exp.Subquery) and bool(
655        expression.alias or isinstance(expression.this, exp.UNWRAPPED_QUERIES)
656    )
657
658
659def _traverse_tables(scope):
660    sources = {}
661
662    # Traverse FROMs, JOINs, and LATERALs in the order they are defined
663    expressions = []
664    from_ = scope.expression.args.get("from")
665    if from_:
666        expressions.append(from_.this)
667
668    for join in scope.expression.args.get("joins") or []:
669        expressions.append(join.this)
670
671    if isinstance(scope.expression, exp.Table):
672        expressions.append(scope.expression)
673
674    expressions.extend(scope.expression.args.get("laterals") or [])
675
676    for expression in expressions:
677        if isinstance(expression, exp.Table):
678            table_name = expression.name
679            source_name = expression.alias_or_name
680
681            if table_name in scope.sources and not expression.db:
682                # This is a reference to a parent source (e.g. a CTE), not an actual table, unless
683                # it is pivoted, because then we get back a new table and hence a new source.
684                pivots = expression.args.get("pivots")
685                if pivots:
686                    sources[pivots[0].alias] = expression
687                else:
688                    sources[source_name] = scope.sources[table_name]
689            elif source_name in sources:
690                sources[find_new_name(sources, table_name)] = expression
691            else:
692                sources[source_name] = expression
693
694            # Make sure to not include the joins twice
695            if expression is not scope.expression:
696                expressions.extend(join.this for join in expression.args.get("joins") or [])
697
698            continue
699
700        if not isinstance(expression, exp.DerivedTable):
701            continue
702
703        if isinstance(expression, exp.UDTF):
704            lateral_sources = sources
705            scope_type = ScopeType.UDTF
706            scopes = scope.udtf_scopes
707        elif _is_derived_table(expression):
708            lateral_sources = None
709            scope_type = ScopeType.DERIVED_TABLE
710            scopes = scope.derived_table_scopes
711            expressions.extend(join.this for join in expression.args.get("joins") or [])
712        else:
713            # Makes sure we check for possible sources in nested table constructs
714            expressions.append(expression.this)
715            expressions.extend(join.this for join in expression.args.get("joins") or [])
716            continue
717
718        for child_scope in _traverse_scope(
719            scope.branch(
720                expression,
721                lateral_sources=lateral_sources,
722                outer_columns=expression.alias_column_names,
723                scope_type=scope_type,
724            )
725        ):
726            yield child_scope
727
728            # Tables without aliases will be set as ""
729            # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
730            # Until then, this means that only a single, unaliased derived table is allowed (rather,
731            # the latest one wins.
732            sources[expression.alias] = child_scope
733
734        # append the final child_scope yielded
735        scopes.append(child_scope)
736        scope.table_scopes.append(child_scope)
737
738    scope.sources.update(sources)
739
740
741def _traverse_subqueries(scope):
742    for subquery in scope.subqueries:
743        top = None
744        for child_scope in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)):
745            yield child_scope
746            top = child_scope
747        scope.subquery_scopes.append(top)
748
749
750def _traverse_udtfs(scope):
751    if isinstance(scope.expression, exp.Unnest):
752        expressions = scope.expression.expressions
753    elif isinstance(scope.expression, exp.Lateral):
754        expressions = [scope.expression.this]
755    else:
756        expressions = []
757
758    sources = {}
759    for expression in expressions:
760        if _is_derived_table(expression):
761            top = None
762            for child_scope in _traverse_scope(
763                scope.branch(
764                    expression,
765                    scope_type=ScopeType.SUBQUERY,
766                    outer_columns=expression.alias_column_names,
767                )
768            ):
769                yield child_scope
770                top = child_scope
771                sources[expression.alias] = child_scope
772
773            scope.subquery_scopes.append(top)
774
775    scope.sources.update(sources)
776
777
778def walk_in_scope(expression, bfs=True, prune=None):
779    """
780    Returns a generator object which visits all nodes in the syntrax tree, stopping at
781    nodes that start child scopes.
782
783    Args:
784        expression (exp.Expression):
785        bfs (bool): if set to True the BFS traversal order will be applied,
786            otherwise the DFS traversal will be used instead.
787        prune ((node, parent, arg_key) -> bool): callable that returns True if
788            the generator should stop traversing this branch of the tree.
789
790    Yields:
791        tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
792    """
793    # We'll use this variable to pass state into the dfs generator.
794    # Whenever we set it to True, we exclude a subtree from traversal.
795    crossed_scope_boundary = False
796
797    for node in expression.walk(
798        bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n))
799    ):
800        crossed_scope_boundary = False
801
802        yield node
803
804        if node is expression:
805            continue
806        if (
807            isinstance(node, exp.CTE)
808            or (
809                isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
810                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
811            )
812            or isinstance(node, exp.UNWRAPPED_QUERIES)
813        ):
814            crossed_scope_boundary = True
815
816            if isinstance(node, (exp.Subquery, exp.UDTF)):
817                # The following args are not actually in the inner scope, so we should visit them
818                for key in ("joins", "laterals", "pivots"):
819                    for arg in node.args.get(key) or []:
820                        yield from walk_in_scope(arg, bfs=bfs)
821
822
823def find_all_in_scope(expression, expression_types, bfs=True):
824    """
825    Returns a generator object which visits all nodes in this scope and only yields those that
826    match at least one of the specified expression types.
827
828    This does NOT traverse into subscopes.
829
830    Args:
831        expression (exp.Expression):
832        expression_types (tuple[type]|type): the expression type(s) to match.
833        bfs (bool): True to use breadth-first search, False to use depth-first.
834
835    Yields:
836        exp.Expression: nodes
837    """
838    for expression in walk_in_scope(expression, bfs=bfs):
839        if isinstance(expression, tuple(ensure_collection(expression_types))):
840            yield expression
841
842
843def find_in_scope(expression, expression_types, bfs=True):
844    """
845    Returns the first node in this scope which matches at least one of the specified types.
846
847    This does NOT traverse into subscopes.
848
849    Args:
850        expression (exp.Expression):
851        expression_types (tuple[type]|type): the expression type(s) to match.
852        bfs (bool): True to use breadth-first search, False to use depth-first.
853
854    Returns:
855        exp.Expression: the node which matches the criteria or None if no node matching
856        the criteria was found.
857    """
858    return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
logger = <Logger sqlglot (WARNING)>
TRAVERSABLES = (<class 'sqlglot.expressions.Query'>, <class 'sqlglot.expressions.DDL'>, <class 'sqlglot.expressions.DML'>)
class ScopeType(enum.Enum):
19class ScopeType(Enum):
20    ROOT = auto()
21    SUBQUERY = auto()
22    DERIVED_TABLE = auto()
23    CTE = auto()
24    UNION = auto()
25    UDTF = auto()

An enumeration.

ROOT = <ScopeType.ROOT: 1>
SUBQUERY = <ScopeType.SUBQUERY: 2>
DERIVED_TABLE = <ScopeType.DERIVED_TABLE: 3>
CTE = <ScopeType.CTE: 4>
UNION = <ScopeType.UNION: 5>
UDTF = <ScopeType.UDTF: 6>
Inherited Members
enum.Enum
name
value
class Scope:
 28class Scope:
 29    """
 30    Selection scope.
 31
 32    Attributes:
 33        expression (exp.Select|exp.SetOperation): Root expression of this scope
 34        sources (dict[str, exp.Table|Scope]): Mapping of source name to either
 35            a Table expression or another Scope instance. For example:
 36                SELECT * FROM x                     {"x": Table(this="x")}
 37                SELECT * FROM x AS y                {"y": Table(this="x")}
 38                SELECT * FROM (SELECT ...) AS y     {"y": Scope(...)}
 39        lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals
 40            For example:
 41                SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c;
 42            The LATERAL VIEW EXPLODE gets x as a source.
 43        cte_sources (dict[str, Scope]): Sources from CTES
 44        outer_columns (list[str]): If this is a derived table or CTE, and the outer query
 45            defines a column list for the alias of this scope, this is that list of columns.
 46            For example:
 47                SELECT * FROM (SELECT ...) AS y(col1, col2)
 48            The inner query would have `["col1", "col2"]` for its `outer_columns`
 49        parent (Scope): Parent scope
 50        scope_type (ScopeType): Type of this scope, relative to it's parent
 51        subquery_scopes (list[Scope]): List of all child scopes for subqueries
 52        cte_scopes (list[Scope]): List of all child scopes for CTEs
 53        derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
 54        udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
 55        table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
 56        union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be
 57            a list of the left and right child scopes.
 58    """
 59
 60    def __init__(
 61        self,
 62        expression,
 63        sources=None,
 64        outer_columns=None,
 65        parent=None,
 66        scope_type=ScopeType.ROOT,
 67        lateral_sources=None,
 68        cte_sources=None,
 69    ):
 70        self.expression = expression
 71        self.sources = sources or {}
 72        self.lateral_sources = lateral_sources or {}
 73        self.cte_sources = cte_sources or {}
 74        self.sources.update(self.lateral_sources)
 75        self.sources.update(self.cte_sources)
 76        self.outer_columns = outer_columns or []
 77        self.parent = parent
 78        self.scope_type = scope_type
 79        self.subquery_scopes = []
 80        self.derived_table_scopes = []
 81        self.table_scopes = []
 82        self.cte_scopes = []
 83        self.union_scopes = []
 84        self.udtf_scopes = []
 85        self.clear_cache()
 86
 87    def clear_cache(self):
 88        self._collected = False
 89        self._raw_columns = None
 90        self._stars = None
 91        self._derived_tables = None
 92        self._udtfs = None
 93        self._tables = None
 94        self._ctes = None
 95        self._subqueries = None
 96        self._selected_sources = None
 97        self._columns = None
 98        self._external_columns = None
 99        self._join_hints = None
100        self._pivots = None
101        self._references = None
102
103    def branch(
104        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
105    ):
106        """Branch from the current scope to a new, inner scope"""
107        return Scope(
108            expression=expression.unnest(),
109            sources=sources.copy() if sources else None,
110            parent=self,
111            scope_type=scope_type,
112            cte_sources={**self.cte_sources, **(cte_sources or {})},
113            lateral_sources=lateral_sources.copy() if lateral_sources else None,
114            **kwargs,
115        )
116
117    def _collect(self):
118        self._tables = []
119        self._ctes = []
120        self._subqueries = []
121        self._derived_tables = []
122        self._udtfs = []
123        self._raw_columns = []
124        self._stars = []
125        self._join_hints = []
126
127        for node in self.walk(bfs=False):
128            if node is self.expression:
129                continue
130
131            if isinstance(node, exp.Dot) and node.is_star:
132                self._stars.append(node)
133            elif isinstance(node, exp.Column):
134                if isinstance(node.this, exp.Star):
135                    self._stars.append(node)
136                else:
137                    self._raw_columns.append(node)
138            elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint):
139                self._tables.append(node)
140            elif isinstance(node, exp.JoinHint):
141                self._join_hints.append(node)
142            elif isinstance(node, exp.UDTF):
143                self._udtfs.append(node)
144            elif isinstance(node, exp.CTE):
145                self._ctes.append(node)
146            elif _is_derived_table(node) and isinstance(
147                node.parent, (exp.From, exp.Join, exp.Subquery)
148            ):
149                self._derived_tables.append(node)
150            elif isinstance(node, exp.UNWRAPPED_QUERIES):
151                self._subqueries.append(node)
152
153        self._collected = True
154
155    def _ensure_collected(self):
156        if not self._collected:
157            self._collect()
158
159    def walk(self, bfs=True, prune=None):
160        return walk_in_scope(self.expression, bfs=bfs, prune=None)
161
162    def find(self, *expression_types, bfs=True):
163        return find_in_scope(self.expression, expression_types, bfs=bfs)
164
165    def find_all(self, *expression_types, bfs=True):
166        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
167
168    def replace(self, old, new):
169        """
170        Replace `old` with `new`.
171
172        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
173
174        Args:
175            old (exp.Expression): old node
176            new (exp.Expression): new node
177        """
178        old.replace(new)
179        self.clear_cache()
180
181    @property
182    def tables(self):
183        """
184        List of tables in this scope.
185
186        Returns:
187            list[exp.Table]: tables
188        """
189        self._ensure_collected()
190        return self._tables
191
192    @property
193    def ctes(self):
194        """
195        List of CTEs in this scope.
196
197        Returns:
198            list[exp.CTE]: ctes
199        """
200        self._ensure_collected()
201        return self._ctes
202
203    @property
204    def derived_tables(self):
205        """
206        List of derived tables in this scope.
207
208        For example:
209            SELECT * FROM (SELECT ...) <- that's a derived table
210
211        Returns:
212            list[exp.Subquery]: derived tables
213        """
214        self._ensure_collected()
215        return self._derived_tables
216
217    @property
218    def udtfs(self):
219        """
220        List of "User Defined Tabular Functions" in this scope.
221
222        Returns:
223            list[exp.UDTF]: UDTFs
224        """
225        self._ensure_collected()
226        return self._udtfs
227
228    @property
229    def subqueries(self):
230        """
231        List of subqueries in this scope.
232
233        For example:
234            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
235
236        Returns:
237            list[exp.Select | exp.SetOperation]: subqueries
238        """
239        self._ensure_collected()
240        return self._subqueries
241
242    @property
243    def stars(self) -> t.List[exp.Column | exp.Dot]:
244        """
245        List of star expressions (columns or dots) in this scope.
246        """
247        self._ensure_collected()
248        return self._stars
249
250    @property
251    def columns(self):
252        """
253        List of columns in this scope.
254
255        Returns:
256            list[exp.Column]: Column instances in this scope, plus any
257                Columns that reference this scope from correlated subqueries.
258        """
259        if self._columns is None:
260            self._ensure_collected()
261            columns = self._raw_columns
262
263            external_columns = [
264                column
265                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
266                for column in scope.external_columns
267            ]
268
269            named_selects = set(self.expression.named_selects)
270
271            self._columns = []
272            for column in columns + external_columns:
273                ancestor = column.find_ancestor(
274                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
275                )
276                if (
277                    not ancestor
278                    or column.table
279                    or isinstance(ancestor, exp.Select)
280                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
281                    or (
282                        isinstance(ancestor, exp.Order)
283                        and (
284                            isinstance(ancestor.parent, exp.Window)
285                            or column.name not in named_selects
286                        )
287                    )
288                    or (isinstance(ancestor, exp.Star) and not column.arg_key == "except")
289                ):
290                    self._columns.append(column)
291
292        return self._columns
293
294    @property
295    def selected_sources(self):
296        """
297        Mapping of nodes and sources that are actually selected from in this scope.
298
299        That is, all tables in a schema are selectable at any point. But a
300        table only becomes a selected source if it's included in a FROM or JOIN clause.
301
302        Returns:
303            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
304        """
305        if self._selected_sources is None:
306            result = {}
307
308            for name, node in self.references:
309                if name in result:
310                    raise OptimizeError(f"Alias already used: {name}")
311                if name in self.sources:
312                    result[name] = (node, self.sources[name])
313
314            self._selected_sources = result
315        return self._selected_sources
316
317    @property
318    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
319        if self._references is None:
320            self._references = []
321
322            for table in self.tables:
323                self._references.append((table.alias_or_name, table))
324            for expression in itertools.chain(self.derived_tables, self.udtfs):
325                self._references.append(
326                    (
327                        expression.alias,
328                        expression if expression.args.get("pivots") else expression.unnest(),
329                    )
330                )
331
332        return self._references
333
334    @property
335    def external_columns(self):
336        """
337        Columns that appear to reference sources in outer scopes.
338
339        Returns:
340            list[exp.Column]: Column instances that don't reference
341                sources in the current scope.
342        """
343        if self._external_columns is None:
344            if isinstance(self.expression, exp.SetOperation):
345                left, right = self.union_scopes
346                self._external_columns = left.external_columns + right.external_columns
347            else:
348                self._external_columns = [
349                    c for c in self.columns if c.table not in self.selected_sources
350                ]
351
352        return self._external_columns
353
354    @property
355    def unqualified_columns(self):
356        """
357        Unqualified columns in the current scope.
358
359        Returns:
360             list[exp.Column]: Unqualified columns
361        """
362        return [c for c in self.columns if not c.table]
363
364    @property
365    def join_hints(self):
366        """
367        Hints that exist in the scope that reference tables
368
369        Returns:
370            list[exp.JoinHint]: Join hints that are referenced within the scope
371        """
372        if self._join_hints is None:
373            return []
374        return self._join_hints
375
376    @property
377    def pivots(self):
378        if not self._pivots:
379            self._pivots = [
380                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
381            ]
382
383        return self._pivots
384
385    def source_columns(self, source_name):
386        """
387        Get all columns in the current scope for a particular source.
388
389        Args:
390            source_name (str): Name of the source
391        Returns:
392            list[exp.Column]: Column instances that reference `source_name`
393        """
394        return [column for column in self.columns if column.table == source_name]
395
396    @property
397    def is_subquery(self):
398        """Determine if this scope is a subquery"""
399        return self.scope_type == ScopeType.SUBQUERY
400
401    @property
402    def is_derived_table(self):
403        """Determine if this scope is a derived table"""
404        return self.scope_type == ScopeType.DERIVED_TABLE
405
406    @property
407    def is_union(self):
408        """Determine if this scope is a union"""
409        return self.scope_type == ScopeType.UNION
410
411    @property
412    def is_cte(self):
413        """Determine if this scope is a common table expression"""
414        return self.scope_type == ScopeType.CTE
415
416    @property
417    def is_root(self):
418        """Determine if this is the root scope"""
419        return self.scope_type == ScopeType.ROOT
420
421    @property
422    def is_udtf(self):
423        """Determine if this scope is a UDTF (User Defined Table Function)"""
424        return self.scope_type == ScopeType.UDTF
425
426    @property
427    def is_correlated_subquery(self):
428        """Determine if this scope is a correlated subquery"""
429        return bool(
430            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
431            and self.external_columns
432        )
433
434    def rename_source(self, old_name, new_name):
435        """Rename a source in this scope"""
436        columns = self.sources.pop(old_name or "", [])
437        self.sources[new_name] = columns
438
439    def add_source(self, name, source):
440        """Add a source to this scope"""
441        self.sources[name] = source
442        self.clear_cache()
443
444    def remove_source(self, name):
445        """Remove a source from this scope"""
446        self.sources.pop(name, None)
447        self.clear_cache()
448
449    def __repr__(self):
450        return f"Scope<{self.expression.sql()}>"
451
452    def traverse(self):
453        """
454        Traverse the scope tree from this node.
455
456        Yields:
457            Scope: scope instances in depth-first-search post-order
458        """
459        stack = [self]
460        result = []
461        while stack:
462            scope = stack.pop()
463            result.append(scope)
464            stack.extend(
465                itertools.chain(
466                    scope.cte_scopes,
467                    scope.union_scopes,
468                    scope.table_scopes,
469                    scope.subquery_scopes,
470                )
471            )
472
473        yield from reversed(result)
474
475    def ref_count(self):
476        """
477        Count the number of times each scope in this tree is referenced.
478
479        Returns:
480            dict[int, int]: Mapping of Scope instance ID to reference count
481        """
482        scope_ref_count = defaultdict(lambda: 0)
483
484        for scope in self.traverse():
485            for _, source in scope.selected_sources.values():
486                scope_ref_count[id(source)] += 1
487
488        return scope_ref_count

Selection scope.

Attributes:
  • expression (exp.Select|exp.SetOperation): Root expression of this scope
  • sources (dict[str, exp.Table|Scope]): Mapping of source name to either a Table expression or another Scope instance. For example: SELECT * FROM x {"x": Table(this="x")} SELECT * FROM x AS y {"y": Table(this="x")} SELECT * FROM (SELECT ...) AS y {"y": Scope(...)}
  • lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals For example: SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; The LATERAL VIEW EXPLODE gets x as a source.
  • cte_sources (dict[str, Scope]): Sources from CTES
  • outer_columns (list[str]): If this is a derived table or CTE, and the outer query defines a column list for the alias of this scope, this is that list of columns. For example: SELECT * FROM (SELECT ...) AS y(col1, col2) The inner query would have ["col1", "col2"] for its outer_columns
  • parent (Scope): Parent scope
  • scope_type (ScopeType): Type of this scope, relative to it's parent
  • subquery_scopes (list[Scope]): List of all child scopes for subqueries
  • cte_scopes (list[Scope]): List of all child scopes for CTEs
  • derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
  • udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
  • table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
  • union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be a list of the left and right child scopes.
Scope( expression, sources=None, outer_columns=None, parent=None, scope_type=<ScopeType.ROOT: 1>, lateral_sources=None, cte_sources=None)
60    def __init__(
61        self,
62        expression,
63        sources=None,
64        outer_columns=None,
65        parent=None,
66        scope_type=ScopeType.ROOT,
67        lateral_sources=None,
68        cte_sources=None,
69    ):
70        self.expression = expression
71        self.sources = sources or {}
72        self.lateral_sources = lateral_sources or {}
73        self.cte_sources = cte_sources or {}
74        self.sources.update(self.lateral_sources)
75        self.sources.update(self.cte_sources)
76        self.outer_columns = outer_columns or []
77        self.parent = parent
78        self.scope_type = scope_type
79        self.subquery_scopes = []
80        self.derived_table_scopes = []
81        self.table_scopes = []
82        self.cte_scopes = []
83        self.union_scopes = []
84        self.udtf_scopes = []
85        self.clear_cache()
expression
sources
lateral_sources
cte_sources
outer_columns
parent
scope_type
subquery_scopes
derived_table_scopes
table_scopes
cte_scopes
union_scopes
udtf_scopes
def clear_cache(self):
 87    def clear_cache(self):
 88        self._collected = False
 89        self._raw_columns = None
 90        self._stars = None
 91        self._derived_tables = None
 92        self._udtfs = None
 93        self._tables = None
 94        self._ctes = None
 95        self._subqueries = None
 96        self._selected_sources = None
 97        self._columns = None
 98        self._external_columns = None
 99        self._join_hints = None
100        self._pivots = None
101        self._references = None
def branch( self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs):
103    def branch(
104        self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs
105    ):
106        """Branch from the current scope to a new, inner scope"""
107        return Scope(
108            expression=expression.unnest(),
109            sources=sources.copy() if sources else None,
110            parent=self,
111            scope_type=scope_type,
112            cte_sources={**self.cte_sources, **(cte_sources or {})},
113            lateral_sources=lateral_sources.copy() if lateral_sources else None,
114            **kwargs,
115        )

Branch from the current scope to a new, inner scope

def walk(self, bfs=True, prune=None):
159    def walk(self, bfs=True, prune=None):
160        return walk_in_scope(self.expression, bfs=bfs, prune=None)
def find(self, *expression_types, bfs=True):
162    def find(self, *expression_types, bfs=True):
163        return find_in_scope(self.expression, expression_types, bfs=bfs)
def find_all(self, *expression_types, bfs=True):
165    def find_all(self, *expression_types, bfs=True):
166        return find_all_in_scope(self.expression, expression_types, bfs=bfs)
def replace(self, old, new):
168    def replace(self, old, new):
169        """
170        Replace `old` with `new`.
171
172        This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date.
173
174        Args:
175            old (exp.Expression): old node
176            new (exp.Expression): new node
177        """
178        old.replace(new)
179        self.clear_cache()

Replace old with new.

This can be used instead of exp.Expression.replace to ensure the Scope is kept up-to-date.

Arguments:
  • old (exp.Expression): old node
  • new (exp.Expression): new node
tables
181    @property
182    def tables(self):
183        """
184        List of tables in this scope.
185
186        Returns:
187            list[exp.Table]: tables
188        """
189        self._ensure_collected()
190        return self._tables

List of tables in this scope.

Returns:

list[exp.Table]: tables

ctes
192    @property
193    def ctes(self):
194        """
195        List of CTEs in this scope.
196
197        Returns:
198            list[exp.CTE]: ctes
199        """
200        self._ensure_collected()
201        return self._ctes

List of CTEs in this scope.

Returns:

list[exp.CTE]: ctes

derived_tables
203    @property
204    def derived_tables(self):
205        """
206        List of derived tables in this scope.
207
208        For example:
209            SELECT * FROM (SELECT ...) <- that's a derived table
210
211        Returns:
212            list[exp.Subquery]: derived tables
213        """
214        self._ensure_collected()
215        return self._derived_tables

List of derived tables in this scope.

For example:

SELECT * FROM (SELECT ...) <- that's a derived table

Returns:

list[exp.Subquery]: derived tables

udtfs
217    @property
218    def udtfs(self):
219        """
220        List of "User Defined Tabular Functions" in this scope.
221
222        Returns:
223            list[exp.UDTF]: UDTFs
224        """
225        self._ensure_collected()
226        return self._udtfs

List of "User Defined Tabular Functions" in this scope.

Returns:

list[exp.UDTF]: UDTFs

subqueries
228    @property
229    def subqueries(self):
230        """
231        List of subqueries in this scope.
232
233        For example:
234            SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
235
236        Returns:
237            list[exp.Select | exp.SetOperation]: subqueries
238        """
239        self._ensure_collected()
240        return self._subqueries

List of subqueries in this scope.

For example:

SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery

Returns:

list[exp.Select | exp.SetOperation]: subqueries

242    @property
243    def stars(self) -> t.List[exp.Column | exp.Dot]:
244        """
245        List of star expressions (columns or dots) in this scope.
246        """
247        self._ensure_collected()
248        return self._stars

List of star expressions (columns or dots) in this scope.

columns
250    @property
251    def columns(self):
252        """
253        List of columns in this scope.
254
255        Returns:
256            list[exp.Column]: Column instances in this scope, plus any
257                Columns that reference this scope from correlated subqueries.
258        """
259        if self._columns is None:
260            self._ensure_collected()
261            columns = self._raw_columns
262
263            external_columns = [
264                column
265                for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes)
266                for column in scope.external_columns
267            ]
268
269            named_selects = set(self.expression.named_selects)
270
271            self._columns = []
272            for column in columns + external_columns:
273                ancestor = column.find_ancestor(
274                    exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star
275                )
276                if (
277                    not ancestor
278                    or column.table
279                    or isinstance(ancestor, exp.Select)
280                    or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func))
281                    or (
282                        isinstance(ancestor, exp.Order)
283                        and (
284                            isinstance(ancestor.parent, exp.Window)
285                            or column.name not in named_selects
286                        )
287                    )
288                    or (isinstance(ancestor, exp.Star) and not column.arg_key == "except")
289                ):
290                    self._columns.append(column)
291
292        return self._columns

List of columns in this scope.

Returns:

list[exp.Column]: Column instances in this scope, plus any Columns that reference this scope from correlated subqueries.

selected_sources
294    @property
295    def selected_sources(self):
296        """
297        Mapping of nodes and sources that are actually selected from in this scope.
298
299        That is, all tables in a schema are selectable at any point. But a
300        table only becomes a selected source if it's included in a FROM or JOIN clause.
301
302        Returns:
303            dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
304        """
305        if self._selected_sources is None:
306            result = {}
307
308            for name, node in self.references:
309                if name in result:
310                    raise OptimizeError(f"Alias already used: {name}")
311                if name in self.sources:
312                    result[name] = (node, self.sources[name])
313
314            self._selected_sources = result
315        return self._selected_sources

Mapping of nodes and sources that are actually selected from in this scope.

That is, all tables in a schema are selectable at any point. But a table only becomes a selected source if it's included in a FROM or JOIN clause.

Returns:

dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes

references: List[Tuple[str, sqlglot.expressions.Expression]]
317    @property
318    def references(self) -> t.List[t.Tuple[str, exp.Expression]]:
319        if self._references is None:
320            self._references = []
321
322            for table in self.tables:
323                self._references.append((table.alias_or_name, table))
324            for expression in itertools.chain(self.derived_tables, self.udtfs):
325                self._references.append(
326                    (
327                        expression.alias,
328                        expression if expression.args.get("pivots") else expression.unnest(),
329                    )
330                )
331
332        return self._references
external_columns
334    @property
335    def external_columns(self):
336        """
337        Columns that appear to reference sources in outer scopes.
338
339        Returns:
340            list[exp.Column]: Column instances that don't reference
341                sources in the current scope.
342        """
343        if self._external_columns is None:
344            if isinstance(self.expression, exp.SetOperation):
345                left, right = self.union_scopes
346                self._external_columns = left.external_columns + right.external_columns
347            else:
348                self._external_columns = [
349                    c for c in self.columns if c.table not in self.selected_sources
350                ]
351
352        return self._external_columns

Columns that appear to reference sources in outer scopes.

Returns:

list[exp.Column]: Column instances that don't reference sources in the current scope.

unqualified_columns
354    @property
355    def unqualified_columns(self):
356        """
357        Unqualified columns in the current scope.
358
359        Returns:
360             list[exp.Column]: Unqualified columns
361        """
362        return [c for c in self.columns if not c.table]

Unqualified columns in the current scope.

Returns:

list[exp.Column]: Unqualified columns

join_hints
364    @property
365    def join_hints(self):
366        """
367        Hints that exist in the scope that reference tables
368
369        Returns:
370            list[exp.JoinHint]: Join hints that are referenced within the scope
371        """
372        if self._join_hints is None:
373            return []
374        return self._join_hints

Hints that exist in the scope that reference tables

Returns:

list[exp.JoinHint]: Join hints that are referenced within the scope

pivots
376    @property
377    def pivots(self):
378        if not self._pivots:
379            self._pivots = [
380                pivot for _, node in self.references for pivot in node.args.get("pivots") or []
381            ]
382
383        return self._pivots
def source_columns(self, source_name):
385    def source_columns(self, source_name):
386        """
387        Get all columns in the current scope for a particular source.
388
389        Args:
390            source_name (str): Name of the source
391        Returns:
392            list[exp.Column]: Column instances that reference `source_name`
393        """
394        return [column for column in self.columns if column.table == source_name]

Get all columns in the current scope for a particular source.

Arguments:
  • source_name (str): Name of the source
Returns:

list[exp.Column]: Column instances that reference source_name

is_subquery
396    @property
397    def is_subquery(self):
398        """Determine if this scope is a subquery"""
399        return self.scope_type == ScopeType.SUBQUERY

Determine if this scope is a subquery

is_derived_table
401    @property
402    def is_derived_table(self):
403        """Determine if this scope is a derived table"""
404        return self.scope_type == ScopeType.DERIVED_TABLE

Determine if this scope is a derived table

is_union
406    @property
407    def is_union(self):
408        """Determine if this scope is a union"""
409        return self.scope_type == ScopeType.UNION

Determine if this scope is a union

is_cte
411    @property
412    def is_cte(self):
413        """Determine if this scope is a common table expression"""
414        return self.scope_type == ScopeType.CTE

Determine if this scope is a common table expression

is_root
416    @property
417    def is_root(self):
418        """Determine if this is the root scope"""
419        return self.scope_type == ScopeType.ROOT

Determine if this is the root scope

is_udtf
421    @property
422    def is_udtf(self):
423        """Determine if this scope is a UDTF (User Defined Table Function)"""
424        return self.scope_type == ScopeType.UDTF

Determine if this scope is a UDTF (User Defined Table Function)

is_correlated_subquery
426    @property
427    def is_correlated_subquery(self):
428        """Determine if this scope is a correlated subquery"""
429        return bool(
430            (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral)))
431            and self.external_columns
432        )

Determine if this scope is a correlated subquery

def rename_source(self, old_name, new_name):
434    def rename_source(self, old_name, new_name):
435        """Rename a source in this scope"""
436        columns = self.sources.pop(old_name or "", [])
437        self.sources[new_name] = columns

Rename a source in this scope

def add_source(self, name, source):
439    def add_source(self, name, source):
440        """Add a source to this scope"""
441        self.sources[name] = source
442        self.clear_cache()

Add a source to this scope

def remove_source(self, name):
444    def remove_source(self, name):
445        """Remove a source from this scope"""
446        self.sources.pop(name, None)
447        self.clear_cache()

Remove a source from this scope

def traverse(self):
452    def traverse(self):
453        """
454        Traverse the scope tree from this node.
455
456        Yields:
457            Scope: scope instances in depth-first-search post-order
458        """
459        stack = [self]
460        result = []
461        while stack:
462            scope = stack.pop()
463            result.append(scope)
464            stack.extend(
465                itertools.chain(
466                    scope.cte_scopes,
467                    scope.union_scopes,
468                    scope.table_scopes,
469                    scope.subquery_scopes,
470                )
471            )
472
473        yield from reversed(result)

Traverse the scope tree from this node.

Yields:

Scope: scope instances in depth-first-search post-order

def ref_count(self):
475    def ref_count(self):
476        """
477        Count the number of times each scope in this tree is referenced.
478
479        Returns:
480            dict[int, int]: Mapping of Scope instance ID to reference count
481        """
482        scope_ref_count = defaultdict(lambda: 0)
483
484        for scope in self.traverse():
485            for _, source in scope.selected_sources.values():
486                scope_ref_count[id(source)] += 1
487
488        return scope_ref_count

Count the number of times each scope in this tree is referenced.

Returns:

dict[int, int]: Mapping of Scope instance ID to reference count

def traverse_scope( expression: sqlglot.expressions.Expression) -> List[Scope]:
491def traverse_scope(expression: exp.Expression) -> t.List[Scope]:
492    """
493    Traverse an expression by its "scopes".
494
495    "Scope" represents the current context of a Select statement.
496
497    This is helpful for optimizing queries, where we need more information than
498    the expression tree itself. For example, we might care about the source
499    names within a subquery. Returns a list because a generator could result in
500    incomplete properties which is confusing.
501
502    Examples:
503        >>> import sqlglot
504        >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
505        >>> scopes = traverse_scope(expression)
506        >>> scopes[0].expression.sql(), list(scopes[0].sources)
507        ('SELECT a FROM x', ['x'])
508        >>> scopes[1].expression.sql(), list(scopes[1].sources)
509        ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
510
511    Args:
512        expression: Expression to traverse
513
514    Returns:
515        A list of the created scope instances
516    """
517    if isinstance(expression, TRAVERSABLES):
518        return list(_traverse_scope(Scope(expression)))
519    return []

Traverse an expression by its "scopes".

"Scope" represents the current context of a Select statement.

This is helpful for optimizing queries, where we need more information than the expression tree itself. For example, we might care about the source names within a subquery. Returns a list because a generator could result in incomplete properties which is confusing.

Examples:
>>> import sqlglot
>>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y")
>>> scopes = traverse_scope(expression)
>>> scopes[0].expression.sql(), list(scopes[0].sources)
('SELECT a FROM x', ['x'])
>>> scopes[1].expression.sql(), list(scopes[1].sources)
('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
Arguments:
  • expression: Expression to traverse
Returns:

A list of the created scope instances

def build_scope( expression: sqlglot.expressions.Expression) -> Optional[Scope]:
522def build_scope(expression: exp.Expression) -> t.Optional[Scope]:
523    """
524    Build a scope tree.
525
526    Args:
527        expression: Expression to build the scope tree for.
528
529    Returns:
530        The root scope
531    """
532    return seq_get(traverse_scope(expression), -1)

Build a scope tree.

Arguments:
  • expression: Expression to build the scope tree for.
Returns:

The root scope

def walk_in_scope(expression, bfs=True, prune=None):
779def walk_in_scope(expression, bfs=True, prune=None):
780    """
781    Returns a generator object which visits all nodes in the syntrax tree, stopping at
782    nodes that start child scopes.
783
784    Args:
785        expression (exp.Expression):
786        bfs (bool): if set to True the BFS traversal order will be applied,
787            otherwise the DFS traversal will be used instead.
788        prune ((node, parent, arg_key) -> bool): callable that returns True if
789            the generator should stop traversing this branch of the tree.
790
791    Yields:
792        tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
793    """
794    # We'll use this variable to pass state into the dfs generator.
795    # Whenever we set it to True, we exclude a subtree from traversal.
796    crossed_scope_boundary = False
797
798    for node in expression.walk(
799        bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n))
800    ):
801        crossed_scope_boundary = False
802
803        yield node
804
805        if node is expression:
806            continue
807        if (
808            isinstance(node, exp.CTE)
809            or (
810                isinstance(node.parent, (exp.From, exp.Join, exp.Subquery))
811                and (_is_derived_table(node) or isinstance(node, exp.UDTF))
812            )
813            or isinstance(node, exp.UNWRAPPED_QUERIES)
814        ):
815            crossed_scope_boundary = True
816
817            if isinstance(node, (exp.Subquery, exp.UDTF)):
818                # The following args are not actually in the inner scope, so we should visit them
819                for key in ("joins", "laterals", "pivots"):
820                    for arg in node.args.get(key) or []:
821                        yield from walk_in_scope(arg, bfs=bfs)

Returns a generator object which visits all nodes in the syntrax tree, stopping at nodes that start child scopes.

Arguments:
  • expression (exp.Expression):
  • bfs (bool): if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead.
  • prune ((node, parent, arg_key) -> bool): callable that returns True if the generator should stop traversing this branch of the tree.
Yields:

tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key

def find_all_in_scope(expression, expression_types, bfs=True):
824def find_all_in_scope(expression, expression_types, bfs=True):
825    """
826    Returns a generator object which visits all nodes in this scope and only yields those that
827    match at least one of the specified expression types.
828
829    This does NOT traverse into subscopes.
830
831    Args:
832        expression (exp.Expression):
833        expression_types (tuple[type]|type): the expression type(s) to match.
834        bfs (bool): True to use breadth-first search, False to use depth-first.
835
836    Yields:
837        exp.Expression: nodes
838    """
839    for expression in walk_in_scope(expression, bfs=bfs):
840        if isinstance(expression, tuple(ensure_collection(expression_types))):
841            yield expression

Returns a generator object which visits all nodes in this scope and only yields those that match at least one of the specified expression types.

This does NOT traverse into subscopes.

Arguments:
  • expression (exp.Expression):
  • expression_types (tuple[type]|type): the expression type(s) to match.
  • bfs (bool): True to use breadth-first search, False to use depth-first.
Yields:

exp.Expression: nodes

def find_in_scope(expression, expression_types, bfs=True):
844def find_in_scope(expression, expression_types, bfs=True):
845    """
846    Returns the first node in this scope which matches at least one of the specified types.
847
848    This does NOT traverse into subscopes.
849
850    Args:
851        expression (exp.Expression):
852        expression_types (tuple[type]|type): the expression type(s) to match.
853        bfs (bool): True to use breadth-first search, False to use depth-first.
854
855    Returns:
856        exp.Expression: the node which matches the criteria or None if no node matching
857        the criteria was found.
858    """
859    return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)

Returns the first node in this scope which matches at least one of the specified types.

This does NOT traverse into subscopes.

Arguments:
  • expression (exp.Expression):
  • expression_types (tuple[type]|type): the expression type(s) to match.
  • bfs (bool): True to use breadth-first search, False to use depth-first.
Returns:

exp.Expression: the node which matches the criteria or None if no node matching the criteria was found.