sqlglot.optimizer.scope
1from __future__ import annotations 2 3import itertools 4import logging 5import typing as t 6from collections import defaultdict 7from enum import Enum, auto 8 9from sqlglot import exp 10from sqlglot.errors import OptimizeError 11from sqlglot.helper import ensure_collection, find_new_name, seq_get 12 13logger = logging.getLogger("sqlglot") 14 15TRAVERSABLES = (exp.Query, exp.DDL, exp.DML) 16 17 18class ScopeType(Enum): 19 ROOT = auto() 20 SUBQUERY = auto() 21 DERIVED_TABLE = auto() 22 CTE = auto() 23 UNION = auto() 24 UDTF = auto() 25 26 27class Scope: 28 """ 29 Selection scope. 30 31 Attributes: 32 expression (exp.Select|exp.SetOperation): Root expression of this scope 33 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 34 a Table expression or another Scope instance. For example: 35 SELECT * FROM x {"x": Table(this="x")} 36 SELECT * FROM x AS y {"y": Table(this="x")} 37 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 38 lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals 39 For example: 40 SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; 41 The LATERAL VIEW EXPLODE gets x as a source. 42 cte_sources (dict[str, Scope]): Sources from CTES 43 outer_columns (list[str]): If this is a derived table or CTE, and the outer query 44 defines a column list for the alias of this scope, this is that list of columns. 45 For example: 46 SELECT * FROM (SELECT ...) AS y(col1, col2) 47 The inner query would have `["col1", "col2"]` for its `outer_columns` 48 parent (Scope): Parent scope 49 scope_type (ScopeType): Type of this scope, relative to it's parent 50 subquery_scopes (list[Scope]): List of all child scopes for subqueries 51 cte_scopes (list[Scope]): List of all child scopes for CTEs 52 derived_table_scopes (list[Scope]): List of all child scopes for derived_tables 53 udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions 54 table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined 55 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 56 a list of the left and right child scopes. 57 """ 58 59 def __init__( 60 self, 61 expression, 62 sources=None, 63 outer_columns=None, 64 parent=None, 65 scope_type=ScopeType.ROOT, 66 lateral_sources=None, 67 cte_sources=None, 68 ): 69 self.expression = expression 70 self.sources = sources or {} 71 self.lateral_sources = lateral_sources or {} 72 self.cte_sources = cte_sources or {} 73 self.sources.update(self.lateral_sources) 74 self.sources.update(self.cte_sources) 75 self.outer_columns = outer_columns or [] 76 self.parent = parent 77 self.scope_type = scope_type 78 self.subquery_scopes = [] 79 self.derived_table_scopes = [] 80 self.table_scopes = [] 81 self.cte_scopes = [] 82 self.union_scopes = [] 83 self.udtf_scopes = [] 84 self.clear_cache() 85 86 def clear_cache(self): 87 self._collected = False 88 self._raw_columns = None 89 self._stars = None 90 self._derived_tables = None 91 self._udtfs = None 92 self._tables = None 93 self._ctes = None 94 self._subqueries = None 95 self._selected_sources = None 96 self._columns = None 97 self._external_columns = None 98 self._join_hints = None 99 self._pivots = None 100 self._references = None 101 102 def branch( 103 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 104 ): 105 """Branch from the current scope to a new, inner scope""" 106 return Scope( 107 expression=expression.unnest(), 108 sources=sources.copy() if sources else None, 109 parent=self, 110 scope_type=scope_type, 111 cte_sources={**self.cte_sources, **(cte_sources or {})}, 112 lateral_sources=lateral_sources.copy() if lateral_sources else None, 113 **kwargs, 114 ) 115 116 def _collect(self): 117 self._tables = [] 118 self._ctes = [] 119 self._subqueries = [] 120 self._derived_tables = [] 121 self._udtfs = [] 122 self._raw_columns = [] 123 self._stars = [] 124 self._join_hints = [] 125 126 for node in self.walk(bfs=False): 127 if node is self.expression: 128 continue 129 130 if isinstance(node, exp.Dot) and node.is_star: 131 self._stars.append(node) 132 elif isinstance(node, exp.Column): 133 if isinstance(node.this, exp.Star): 134 self._stars.append(node) 135 else: 136 self._raw_columns.append(node) 137 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 138 self._tables.append(node) 139 elif isinstance(node, exp.JoinHint): 140 self._join_hints.append(node) 141 elif isinstance(node, exp.UDTF): 142 self._udtfs.append(node) 143 elif isinstance(node, exp.CTE): 144 self._ctes.append(node) 145 elif _is_derived_table(node) and isinstance( 146 node.parent, (exp.From, exp.Join, exp.Subquery) 147 ): 148 self._derived_tables.append(node) 149 elif isinstance(node, exp.UNWRAPPED_QUERIES): 150 self._subqueries.append(node) 151 152 self._collected = True 153 154 def _ensure_collected(self): 155 if not self._collected: 156 self._collect() 157 158 def walk(self, bfs=True, prune=None): 159 return walk_in_scope(self.expression, bfs=bfs, prune=None) 160 161 def find(self, *expression_types, bfs=True): 162 return find_in_scope(self.expression, expression_types, bfs=bfs) 163 164 def find_all(self, *expression_types, bfs=True): 165 return find_all_in_scope(self.expression, expression_types, bfs=bfs) 166 167 def replace(self, old, new): 168 """ 169 Replace `old` with `new`. 170 171 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 172 173 Args: 174 old (exp.Expression): old node 175 new (exp.Expression): new node 176 """ 177 old.replace(new) 178 self.clear_cache() 179 180 @property 181 def tables(self): 182 """ 183 List of tables in this scope. 184 185 Returns: 186 list[exp.Table]: tables 187 """ 188 self._ensure_collected() 189 return self._tables 190 191 @property 192 def ctes(self): 193 """ 194 List of CTEs in this scope. 195 196 Returns: 197 list[exp.CTE]: ctes 198 """ 199 self._ensure_collected() 200 return self._ctes 201 202 @property 203 def derived_tables(self): 204 """ 205 List of derived tables in this scope. 206 207 For example: 208 SELECT * FROM (SELECT ...) <- that's a derived table 209 210 Returns: 211 list[exp.Subquery]: derived tables 212 """ 213 self._ensure_collected() 214 return self._derived_tables 215 216 @property 217 def udtfs(self): 218 """ 219 List of "User Defined Tabular Functions" in this scope. 220 221 Returns: 222 list[exp.UDTF]: UDTFs 223 """ 224 self._ensure_collected() 225 return self._udtfs 226 227 @property 228 def subqueries(self): 229 """ 230 List of subqueries in this scope. 231 232 For example: 233 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 234 235 Returns: 236 list[exp.Select | exp.SetOperation]: subqueries 237 """ 238 self._ensure_collected() 239 return self._subqueries 240 241 @property 242 def stars(self) -> t.List[exp.Column | exp.Dot]: 243 """ 244 List of star expressions (columns or dots) in this scope. 245 """ 246 self._ensure_collected() 247 return self._stars 248 249 @property 250 def columns(self): 251 """ 252 List of columns in this scope. 253 254 Returns: 255 list[exp.Column]: Column instances in this scope, plus any 256 Columns that reference this scope from correlated subqueries. 257 """ 258 if self._columns is None: 259 self._ensure_collected() 260 columns = self._raw_columns 261 262 external_columns = [ 263 column 264 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 265 for column in scope.external_columns 266 ] 267 268 named_selects = set(self.expression.named_selects) 269 270 self._columns = [] 271 for column in columns + external_columns: 272 ancestor = column.find_ancestor( 273 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 274 ) 275 if ( 276 not ancestor 277 or column.table 278 or isinstance(ancestor, exp.Select) 279 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 280 or ( 281 isinstance(ancestor, exp.Order) 282 and ( 283 isinstance(ancestor.parent, exp.Window) 284 or column.name not in named_selects 285 ) 286 ) 287 or (isinstance(ancestor, exp.Star) and not column.arg_key == "except") 288 ): 289 self._columns.append(column) 290 291 return self._columns 292 293 @property 294 def selected_sources(self): 295 """ 296 Mapping of nodes and sources that are actually selected from in this scope. 297 298 That is, all tables in a schema are selectable at any point. But a 299 table only becomes a selected source if it's included in a FROM or JOIN clause. 300 301 Returns: 302 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 303 """ 304 if self._selected_sources is None: 305 result = {} 306 307 for name, node in self.references: 308 if name in result: 309 raise OptimizeError(f"Alias already used: {name}") 310 if name in self.sources: 311 result[name] = (node, self.sources[name]) 312 313 self._selected_sources = result 314 return self._selected_sources 315 316 @property 317 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 318 if self._references is None: 319 self._references = [] 320 321 for table in self.tables: 322 self._references.append((table.alias_or_name, table)) 323 for expression in itertools.chain(self.derived_tables, self.udtfs): 324 self._references.append( 325 ( 326 expression.alias, 327 expression if expression.args.get("pivots") else expression.unnest(), 328 ) 329 ) 330 331 return self._references 332 333 @property 334 def external_columns(self): 335 """ 336 Columns that appear to reference sources in outer scopes. 337 338 Returns: 339 list[exp.Column]: Column instances that don't reference 340 sources in the current scope. 341 """ 342 if self._external_columns is None: 343 if isinstance(self.expression, exp.SetOperation): 344 left, right = self.union_scopes 345 self._external_columns = left.external_columns + right.external_columns 346 else: 347 self._external_columns = [ 348 c for c in self.columns if c.table not in self.selected_sources 349 ] 350 351 return self._external_columns 352 353 @property 354 def unqualified_columns(self): 355 """ 356 Unqualified columns in the current scope. 357 358 Returns: 359 list[exp.Column]: Unqualified columns 360 """ 361 return [c for c in self.columns if not c.table] 362 363 @property 364 def join_hints(self): 365 """ 366 Hints that exist in the scope that reference tables 367 368 Returns: 369 list[exp.JoinHint]: Join hints that are referenced within the scope 370 """ 371 if self._join_hints is None: 372 return [] 373 return self._join_hints 374 375 @property 376 def pivots(self): 377 if not self._pivots: 378 self._pivots = [ 379 pivot for _, node in self.references for pivot in node.args.get("pivots") or [] 380 ] 381 382 return self._pivots 383 384 def source_columns(self, source_name): 385 """ 386 Get all columns in the current scope for a particular source. 387 388 Args: 389 source_name (str): Name of the source 390 Returns: 391 list[exp.Column]: Column instances that reference `source_name` 392 """ 393 return [column for column in self.columns if column.table == source_name] 394 395 @property 396 def is_subquery(self): 397 """Determine if this scope is a subquery""" 398 return self.scope_type == ScopeType.SUBQUERY 399 400 @property 401 def is_derived_table(self): 402 """Determine if this scope is a derived table""" 403 return self.scope_type == ScopeType.DERIVED_TABLE 404 405 @property 406 def is_union(self): 407 """Determine if this scope is a union""" 408 return self.scope_type == ScopeType.UNION 409 410 @property 411 def is_cte(self): 412 """Determine if this scope is a common table expression""" 413 return self.scope_type == ScopeType.CTE 414 415 @property 416 def is_root(self): 417 """Determine if this is the root scope""" 418 return self.scope_type == ScopeType.ROOT 419 420 @property 421 def is_udtf(self): 422 """Determine if this scope is a UDTF (User Defined Table Function)""" 423 return self.scope_type == ScopeType.UDTF 424 425 @property 426 def is_correlated_subquery(self): 427 """Determine if this scope is a correlated subquery""" 428 return bool( 429 (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral))) 430 and self.external_columns 431 ) 432 433 def rename_source(self, old_name, new_name): 434 """Rename a source in this scope""" 435 columns = self.sources.pop(old_name or "", []) 436 self.sources[new_name] = columns 437 438 def add_source(self, name, source): 439 """Add a source to this scope""" 440 self.sources[name] = source 441 self.clear_cache() 442 443 def remove_source(self, name): 444 """Remove a source from this scope""" 445 self.sources.pop(name, None) 446 self.clear_cache() 447 448 def __repr__(self): 449 return f"Scope<{self.expression.sql()}>" 450 451 def traverse(self): 452 """ 453 Traverse the scope tree from this node. 454 455 Yields: 456 Scope: scope instances in depth-first-search post-order 457 """ 458 stack = [self] 459 result = [] 460 while stack: 461 scope = stack.pop() 462 result.append(scope) 463 stack.extend( 464 itertools.chain( 465 scope.cte_scopes, 466 scope.union_scopes, 467 scope.table_scopes, 468 scope.subquery_scopes, 469 ) 470 ) 471 472 yield from reversed(result) 473 474 def ref_count(self): 475 """ 476 Count the number of times each scope in this tree is referenced. 477 478 Returns: 479 dict[int, int]: Mapping of Scope instance ID to reference count 480 """ 481 scope_ref_count = defaultdict(lambda: 0) 482 483 for scope in self.traverse(): 484 for _, source in scope.selected_sources.values(): 485 scope_ref_count[id(source)] += 1 486 487 return scope_ref_count 488 489 490def traverse_scope(expression: exp.Expression) -> t.List[Scope]: 491 """ 492 Traverse an expression by its "scopes". 493 494 "Scope" represents the current context of a Select statement. 495 496 This is helpful for optimizing queries, where we need more information than 497 the expression tree itself. For example, we might care about the source 498 names within a subquery. Returns a list because a generator could result in 499 incomplete properties which is confusing. 500 501 Examples: 502 >>> import sqlglot 503 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 504 >>> scopes = traverse_scope(expression) 505 >>> scopes[0].expression.sql(), list(scopes[0].sources) 506 ('SELECT a FROM x', ['x']) 507 >>> scopes[1].expression.sql(), list(scopes[1].sources) 508 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 509 510 Args: 511 expression: Expression to traverse 512 513 Returns: 514 A list of the created scope instances 515 """ 516 if isinstance(expression, TRAVERSABLES): 517 return list(_traverse_scope(Scope(expression))) 518 return [] 519 520 521def build_scope(expression: exp.Expression) -> t.Optional[Scope]: 522 """ 523 Build a scope tree. 524 525 Args: 526 expression: Expression to build the scope tree for. 527 528 Returns: 529 The root scope 530 """ 531 return seq_get(traverse_scope(expression), -1) 532 533 534def _traverse_scope(scope): 535 expression = scope.expression 536 537 if isinstance(expression, exp.Select): 538 yield from _traverse_select(scope) 539 elif isinstance(expression, exp.SetOperation): 540 yield from _traverse_ctes(scope) 541 yield from _traverse_union(scope) 542 return 543 elif isinstance(expression, exp.Subquery): 544 if scope.is_root: 545 yield from _traverse_select(scope) 546 else: 547 yield from _traverse_subqueries(scope) 548 elif isinstance(expression, exp.Table): 549 yield from _traverse_tables(scope) 550 elif isinstance(expression, exp.UDTF): 551 yield from _traverse_udtfs(scope) 552 elif isinstance(expression, exp.DDL): 553 if isinstance(expression.expression, exp.Query): 554 yield from _traverse_ctes(scope) 555 yield from _traverse_scope(Scope(expression.expression, cte_sources=scope.cte_sources)) 556 return 557 elif isinstance(expression, exp.DML): 558 yield from _traverse_ctes(scope) 559 for query in find_all_in_scope(expression, exp.Query): 560 # This check ensures we don't yield the CTE queries twice 561 if not isinstance(query.parent, exp.CTE): 562 yield from _traverse_scope(Scope(query, cte_sources=scope.cte_sources)) 563 return 564 else: 565 logger.warning("Cannot traverse scope %s with type '%s'", expression, type(expression)) 566 return 567 568 yield scope 569 570 571def _traverse_select(scope): 572 yield from _traverse_ctes(scope) 573 yield from _traverse_tables(scope) 574 yield from _traverse_subqueries(scope) 575 576 577def _traverse_union(scope): 578 prev_scope = None 579 union_scope_stack = [scope] 580 expression_stack = [scope.expression.right, scope.expression.left] 581 582 while expression_stack: 583 expression = expression_stack.pop() 584 union_scope = union_scope_stack[-1] 585 586 new_scope = union_scope.branch( 587 expression, 588 outer_columns=union_scope.outer_columns, 589 scope_type=ScopeType.UNION, 590 ) 591 592 if isinstance(expression, exp.SetOperation): 593 yield from _traverse_ctes(new_scope) 594 595 union_scope_stack.append(new_scope) 596 expression_stack.extend([expression.right, expression.left]) 597 continue 598 599 for scope in _traverse_scope(new_scope): 600 yield scope 601 602 if prev_scope: 603 union_scope_stack.pop() 604 union_scope.union_scopes = [prev_scope, scope] 605 prev_scope = union_scope 606 607 yield union_scope 608 else: 609 prev_scope = scope 610 611 612def _traverse_ctes(scope): 613 sources = {} 614 615 for cte in scope.ctes: 616 cte_name = cte.alias 617 618 # if the scope is a recursive cte, it must be in the form of base_case UNION recursive. 619 # thus the recursive scope is the first section of the union. 620 with_ = scope.expression.args.get("with") 621 if with_ and with_.recursive: 622 union = cte.this 623 624 if isinstance(union, exp.SetOperation): 625 sources[cte_name] = scope.branch(union.this, scope_type=ScopeType.CTE) 626 627 child_scope = None 628 629 for child_scope in _traverse_scope( 630 scope.branch( 631 cte.this, 632 cte_sources=sources, 633 outer_columns=cte.alias_column_names, 634 scope_type=ScopeType.CTE, 635 ) 636 ): 637 yield child_scope 638 639 # append the final child_scope yielded 640 if child_scope: 641 sources[cte_name] = child_scope 642 scope.cte_scopes.append(child_scope) 643 644 scope.sources.update(sources) 645 scope.cte_sources.update(sources) 646 647 648def _is_derived_table(expression: exp.Subquery) -> bool: 649 """ 650 We represent (tbl1 JOIN tbl2) as a Subquery, but it's not really a "derived table", 651 as it doesn't introduce a new scope. If an alias is present, it shadows all names 652 under the Subquery, so that's one exception to this rule. 653 """ 654 return isinstance(expression, exp.Subquery) and bool( 655 expression.alias or isinstance(expression.this, exp.UNWRAPPED_QUERIES) 656 ) 657 658 659def _traverse_tables(scope): 660 sources = {} 661 662 # Traverse FROMs, JOINs, and LATERALs in the order they are defined 663 expressions = [] 664 from_ = scope.expression.args.get("from") 665 if from_: 666 expressions.append(from_.this) 667 668 for join in scope.expression.args.get("joins") or []: 669 expressions.append(join.this) 670 671 if isinstance(scope.expression, exp.Table): 672 expressions.append(scope.expression) 673 674 expressions.extend(scope.expression.args.get("laterals") or []) 675 676 for expression in expressions: 677 if isinstance(expression, exp.Table): 678 table_name = expression.name 679 source_name = expression.alias_or_name 680 681 if table_name in scope.sources and not expression.db: 682 # This is a reference to a parent source (e.g. a CTE), not an actual table, unless 683 # it is pivoted, because then we get back a new table and hence a new source. 684 pivots = expression.args.get("pivots") 685 if pivots: 686 sources[pivots[0].alias] = expression 687 else: 688 sources[source_name] = scope.sources[table_name] 689 elif source_name in sources: 690 sources[find_new_name(sources, table_name)] = expression 691 else: 692 sources[source_name] = expression 693 694 # Make sure to not include the joins twice 695 if expression is not scope.expression: 696 expressions.extend(join.this for join in expression.args.get("joins") or []) 697 698 continue 699 700 if not isinstance(expression, exp.DerivedTable): 701 continue 702 703 if isinstance(expression, exp.UDTF): 704 lateral_sources = sources 705 scope_type = ScopeType.UDTF 706 scopes = scope.udtf_scopes 707 elif _is_derived_table(expression): 708 lateral_sources = None 709 scope_type = ScopeType.DERIVED_TABLE 710 scopes = scope.derived_table_scopes 711 expressions.extend(join.this for join in expression.args.get("joins") or []) 712 else: 713 # Makes sure we check for possible sources in nested table constructs 714 expressions.append(expression.this) 715 expressions.extend(join.this for join in expression.args.get("joins") or []) 716 continue 717 718 for child_scope in _traverse_scope( 719 scope.branch( 720 expression, 721 lateral_sources=lateral_sources, 722 outer_columns=expression.alias_column_names, 723 scope_type=scope_type, 724 ) 725 ): 726 yield child_scope 727 728 # Tables without aliases will be set as "" 729 # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything. 730 # Until then, this means that only a single, unaliased derived table is allowed (rather, 731 # the latest one wins. 732 sources[expression.alias] = child_scope 733 734 # append the final child_scope yielded 735 scopes.append(child_scope) 736 scope.table_scopes.append(child_scope) 737 738 scope.sources.update(sources) 739 740 741def _traverse_subqueries(scope): 742 for subquery in scope.subqueries: 743 top = None 744 for child_scope in _traverse_scope(scope.branch(subquery, scope_type=ScopeType.SUBQUERY)): 745 yield child_scope 746 top = child_scope 747 scope.subquery_scopes.append(top) 748 749 750def _traverse_udtfs(scope): 751 if isinstance(scope.expression, exp.Unnest): 752 expressions = scope.expression.expressions 753 elif isinstance(scope.expression, exp.Lateral): 754 expressions = [scope.expression.this] 755 else: 756 expressions = [] 757 758 sources = {} 759 for expression in expressions: 760 if _is_derived_table(expression): 761 top = None 762 for child_scope in _traverse_scope( 763 scope.branch( 764 expression, 765 scope_type=ScopeType.SUBQUERY, 766 outer_columns=expression.alias_column_names, 767 ) 768 ): 769 yield child_scope 770 top = child_scope 771 sources[expression.alias] = child_scope 772 773 scope.subquery_scopes.append(top) 774 775 scope.sources.update(sources) 776 777 778def walk_in_scope(expression, bfs=True, prune=None): 779 """ 780 Returns a generator object which visits all nodes in the syntrax tree, stopping at 781 nodes that start child scopes. 782 783 Args: 784 expression (exp.Expression): 785 bfs (bool): if set to True the BFS traversal order will be applied, 786 otherwise the DFS traversal will be used instead. 787 prune ((node, parent, arg_key) -> bool): callable that returns True if 788 the generator should stop traversing this branch of the tree. 789 790 Yields: 791 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 792 """ 793 # We'll use this variable to pass state into the dfs generator. 794 # Whenever we set it to True, we exclude a subtree from traversal. 795 crossed_scope_boundary = False 796 797 for node in expression.walk( 798 bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n)) 799 ): 800 crossed_scope_boundary = False 801 802 yield node 803 804 if node is expression: 805 continue 806 if ( 807 isinstance(node, exp.CTE) 808 or ( 809 isinstance(node.parent, (exp.From, exp.Join, exp.Subquery)) 810 and (_is_derived_table(node) or isinstance(node, exp.UDTF)) 811 ) 812 or isinstance(node, exp.UNWRAPPED_QUERIES) 813 ): 814 crossed_scope_boundary = True 815 816 if isinstance(node, (exp.Subquery, exp.UDTF)): 817 # The following args are not actually in the inner scope, so we should visit them 818 for key in ("joins", "laterals", "pivots"): 819 for arg in node.args.get(key) or []: 820 yield from walk_in_scope(arg, bfs=bfs) 821 822 823def find_all_in_scope(expression, expression_types, bfs=True): 824 """ 825 Returns a generator object which visits all nodes in this scope and only yields those that 826 match at least one of the specified expression types. 827 828 This does NOT traverse into subscopes. 829 830 Args: 831 expression (exp.Expression): 832 expression_types (tuple[type]|type): the expression type(s) to match. 833 bfs (bool): True to use breadth-first search, False to use depth-first. 834 835 Yields: 836 exp.Expression: nodes 837 """ 838 for expression in walk_in_scope(expression, bfs=bfs): 839 if isinstance(expression, tuple(ensure_collection(expression_types))): 840 yield expression 841 842 843def find_in_scope(expression, expression_types, bfs=True): 844 """ 845 Returns the first node in this scope which matches at least one of the specified types. 846 847 This does NOT traverse into subscopes. 848 849 Args: 850 expression (exp.Expression): 851 expression_types (tuple[type]|type): the expression type(s) to match. 852 bfs (bool): True to use breadth-first search, False to use depth-first. 853 854 Returns: 855 exp.Expression: the node which matches the criteria or None if no node matching 856 the criteria was found. 857 """ 858 return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
19class ScopeType(Enum): 20 ROOT = auto() 21 SUBQUERY = auto() 22 DERIVED_TABLE = auto() 23 CTE = auto() 24 UNION = auto() 25 UDTF = auto()
An enumeration.
Inherited Members
- enum.Enum
- name
- value
28class Scope: 29 """ 30 Selection scope. 31 32 Attributes: 33 expression (exp.Select|exp.SetOperation): Root expression of this scope 34 sources (dict[str, exp.Table|Scope]): Mapping of source name to either 35 a Table expression or another Scope instance. For example: 36 SELECT * FROM x {"x": Table(this="x")} 37 SELECT * FROM x AS y {"y": Table(this="x")} 38 SELECT * FROM (SELECT ...) AS y {"y": Scope(...)} 39 lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals 40 For example: 41 SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; 42 The LATERAL VIEW EXPLODE gets x as a source. 43 cte_sources (dict[str, Scope]): Sources from CTES 44 outer_columns (list[str]): If this is a derived table or CTE, and the outer query 45 defines a column list for the alias of this scope, this is that list of columns. 46 For example: 47 SELECT * FROM (SELECT ...) AS y(col1, col2) 48 The inner query would have `["col1", "col2"]` for its `outer_columns` 49 parent (Scope): Parent scope 50 scope_type (ScopeType): Type of this scope, relative to it's parent 51 subquery_scopes (list[Scope]): List of all child scopes for subqueries 52 cte_scopes (list[Scope]): List of all child scopes for CTEs 53 derived_table_scopes (list[Scope]): List of all child scopes for derived_tables 54 udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions 55 table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined 56 union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be 57 a list of the left and right child scopes. 58 """ 59 60 def __init__( 61 self, 62 expression, 63 sources=None, 64 outer_columns=None, 65 parent=None, 66 scope_type=ScopeType.ROOT, 67 lateral_sources=None, 68 cte_sources=None, 69 ): 70 self.expression = expression 71 self.sources = sources or {} 72 self.lateral_sources = lateral_sources or {} 73 self.cte_sources = cte_sources or {} 74 self.sources.update(self.lateral_sources) 75 self.sources.update(self.cte_sources) 76 self.outer_columns = outer_columns or [] 77 self.parent = parent 78 self.scope_type = scope_type 79 self.subquery_scopes = [] 80 self.derived_table_scopes = [] 81 self.table_scopes = [] 82 self.cte_scopes = [] 83 self.union_scopes = [] 84 self.udtf_scopes = [] 85 self.clear_cache() 86 87 def clear_cache(self): 88 self._collected = False 89 self._raw_columns = None 90 self._stars = None 91 self._derived_tables = None 92 self._udtfs = None 93 self._tables = None 94 self._ctes = None 95 self._subqueries = None 96 self._selected_sources = None 97 self._columns = None 98 self._external_columns = None 99 self._join_hints = None 100 self._pivots = None 101 self._references = None 102 103 def branch( 104 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 105 ): 106 """Branch from the current scope to a new, inner scope""" 107 return Scope( 108 expression=expression.unnest(), 109 sources=sources.copy() if sources else None, 110 parent=self, 111 scope_type=scope_type, 112 cte_sources={**self.cte_sources, **(cte_sources or {})}, 113 lateral_sources=lateral_sources.copy() if lateral_sources else None, 114 **kwargs, 115 ) 116 117 def _collect(self): 118 self._tables = [] 119 self._ctes = [] 120 self._subqueries = [] 121 self._derived_tables = [] 122 self._udtfs = [] 123 self._raw_columns = [] 124 self._stars = [] 125 self._join_hints = [] 126 127 for node in self.walk(bfs=False): 128 if node is self.expression: 129 continue 130 131 if isinstance(node, exp.Dot) and node.is_star: 132 self._stars.append(node) 133 elif isinstance(node, exp.Column): 134 if isinstance(node.this, exp.Star): 135 self._stars.append(node) 136 else: 137 self._raw_columns.append(node) 138 elif isinstance(node, exp.Table) and not isinstance(node.parent, exp.JoinHint): 139 self._tables.append(node) 140 elif isinstance(node, exp.JoinHint): 141 self._join_hints.append(node) 142 elif isinstance(node, exp.UDTF): 143 self._udtfs.append(node) 144 elif isinstance(node, exp.CTE): 145 self._ctes.append(node) 146 elif _is_derived_table(node) and isinstance( 147 node.parent, (exp.From, exp.Join, exp.Subquery) 148 ): 149 self._derived_tables.append(node) 150 elif isinstance(node, exp.UNWRAPPED_QUERIES): 151 self._subqueries.append(node) 152 153 self._collected = True 154 155 def _ensure_collected(self): 156 if not self._collected: 157 self._collect() 158 159 def walk(self, bfs=True, prune=None): 160 return walk_in_scope(self.expression, bfs=bfs, prune=None) 161 162 def find(self, *expression_types, bfs=True): 163 return find_in_scope(self.expression, expression_types, bfs=bfs) 164 165 def find_all(self, *expression_types, bfs=True): 166 return find_all_in_scope(self.expression, expression_types, bfs=bfs) 167 168 def replace(self, old, new): 169 """ 170 Replace `old` with `new`. 171 172 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 173 174 Args: 175 old (exp.Expression): old node 176 new (exp.Expression): new node 177 """ 178 old.replace(new) 179 self.clear_cache() 180 181 @property 182 def tables(self): 183 """ 184 List of tables in this scope. 185 186 Returns: 187 list[exp.Table]: tables 188 """ 189 self._ensure_collected() 190 return self._tables 191 192 @property 193 def ctes(self): 194 """ 195 List of CTEs in this scope. 196 197 Returns: 198 list[exp.CTE]: ctes 199 """ 200 self._ensure_collected() 201 return self._ctes 202 203 @property 204 def derived_tables(self): 205 """ 206 List of derived tables in this scope. 207 208 For example: 209 SELECT * FROM (SELECT ...) <- that's a derived table 210 211 Returns: 212 list[exp.Subquery]: derived tables 213 """ 214 self._ensure_collected() 215 return self._derived_tables 216 217 @property 218 def udtfs(self): 219 """ 220 List of "User Defined Tabular Functions" in this scope. 221 222 Returns: 223 list[exp.UDTF]: UDTFs 224 """ 225 self._ensure_collected() 226 return self._udtfs 227 228 @property 229 def subqueries(self): 230 """ 231 List of subqueries in this scope. 232 233 For example: 234 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 235 236 Returns: 237 list[exp.Select | exp.SetOperation]: subqueries 238 """ 239 self._ensure_collected() 240 return self._subqueries 241 242 @property 243 def stars(self) -> t.List[exp.Column | exp.Dot]: 244 """ 245 List of star expressions (columns or dots) in this scope. 246 """ 247 self._ensure_collected() 248 return self._stars 249 250 @property 251 def columns(self): 252 """ 253 List of columns in this scope. 254 255 Returns: 256 list[exp.Column]: Column instances in this scope, plus any 257 Columns that reference this scope from correlated subqueries. 258 """ 259 if self._columns is None: 260 self._ensure_collected() 261 columns = self._raw_columns 262 263 external_columns = [ 264 column 265 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 266 for column in scope.external_columns 267 ] 268 269 named_selects = set(self.expression.named_selects) 270 271 self._columns = [] 272 for column in columns + external_columns: 273 ancestor = column.find_ancestor( 274 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 275 ) 276 if ( 277 not ancestor 278 or column.table 279 or isinstance(ancestor, exp.Select) 280 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 281 or ( 282 isinstance(ancestor, exp.Order) 283 and ( 284 isinstance(ancestor.parent, exp.Window) 285 or column.name not in named_selects 286 ) 287 ) 288 or (isinstance(ancestor, exp.Star) and not column.arg_key == "except") 289 ): 290 self._columns.append(column) 291 292 return self._columns 293 294 @property 295 def selected_sources(self): 296 """ 297 Mapping of nodes and sources that are actually selected from in this scope. 298 299 That is, all tables in a schema are selectable at any point. But a 300 table only becomes a selected source if it's included in a FROM or JOIN clause. 301 302 Returns: 303 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 304 """ 305 if self._selected_sources is None: 306 result = {} 307 308 for name, node in self.references: 309 if name in result: 310 raise OptimizeError(f"Alias already used: {name}") 311 if name in self.sources: 312 result[name] = (node, self.sources[name]) 313 314 self._selected_sources = result 315 return self._selected_sources 316 317 @property 318 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 319 if self._references is None: 320 self._references = [] 321 322 for table in self.tables: 323 self._references.append((table.alias_or_name, table)) 324 for expression in itertools.chain(self.derived_tables, self.udtfs): 325 self._references.append( 326 ( 327 expression.alias, 328 expression if expression.args.get("pivots") else expression.unnest(), 329 ) 330 ) 331 332 return self._references 333 334 @property 335 def external_columns(self): 336 """ 337 Columns that appear to reference sources in outer scopes. 338 339 Returns: 340 list[exp.Column]: Column instances that don't reference 341 sources in the current scope. 342 """ 343 if self._external_columns is None: 344 if isinstance(self.expression, exp.SetOperation): 345 left, right = self.union_scopes 346 self._external_columns = left.external_columns + right.external_columns 347 else: 348 self._external_columns = [ 349 c for c in self.columns if c.table not in self.selected_sources 350 ] 351 352 return self._external_columns 353 354 @property 355 def unqualified_columns(self): 356 """ 357 Unqualified columns in the current scope. 358 359 Returns: 360 list[exp.Column]: Unqualified columns 361 """ 362 return [c for c in self.columns if not c.table] 363 364 @property 365 def join_hints(self): 366 """ 367 Hints that exist in the scope that reference tables 368 369 Returns: 370 list[exp.JoinHint]: Join hints that are referenced within the scope 371 """ 372 if self._join_hints is None: 373 return [] 374 return self._join_hints 375 376 @property 377 def pivots(self): 378 if not self._pivots: 379 self._pivots = [ 380 pivot for _, node in self.references for pivot in node.args.get("pivots") or [] 381 ] 382 383 return self._pivots 384 385 def source_columns(self, source_name): 386 """ 387 Get all columns in the current scope for a particular source. 388 389 Args: 390 source_name (str): Name of the source 391 Returns: 392 list[exp.Column]: Column instances that reference `source_name` 393 """ 394 return [column for column in self.columns if column.table == source_name] 395 396 @property 397 def is_subquery(self): 398 """Determine if this scope is a subquery""" 399 return self.scope_type == ScopeType.SUBQUERY 400 401 @property 402 def is_derived_table(self): 403 """Determine if this scope is a derived table""" 404 return self.scope_type == ScopeType.DERIVED_TABLE 405 406 @property 407 def is_union(self): 408 """Determine if this scope is a union""" 409 return self.scope_type == ScopeType.UNION 410 411 @property 412 def is_cte(self): 413 """Determine if this scope is a common table expression""" 414 return self.scope_type == ScopeType.CTE 415 416 @property 417 def is_root(self): 418 """Determine if this is the root scope""" 419 return self.scope_type == ScopeType.ROOT 420 421 @property 422 def is_udtf(self): 423 """Determine if this scope is a UDTF (User Defined Table Function)""" 424 return self.scope_type == ScopeType.UDTF 425 426 @property 427 def is_correlated_subquery(self): 428 """Determine if this scope is a correlated subquery""" 429 return bool( 430 (self.is_subquery or (self.parent and isinstance(self.parent.expression, exp.Lateral))) 431 and self.external_columns 432 ) 433 434 def rename_source(self, old_name, new_name): 435 """Rename a source in this scope""" 436 columns = self.sources.pop(old_name or "", []) 437 self.sources[new_name] = columns 438 439 def add_source(self, name, source): 440 """Add a source to this scope""" 441 self.sources[name] = source 442 self.clear_cache() 443 444 def remove_source(self, name): 445 """Remove a source from this scope""" 446 self.sources.pop(name, None) 447 self.clear_cache() 448 449 def __repr__(self): 450 return f"Scope<{self.expression.sql()}>" 451 452 def traverse(self): 453 """ 454 Traverse the scope tree from this node. 455 456 Yields: 457 Scope: scope instances in depth-first-search post-order 458 """ 459 stack = [self] 460 result = [] 461 while stack: 462 scope = stack.pop() 463 result.append(scope) 464 stack.extend( 465 itertools.chain( 466 scope.cte_scopes, 467 scope.union_scopes, 468 scope.table_scopes, 469 scope.subquery_scopes, 470 ) 471 ) 472 473 yield from reversed(result) 474 475 def ref_count(self): 476 """ 477 Count the number of times each scope in this tree is referenced. 478 479 Returns: 480 dict[int, int]: Mapping of Scope instance ID to reference count 481 """ 482 scope_ref_count = defaultdict(lambda: 0) 483 484 for scope in self.traverse(): 485 for _, source in scope.selected_sources.values(): 486 scope_ref_count[id(source)] += 1 487 488 return scope_ref_count
Selection scope.
Attributes:
- expression (exp.Select|exp.SetOperation): Root expression of this scope
- sources (dict[str, exp.Table|Scope]): Mapping of source name to either a Table expression or another Scope instance. For example: SELECT * FROM x {"x": Table(this="x")} SELECT * FROM x AS y {"y": Table(this="x")} SELECT * FROM (SELECT ...) AS y {"y": Scope(...)}
- lateral_sources (dict[str, exp.Table|Scope]): Sources from laterals For example: SELECT c FROM x LATERAL VIEW EXPLODE (a) AS c; The LATERAL VIEW EXPLODE gets x as a source.
- cte_sources (dict[str, Scope]): Sources from CTES
- outer_columns (list[str]): If this is a derived table or CTE, and the outer query
defines a column list for the alias of this scope, this is that list of columns.
For example:
SELECT * FROM (SELECT ...) AS y(col1, col2)
The inner query would have
["col1", "col2"]
for itsouter_columns
- parent (Scope): Parent scope
- scope_type (ScopeType): Type of this scope, relative to it's parent
- subquery_scopes (list[Scope]): List of all child scopes for subqueries
- cte_scopes (list[Scope]): List of all child scopes for CTEs
- derived_table_scopes (list[Scope]): List of all child scopes for derived_tables
- udtf_scopes (list[Scope]): List of all child scopes for user defined tabular functions
- table_scopes (list[Scope]): derived_table_scopes + udtf_scopes, in the order that they're defined
- union_scopes (list[Scope, Scope]): If this Scope is for a Union expression, this will be a list of the left and right child scopes.
60 def __init__( 61 self, 62 expression, 63 sources=None, 64 outer_columns=None, 65 parent=None, 66 scope_type=ScopeType.ROOT, 67 lateral_sources=None, 68 cte_sources=None, 69 ): 70 self.expression = expression 71 self.sources = sources or {} 72 self.lateral_sources = lateral_sources or {} 73 self.cte_sources = cte_sources or {} 74 self.sources.update(self.lateral_sources) 75 self.sources.update(self.cte_sources) 76 self.outer_columns = outer_columns or [] 77 self.parent = parent 78 self.scope_type = scope_type 79 self.subquery_scopes = [] 80 self.derived_table_scopes = [] 81 self.table_scopes = [] 82 self.cte_scopes = [] 83 self.union_scopes = [] 84 self.udtf_scopes = [] 85 self.clear_cache()
87 def clear_cache(self): 88 self._collected = False 89 self._raw_columns = None 90 self._stars = None 91 self._derived_tables = None 92 self._udtfs = None 93 self._tables = None 94 self._ctes = None 95 self._subqueries = None 96 self._selected_sources = None 97 self._columns = None 98 self._external_columns = None 99 self._join_hints = None 100 self._pivots = None 101 self._references = None
103 def branch( 104 self, expression, scope_type, sources=None, cte_sources=None, lateral_sources=None, **kwargs 105 ): 106 """Branch from the current scope to a new, inner scope""" 107 return Scope( 108 expression=expression.unnest(), 109 sources=sources.copy() if sources else None, 110 parent=self, 111 scope_type=scope_type, 112 cte_sources={**self.cte_sources, **(cte_sources or {})}, 113 lateral_sources=lateral_sources.copy() if lateral_sources else None, 114 **kwargs, 115 )
Branch from the current scope to a new, inner scope
168 def replace(self, old, new): 169 """ 170 Replace `old` with `new`. 171 172 This can be used instead of `exp.Expression.replace` to ensure the `Scope` is kept up-to-date. 173 174 Args: 175 old (exp.Expression): old node 176 new (exp.Expression): new node 177 """ 178 old.replace(new) 179 self.clear_cache()
Replace old
with new
.
This can be used instead of exp.Expression.replace
to ensure the Scope
is kept up-to-date.
Arguments:
- old (exp.Expression): old node
- new (exp.Expression): new node
181 @property 182 def tables(self): 183 """ 184 List of tables in this scope. 185 186 Returns: 187 list[exp.Table]: tables 188 """ 189 self._ensure_collected() 190 return self._tables
List of tables in this scope.
Returns:
list[exp.Table]: tables
192 @property 193 def ctes(self): 194 """ 195 List of CTEs in this scope. 196 197 Returns: 198 list[exp.CTE]: ctes 199 """ 200 self._ensure_collected() 201 return self._ctes
List of CTEs in this scope.
Returns:
list[exp.CTE]: ctes
203 @property 204 def derived_tables(self): 205 """ 206 List of derived tables in this scope. 207 208 For example: 209 SELECT * FROM (SELECT ...) <- that's a derived table 210 211 Returns: 212 list[exp.Subquery]: derived tables 213 """ 214 self._ensure_collected() 215 return self._derived_tables
List of derived tables in this scope.
For example:
SELECT * FROM (SELECT ...) <- that's a derived table
Returns:
list[exp.Subquery]: derived tables
217 @property 218 def udtfs(self): 219 """ 220 List of "User Defined Tabular Functions" in this scope. 221 222 Returns: 223 list[exp.UDTF]: UDTFs 224 """ 225 self._ensure_collected() 226 return self._udtfs
List of "User Defined Tabular Functions" in this scope.
Returns:
list[exp.UDTF]: UDTFs
228 @property 229 def subqueries(self): 230 """ 231 List of subqueries in this scope. 232 233 For example: 234 SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery 235 236 Returns: 237 list[exp.Select | exp.SetOperation]: subqueries 238 """ 239 self._ensure_collected() 240 return self._subqueries
List of subqueries in this scope.
For example:
SELECT * FROM x WHERE a IN (SELECT ...) <- that's a subquery
Returns:
list[exp.Select | exp.SetOperation]: subqueries
242 @property 243 def stars(self) -> t.List[exp.Column | exp.Dot]: 244 """ 245 List of star expressions (columns or dots) in this scope. 246 """ 247 self._ensure_collected() 248 return self._stars
List of star expressions (columns or dots) in this scope.
250 @property 251 def columns(self): 252 """ 253 List of columns in this scope. 254 255 Returns: 256 list[exp.Column]: Column instances in this scope, plus any 257 Columns that reference this scope from correlated subqueries. 258 """ 259 if self._columns is None: 260 self._ensure_collected() 261 columns = self._raw_columns 262 263 external_columns = [ 264 column 265 for scope in itertools.chain(self.subquery_scopes, self.udtf_scopes) 266 for column in scope.external_columns 267 ] 268 269 named_selects = set(self.expression.named_selects) 270 271 self._columns = [] 272 for column in columns + external_columns: 273 ancestor = column.find_ancestor( 274 exp.Select, exp.Qualify, exp.Order, exp.Having, exp.Hint, exp.Table, exp.Star 275 ) 276 if ( 277 not ancestor 278 or column.table 279 or isinstance(ancestor, exp.Select) 280 or (isinstance(ancestor, exp.Table) and not isinstance(ancestor.this, exp.Func)) 281 or ( 282 isinstance(ancestor, exp.Order) 283 and ( 284 isinstance(ancestor.parent, exp.Window) 285 or column.name not in named_selects 286 ) 287 ) 288 or (isinstance(ancestor, exp.Star) and not column.arg_key == "except") 289 ): 290 self._columns.append(column) 291 292 return self._columns
List of columns in this scope.
Returns:
list[exp.Column]: Column instances in this scope, plus any Columns that reference this scope from correlated subqueries.
294 @property 295 def selected_sources(self): 296 """ 297 Mapping of nodes and sources that are actually selected from in this scope. 298 299 That is, all tables in a schema are selectable at any point. But a 300 table only becomes a selected source if it's included in a FROM or JOIN clause. 301 302 Returns: 303 dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes 304 """ 305 if self._selected_sources is None: 306 result = {} 307 308 for name, node in self.references: 309 if name in result: 310 raise OptimizeError(f"Alias already used: {name}") 311 if name in self.sources: 312 result[name] = (node, self.sources[name]) 313 314 self._selected_sources = result 315 return self._selected_sources
Mapping of nodes and sources that are actually selected from in this scope.
That is, all tables in a schema are selectable at any point. But a table only becomes a selected source if it's included in a FROM or JOIN clause.
Returns:
dict[str, (exp.Table|exp.Select, exp.Table|Scope)]: selected sources and nodes
317 @property 318 def references(self) -> t.List[t.Tuple[str, exp.Expression]]: 319 if self._references is None: 320 self._references = [] 321 322 for table in self.tables: 323 self._references.append((table.alias_or_name, table)) 324 for expression in itertools.chain(self.derived_tables, self.udtfs): 325 self._references.append( 326 ( 327 expression.alias, 328 expression if expression.args.get("pivots") else expression.unnest(), 329 ) 330 ) 331 332 return self._references
334 @property 335 def external_columns(self): 336 """ 337 Columns that appear to reference sources in outer scopes. 338 339 Returns: 340 list[exp.Column]: Column instances that don't reference 341 sources in the current scope. 342 """ 343 if self._external_columns is None: 344 if isinstance(self.expression, exp.SetOperation): 345 left, right = self.union_scopes 346 self._external_columns = left.external_columns + right.external_columns 347 else: 348 self._external_columns = [ 349 c for c in self.columns if c.table not in self.selected_sources 350 ] 351 352 return self._external_columns
Columns that appear to reference sources in outer scopes.
Returns:
list[exp.Column]: Column instances that don't reference sources in the current scope.
354 @property 355 def unqualified_columns(self): 356 """ 357 Unqualified columns in the current scope. 358 359 Returns: 360 list[exp.Column]: Unqualified columns 361 """ 362 return [c for c in self.columns if not c.table]
Unqualified columns in the current scope.
Returns:
list[exp.Column]: Unqualified columns
364 @property 365 def join_hints(self): 366 """ 367 Hints that exist in the scope that reference tables 368 369 Returns: 370 list[exp.JoinHint]: Join hints that are referenced within the scope 371 """ 372 if self._join_hints is None: 373 return [] 374 return self._join_hints
Hints that exist in the scope that reference tables
Returns:
list[exp.JoinHint]: Join hints that are referenced within the scope
385 def source_columns(self, source_name): 386 """ 387 Get all columns in the current scope for a particular source. 388 389 Args: 390 source_name (str): Name of the source 391 Returns: 392 list[exp.Column]: Column instances that reference `source_name` 393 """ 394 return [column for column in self.columns if column.table == source_name]
Get all columns in the current scope for a particular source.
Arguments:
- source_name (str): Name of the source
Returns:
list[exp.Column]: Column instances that reference
source_name
396 @property 397 def is_subquery(self): 398 """Determine if this scope is a subquery""" 399 return self.scope_type == ScopeType.SUBQUERY
Determine if this scope is a subquery
401 @property 402 def is_derived_table(self): 403 """Determine if this scope is a derived table""" 404 return self.scope_type == ScopeType.DERIVED_TABLE
Determine if this scope is a derived table
406 @property 407 def is_union(self): 408 """Determine if this scope is a union""" 409 return self.scope_type == ScopeType.UNION
Determine if this scope is a union
411 @property 412 def is_cte(self): 413 """Determine if this scope is a common table expression""" 414 return self.scope_type == ScopeType.CTE
Determine if this scope is a common table expression
416 @property 417 def is_root(self): 418 """Determine if this is the root scope""" 419 return self.scope_type == ScopeType.ROOT
Determine if this is the root scope
421 @property 422 def is_udtf(self): 423 """Determine if this scope is a UDTF (User Defined Table Function)""" 424 return self.scope_type == ScopeType.UDTF
Determine if this scope is a UDTF (User Defined Table Function)
434 def rename_source(self, old_name, new_name): 435 """Rename a source in this scope""" 436 columns = self.sources.pop(old_name or "", []) 437 self.sources[new_name] = columns
Rename a source in this scope
439 def add_source(self, name, source): 440 """Add a source to this scope""" 441 self.sources[name] = source 442 self.clear_cache()
Add a source to this scope
444 def remove_source(self, name): 445 """Remove a source from this scope""" 446 self.sources.pop(name, None) 447 self.clear_cache()
Remove a source from this scope
452 def traverse(self): 453 """ 454 Traverse the scope tree from this node. 455 456 Yields: 457 Scope: scope instances in depth-first-search post-order 458 """ 459 stack = [self] 460 result = [] 461 while stack: 462 scope = stack.pop() 463 result.append(scope) 464 stack.extend( 465 itertools.chain( 466 scope.cte_scopes, 467 scope.union_scopes, 468 scope.table_scopes, 469 scope.subquery_scopes, 470 ) 471 ) 472 473 yield from reversed(result)
Traverse the scope tree from this node.
Yields:
Scope: scope instances in depth-first-search post-order
475 def ref_count(self): 476 """ 477 Count the number of times each scope in this tree is referenced. 478 479 Returns: 480 dict[int, int]: Mapping of Scope instance ID to reference count 481 """ 482 scope_ref_count = defaultdict(lambda: 0) 483 484 for scope in self.traverse(): 485 for _, source in scope.selected_sources.values(): 486 scope_ref_count[id(source)] += 1 487 488 return scope_ref_count
Count the number of times each scope in this tree is referenced.
Returns:
dict[int, int]: Mapping of Scope instance ID to reference count
491def traverse_scope(expression: exp.Expression) -> t.List[Scope]: 492 """ 493 Traverse an expression by its "scopes". 494 495 "Scope" represents the current context of a Select statement. 496 497 This is helpful for optimizing queries, where we need more information than 498 the expression tree itself. For example, we might care about the source 499 names within a subquery. Returns a list because a generator could result in 500 incomplete properties which is confusing. 501 502 Examples: 503 >>> import sqlglot 504 >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") 505 >>> scopes = traverse_scope(expression) 506 >>> scopes[0].expression.sql(), list(scopes[0].sources) 507 ('SELECT a FROM x', ['x']) 508 >>> scopes[1].expression.sql(), list(scopes[1].sources) 509 ('SELECT a FROM (SELECT a FROM x) AS y', ['y']) 510 511 Args: 512 expression: Expression to traverse 513 514 Returns: 515 A list of the created scope instances 516 """ 517 if isinstance(expression, TRAVERSABLES): 518 return list(_traverse_scope(Scope(expression))) 519 return []
Traverse an expression by its "scopes".
"Scope" represents the current context of a Select statement.
This is helpful for optimizing queries, where we need more information than the expression tree itself. For example, we might care about the source names within a subquery. Returns a list because a generator could result in incomplete properties which is confusing.
Examples:
>>> import sqlglot >>> expression = sqlglot.parse_one("SELECT a FROM (SELECT a FROM x) AS y") >>> scopes = traverse_scope(expression) >>> scopes[0].expression.sql(), list(scopes[0].sources) ('SELECT a FROM x', ['x']) >>> scopes[1].expression.sql(), list(scopes[1].sources) ('SELECT a FROM (SELECT a FROM x) AS y', ['y'])
Arguments:
- expression: Expression to traverse
Returns:
A list of the created scope instances
522def build_scope(expression: exp.Expression) -> t.Optional[Scope]: 523 """ 524 Build a scope tree. 525 526 Args: 527 expression: Expression to build the scope tree for. 528 529 Returns: 530 The root scope 531 """ 532 return seq_get(traverse_scope(expression), -1)
Build a scope tree.
Arguments:
- expression: Expression to build the scope tree for.
Returns:
The root scope
779def walk_in_scope(expression, bfs=True, prune=None): 780 """ 781 Returns a generator object which visits all nodes in the syntrax tree, stopping at 782 nodes that start child scopes. 783 784 Args: 785 expression (exp.Expression): 786 bfs (bool): if set to True the BFS traversal order will be applied, 787 otherwise the DFS traversal will be used instead. 788 prune ((node, parent, arg_key) -> bool): callable that returns True if 789 the generator should stop traversing this branch of the tree. 790 791 Yields: 792 tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key 793 """ 794 # We'll use this variable to pass state into the dfs generator. 795 # Whenever we set it to True, we exclude a subtree from traversal. 796 crossed_scope_boundary = False 797 798 for node in expression.walk( 799 bfs=bfs, prune=lambda n: crossed_scope_boundary or (prune and prune(n)) 800 ): 801 crossed_scope_boundary = False 802 803 yield node 804 805 if node is expression: 806 continue 807 if ( 808 isinstance(node, exp.CTE) 809 or ( 810 isinstance(node.parent, (exp.From, exp.Join, exp.Subquery)) 811 and (_is_derived_table(node) or isinstance(node, exp.UDTF)) 812 ) 813 or isinstance(node, exp.UNWRAPPED_QUERIES) 814 ): 815 crossed_scope_boundary = True 816 817 if isinstance(node, (exp.Subquery, exp.UDTF)): 818 # The following args are not actually in the inner scope, so we should visit them 819 for key in ("joins", "laterals", "pivots"): 820 for arg in node.args.get(key) or []: 821 yield from walk_in_scope(arg, bfs=bfs)
Returns a generator object which visits all nodes in the syntrax tree, stopping at nodes that start child scopes.
Arguments:
- expression (exp.Expression):
- bfs (bool): if set to True the BFS traversal order will be applied, otherwise the DFS traversal will be used instead.
- prune ((node, parent, arg_key) -> bool): callable that returns True if the generator should stop traversing this branch of the tree.
Yields:
tuple[exp.Expression, Optional[exp.Expression], str]: node, parent, arg key
824def find_all_in_scope(expression, expression_types, bfs=True): 825 """ 826 Returns a generator object which visits all nodes in this scope and only yields those that 827 match at least one of the specified expression types. 828 829 This does NOT traverse into subscopes. 830 831 Args: 832 expression (exp.Expression): 833 expression_types (tuple[type]|type): the expression type(s) to match. 834 bfs (bool): True to use breadth-first search, False to use depth-first. 835 836 Yields: 837 exp.Expression: nodes 838 """ 839 for expression in walk_in_scope(expression, bfs=bfs): 840 if isinstance(expression, tuple(ensure_collection(expression_types))): 841 yield expression
Returns a generator object which visits all nodes in this scope and only yields those that match at least one of the specified expression types.
This does NOT traverse into subscopes.
Arguments:
- expression (exp.Expression):
- expression_types (tuple[type]|type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Yields:
exp.Expression: nodes
844def find_in_scope(expression, expression_types, bfs=True): 845 """ 846 Returns the first node in this scope which matches at least one of the specified types. 847 848 This does NOT traverse into subscopes. 849 850 Args: 851 expression (exp.Expression): 852 expression_types (tuple[type]|type): the expression type(s) to match. 853 bfs (bool): True to use breadth-first search, False to use depth-first. 854 855 Returns: 856 exp.Expression: the node which matches the criteria or None if no node matching 857 the criteria was found. 858 """ 859 return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
Returns the first node in this scope which matches at least one of the specified types.
This does NOT traverse into subscopes.
Arguments:
- expression (exp.Expression):
- expression_types (tuple[type]|type): the expression type(s) to match.
- bfs (bool): True to use breadth-first search, False to use depth-first.
Returns:
exp.Expression: the node which matches the criteria or None if no node matching the criteria was found.