Edit on GitHub

sqlglot.optimizer.annotate_types

  1from __future__ import annotations
  2
  3import functools
  4import typing as t
  5
  6from sqlglot import exp
  7from sqlglot.helper import (
  8    ensure_list,
  9    is_date_unit,
 10    is_iso_date,
 11    is_iso_datetime,
 12    seq_get,
 13)
 14from sqlglot.optimizer.scope import Scope, traverse_scope
 15from sqlglot.schema import MappingSchema, Schema, ensure_schema
 16from sqlglot.dialects.dialect import Dialect
 17
 18if t.TYPE_CHECKING:
 19    from sqlglot._typing import B, E
 20
 21    BinaryCoercionFunc = t.Callable[[exp.Expression, exp.Expression], exp.DataType.Type]
 22    BinaryCoercions = t.Dict[
 23        t.Tuple[exp.DataType.Type, exp.DataType.Type],
 24        BinaryCoercionFunc,
 25    ]
 26
 27    from sqlglot.dialects.dialect import DialectType, AnnotatorsType
 28
 29
 30def annotate_types(
 31    expression: E,
 32    schema: t.Optional[t.Dict | Schema] = None,
 33    annotators: t.Optional[AnnotatorsType] = None,
 34    coerces_to: t.Optional[t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]]] = None,
 35    dialect: DialectType = None,
 36) -> E:
 37    """
 38    Infers the types of an expression, annotating its AST accordingly.
 39
 40    Example:
 41        >>> import sqlglot
 42        >>> schema = {"y": {"cola": "SMALLINT"}}
 43        >>> sql = "SELECT x.cola + 2.5 AS cola FROM (SELECT y.cola AS cola FROM y AS y) AS x"
 44        >>> annotated_expr = annotate_types(sqlglot.parse_one(sql), schema=schema)
 45        >>> annotated_expr.expressions[0].type.this  # Get the type of "x.cola + 2.5 AS cola"
 46        <Type.DOUBLE: 'DOUBLE'>
 47
 48    Args:
 49        expression: Expression to annotate.
 50        schema: Database schema.
 51        annotators: Maps expression type to corresponding annotation function.
 52        coerces_to: Maps expression type to set of types that it can be coerced into.
 53
 54    Returns:
 55        The expression annotated with types.
 56    """
 57
 58    schema = ensure_schema(schema, dialect=dialect)
 59
 60    return TypeAnnotator(schema, annotators, coerces_to).annotate(expression)
 61
 62
 63def _coerce_date_literal(l: exp.Expression, unit: t.Optional[exp.Expression]) -> exp.DataType.Type:
 64    date_text = l.name
 65    is_iso_date_ = is_iso_date(date_text)
 66
 67    if is_iso_date_ and is_date_unit(unit):
 68        return exp.DataType.Type.DATE
 69
 70    # An ISO date is also an ISO datetime, but not vice versa
 71    if is_iso_date_ or is_iso_datetime(date_text):
 72        return exp.DataType.Type.DATETIME
 73
 74    return exp.DataType.Type.UNKNOWN
 75
 76
 77def _coerce_date(l: exp.Expression, unit: t.Optional[exp.Expression]) -> exp.DataType.Type:
 78    if not is_date_unit(unit):
 79        return exp.DataType.Type.DATETIME
 80    return l.type.this if l.type else exp.DataType.Type.UNKNOWN
 81
 82
 83def swap_args(func: BinaryCoercionFunc) -> BinaryCoercionFunc:
 84    @functools.wraps(func)
 85    def _swapped(l: exp.Expression, r: exp.Expression) -> exp.DataType.Type:
 86        return func(r, l)
 87
 88    return _swapped
 89
 90
 91def swap_all(coercions: BinaryCoercions) -> BinaryCoercions:
 92    return {**coercions, **{(b, a): swap_args(func) for (a, b), func in coercions.items()}}
 93
 94
 95class _TypeAnnotator(type):
 96    def __new__(cls, clsname, bases, attrs):
 97        klass = super().__new__(cls, clsname, bases, attrs)
 98
 99        # Highest-to-lowest type precedence, as specified in Spark's docs (ANSI):
100        # https://spark.apache.org/docs/3.2.0/sql-ref-ansi-compliance.html
101        text_precedence = (
102            exp.DataType.Type.TEXT,
103            exp.DataType.Type.NVARCHAR,
104            exp.DataType.Type.VARCHAR,
105            exp.DataType.Type.NCHAR,
106            exp.DataType.Type.CHAR,
107        )
108        numeric_precedence = (
109            exp.DataType.Type.DOUBLE,
110            exp.DataType.Type.FLOAT,
111            exp.DataType.Type.DECIMAL,
112            exp.DataType.Type.BIGINT,
113            exp.DataType.Type.INT,
114            exp.DataType.Type.SMALLINT,
115            exp.DataType.Type.TINYINT,
116        )
117        timelike_precedence = (
118            exp.DataType.Type.TIMESTAMPLTZ,
119            exp.DataType.Type.TIMESTAMPTZ,
120            exp.DataType.Type.TIMESTAMP,
121            exp.DataType.Type.DATETIME,
122            exp.DataType.Type.DATE,
123        )
124
125        for type_precedence in (text_precedence, numeric_precedence, timelike_precedence):
126            coerces_to = set()
127            for data_type in type_precedence:
128                klass.COERCES_TO[data_type] = coerces_to.copy()
129                coerces_to |= {data_type}
130
131        # NULL can be coerced to any type, so e.g. NULL + 1 will have type INT
132        klass.COERCES_TO[exp.DataType.Type.NULL] = {
133            *text_precedence,
134            *numeric_precedence,
135            *timelike_precedence,
136        }
137
138        return klass
139
140
class TypeAnnotator(metaclass=_TypeAnnotator):
    """
    Assigns a type to the nodes of an expression tree, scope by scope.

    The class-level tables below are the defaults used when the constructor is not
    given (or cannot resolve) overriding ones.
    """

    NESTED_TYPES = {
        exp.DataType.Type.ARRAY,
    }

    # Specifies what types a given type can be coerced into (autofilled)
    # The `_TypeAnnotator` metaclass populates this dict when the class is created.
    COERCES_TO: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {}

    # Coercion functions for binary operations.
    # Map of type pairs to a callable that takes both sides of the binary operation and returns the resulting type.
    # Each group goes through `swap_all`, so both operand orders are registered.
    BINARY_COERCIONS: BinaryCoercions = {
        **swap_all(
            {
                # text and INTERVAL: the type is derived from the text operand and the interval's unit.
                # The loop variable `t` shadows the `typing` alias only inside this comprehension.
                (t, exp.DataType.Type.INTERVAL): lambda l, r: _coerce_date_literal(
                    l, r.args.get("unit")
                )
                for t in exp.DataType.TEXT_TYPES
            }
        ),
        **swap_all(
            {
                # text + numeric will yield the numeric type to match most dialects' semantics
                (text, numeric): lambda l, r: t.cast(
                    exp.DataType.Type, l.type if l.type in exp.DataType.NUMERIC_TYPES else r.type
                )
                for text in exp.DataType.TEXT_TYPES
                for numeric in exp.DataType.NUMERIC_TYPES
            }
        ),
        **swap_all(
            {
                # DATE and INTERVAL: the type is derived from the date operand and the interval's unit
                (exp.DataType.Type.DATE, exp.DataType.Type.INTERVAL): lambda l, r: _coerce_date(
                    l, r.args.get("unit")
                ),
            }
        ),
    }
178
179    def __init__(
180        self,
181        schema: Schema,
182        annotators: t.Optional[AnnotatorsType] = None,
183        coerces_to: t.Optional[t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]]] = None,
184        binary_coercions: t.Optional[BinaryCoercions] = None,
185    ) -> None:
186        self.schema = schema
187        self.annotators = annotators or Dialect.get_or_raise(schema.dialect).ANNOTATORS
188        self.coerces_to = (
189            coerces_to or Dialect.get_or_raise(schema.dialect).COERCES_TO or self.COERCES_TO
190        )
191        self.binary_coercions = binary_coercions or self.BINARY_COERCIONS
192
193        # Caches the ids of annotated sub-Expressions, to ensure we only visit them once
194        self._visited: t.Set[int] = set()
195
196        # Caches NULL-annotated expressions to set them to UNKNOWN after type inference is completed
197        self._null_expressions: t.Dict[int, exp.Expression] = {}
198
199        # Databricks and Spark ≥v3 actually support NULL (i.e., VOID) as a type
200        self._supports_null_type = schema.dialect in ("databricks", "spark")
201
202        # Maps an exp.SetOperation's id (e.g. UNION) to its projection types. This is computed if the
203        # exp.SetOperation is the expression of a scope source, as selecting from it multiple times
204        # would reprocess the entire subtree to coerce the types of its operands' projections
205        self._setop_column_types: t.Dict[int, t.Dict[str, exp.DataType | exp.DataType.Type]] = {}
206
207    def _set_type(
208        self, expression: exp.Expression, target_type: t.Optional[exp.DataType | exp.DataType.Type]
209    ) -> None:
210        prev_type = expression.type
211        expression_id = id(expression)
212
213        expression.type = target_type or exp.DataType.Type.UNKNOWN  # type: ignore
214        self._visited.add(expression_id)
215
216        if (
217            not self._supports_null_type
218            and t.cast(exp.DataType, expression.type).this == exp.DataType.Type.NULL
219        ):
220            self._null_expressions[expression_id] = expression
221        elif prev_type and t.cast(exp.DataType, prev_type).this == exp.DataType.Type.NULL:
222            self._null_expressions.pop(expression_id, None)
223
224    def annotate(self, expression: E) -> E:
225        for scope in traverse_scope(expression):
226            self.annotate_scope(scope)
227
228        # This takes care of non-traversable expressions
229        expression = self._maybe_annotate(expression)
230
231        # Replace NULL type with UNKNOWN, since the former is not an actual type;
232        # it is mostly used to aid type coercion, e.g. in query set operations.
233        for expr in self._null_expressions.values():
234            expr.type = exp.DataType.Type.UNKNOWN
235
236        return expression
237
    def annotate_scope(self, scope: Scope) -> None:
        """
        Annotates the column references and the expression of a single scope.

        The steps are, in order:
          1. Build a lookup of projection types for every source that is itself a Scope.
          2. Type the table-qualified columns, from the schema for tables and from the
             lookup for other sources.
          3. When the schema is a MappingSchema, type the scope's table columns as STRUCTs.
          4. Annotate whatever is left in the scope's expression.
          5. For the "bigquery" dialect, store a STRUCT describing the query's projections
             under the "query_type" key of the query's meta.

        Args:
            scope: The scope to annotate.
        """
        # Maps a source name to a {projection name: type} dict
        selects = {}
        for name, source in scope.sources.items():
            if not isinstance(source, Scope):
                continue

            expression = source.expression
            if isinstance(expression, exp.UDTF):
                # Collect the expressions whose types back the UDTF's aliased columns
                values = []

                if isinstance(expression, exp.Lateral):
                    if isinstance(expression.this, exp.Explode):
                        values = [expression.this.this]
                elif isinstance(expression, exp.Unnest):
                    values = [expression]
                elif not isinstance(expression, exp.TableFromRows):
                    values = expression.expressions[0].expressions

                if not values:
                    continue

                selects[name] = {
                    alias: column.type
                    for alias, column in zip(expression.alias_column_names, values)
                }
            elif isinstance(expression, exp.SetOperation) and len(expression.left.selects) == len(
                expression.right.selects
            ):
                # Reuse the types computed earlier for this very set operation, if any
                selects[name] = col_types = self._setop_column_types.setdefault(id(expression), {})

                if not col_types:
                    # Process a chain / sub-tree of set operations
                    for set_op in expression.walk(
                        prune=lambda n: not isinstance(n, (exp.SetOperation, exp.Subquery))
                    ):
                        if not isinstance(set_op, exp.SetOperation):
                            continue

                        if set_op.args.get("by_name"):
                            # Projections are paired by name; a name missing on the right yields UNKNOWN
                            r_type_by_select = {
                                s.alias_or_name: s.type for s in set_op.right.selects
                            }
                            setop_cols = {
                                s.alias_or_name: self._maybe_coerce(
                                    t.cast(exp.DataType, s.type),
                                    r_type_by_select.get(s.alias_or_name)
                                    or exp.DataType.Type.UNKNOWN,
                                )
                                for s in set_op.left.selects
                            }
                        else:
                            # Projections are paired by position
                            setop_cols = {
                                ls.alias_or_name: self._maybe_coerce(
                                    t.cast(exp.DataType, ls.type), t.cast(exp.DataType, rs.type)
                                )
                                for ls, rs in zip(set_op.left.selects, set_op.right.selects)
                            }

                        # Coerce intermediate results with the previously registered types, if they exist
                        for col_name, col_type in setop_cols.items():
                            col_types[col_name] = self._maybe_coerce(
                                col_type, col_types.get(col_name, exp.DataType.Type.NULL)
                            )

            else:
                selects[name] = {s.alias_or_name: s.type for s in expression.selects}

        # First annotate the current scope's column references
        for col in scope.columns:
            # Columns without a table qualifier are not resolved here
            if not col.table:
                continue

            source = scope.sources.get(col.table)
            if isinstance(source, exp.Table):
                self._set_type(col, self.schema.get_column_type(source, col))
            elif source:
                if col.table in selects and col.name in selects[col.table]:
                    self._set_type(col, selects[col.table][col.name])
                elif isinstance(source.expression, exp.Unnest):
                    self._set_type(col, source.expression.type)

        if isinstance(self.schema, MappingSchema):
            for table_column in scope.table_columns:
                source = scope.sources.get(table_column.name)

                if isinstance(source, exp.Table):
                    schema = self.schema.find(
                        source, raise_on_missing=False, ensure_data_types=True
                    )
                    if not isinstance(schema, dict):
                        continue

                    # Describe the table as a STRUCT with one field per column found in the schema
                    struct_type = exp.DataType(
                        this=exp.DataType.Type.STRUCT,
                        expressions=[
                            exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
                            for c, kind in schema.items()
                        ],
                        nested=True,
                    )
                    self._set_type(table_column, struct_type)
                elif (
                    isinstance(source, Scope)
                    and isinstance(source.expression, exp.Query)
                    and (
                        source.expression.meta.get("query_type") or exp.DataType.build("UNKNOWN")
                    ).is_type(exp.DataType.Type.STRUCT)
                ):
                    # The source query already carries a STRUCT "query_type" in its meta, so reuse it
                    self._set_type(table_column, source.expression.meta["query_type"])

        # Then (possibly) annotate the remaining expressions in the scope
        self._maybe_annotate(scope.expression)

        if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
            struct_type = exp.DataType(
                this=exp.DataType.Type.STRUCT,
                expressions=[
                    exp.ColumnDef(
                        this=exp.to_identifier(select.output_name),
                        kind=select.type.copy() if select.type else None,
                    )
                    for select in scope.expression.selects
                ],
                nested=True,
            )

            # Only record the STRUCT when none of the typed projections is UNKNOWN
            if not any(
                cd.kind.is_type(exp.DataType.Type.UNKNOWN)
                for cd in struct_type.expressions
                if cd.kind
            ):
                # We don't use `_set_type` on purpose here. If we annotated the query directly, then
                # using it in other contexts (e.g., ARRAY(<query>)) could result in incorrect type
                # annotations, i.e., it shouldn't be interpreted as a STRUCT value.
                scope.expression.meta["query_type"] = struct_type
373
374    def _maybe_annotate(self, expression: E) -> E:
375        if id(expression) in self._visited:
376            return expression  # We've already inferred the expression's type
377
378        annotator = self.annotators.get(expression.__class__)
379
380        return (
381            annotator(self, expression)
382            if annotator
383            else self._annotate_with_type(expression, exp.DataType.Type.UNKNOWN)
384        )
385
386    def _annotate_args(self, expression: E) -> E:
387        for value in expression.iter_expressions():
388            self._maybe_annotate(value)
389
390        return expression
391
392    def _maybe_coerce(
393        self,
394        type1: exp.DataType | exp.DataType.Type,
395        type2: exp.DataType | exp.DataType.Type,
396    ) -> exp.DataType | exp.DataType.Type:
397        """
398        Returns type2 if type1 can be coerced into it, otherwise type1.
399
400        If either type is parameterized (e.g. DECIMAL(18, 2) contains two parameters),
401        we assume type1 does not coerce into type2, so we also return it in this case.
402        """
403        if isinstance(type1, exp.DataType):
404            if type1.expressions:
405                return type1
406            type1_value = type1.this
407        else:
408            type1_value = type1
409
410        if isinstance(type2, exp.DataType):
411            if type2.expressions:
412                return type2
413            type2_value = type2.this
414        else:
415            type2_value = type2
416
417        # We propagate the UNKNOWN type upwards if found
418        if exp.DataType.Type.UNKNOWN in (type1_value, type2_value):
419            return exp.DataType.Type.UNKNOWN
420
421        return type2_value if type2_value in self.coerces_to.get(type1_value, {}) else type1_value
422
423    def _annotate_binary(self, expression: B) -> B:
424        self._annotate_args(expression)
425
426        left, right = expression.left, expression.right
427        left_type, right_type = left.type.this, right.type.this  # type: ignore
428
429        if isinstance(expression, (exp.Connector, exp.Predicate)):
430            self._set_type(expression, exp.DataType.Type.BOOLEAN)
431        elif (left_type, right_type) in self.binary_coercions:
432            self._set_type(expression, self.binary_coercions[(left_type, right_type)](left, right))
433        else:
434            self._set_type(expression, self._maybe_coerce(left_type, right_type))
435
436        return expression
437
438    def _annotate_unary(self, expression: E) -> E:
439        self._annotate_args(expression)
440
441        if isinstance(expression, exp.Not):
442            self._set_type(expression, exp.DataType.Type.BOOLEAN)
443        else:
444            self._set_type(expression, expression.this.type)
445
446        return expression
447
448    def _annotate_literal(self, expression: exp.Literal) -> exp.Literal:
449        if expression.is_string:
450            self._set_type(expression, exp.DataType.Type.VARCHAR)
451        elif expression.is_int:
452            self._set_type(expression, exp.DataType.Type.INT)
453        else:
454            self._set_type(expression, exp.DataType.Type.DOUBLE)
455
456        return expression
457
458    def _annotate_with_type(
459        self, expression: E, target_type: exp.DataType | exp.DataType.Type
460    ) -> E:
461        self._set_type(expression, target_type)
462        return self._annotate_args(expression)
463
464    @t.no_type_check
465    def _annotate_by_args(
466        self,
467        expression: E,
468        *args: str,
469        promote: bool = False,
470        array: bool = False,
471    ) -> E:
472        self._annotate_args(expression)
473
474        expressions: t.List[exp.Expression] = []
475        for arg in args:
476            arg_expr = expression.args.get(arg)
477            expressions.extend(expr for expr in ensure_list(arg_expr) if expr)
478
479        last_datatype = None
480        for expr in expressions:
481            expr_type = expr.type
482
483            # Stop at the first nested data type found - we don't want to _maybe_coerce nested types
484            if expr_type.args.get("nested"):
485                last_datatype = expr_type
486                break
487
488            if not expr_type.is_type(exp.DataType.Type.UNKNOWN):
489                last_datatype = self._maybe_coerce(last_datatype or expr_type, expr_type)
490
491        self._set_type(expression, last_datatype or exp.DataType.Type.UNKNOWN)
492
493        if promote:
494            if expression.type.this in exp.DataType.INTEGER_TYPES:
495                self._set_type(expression, exp.DataType.Type.BIGINT)
496            elif expression.type.this in exp.DataType.FLOAT_TYPES:
497                self._set_type(expression, exp.DataType.Type.DOUBLE)
498
499        if array:
500            self._set_type(
501                expression,
502                exp.DataType(
503                    this=exp.DataType.Type.ARRAY, expressions=[expression.type], nested=True
504                ),
505            )
506
507        return expression
508
509    def _annotate_timeunit(
510        self, expression: exp.TimeUnit | exp.DateTrunc
511    ) -> exp.TimeUnit | exp.DateTrunc:
512        self._annotate_args(expression)
513
514        if expression.this.type.this in exp.DataType.TEXT_TYPES:
515            datatype = _coerce_date_literal(expression.this, expression.unit)
516        elif expression.this.type.this in exp.DataType.TEMPORAL_TYPES:
517            datatype = _coerce_date(expression.this, expression.unit)
518        else:
519            datatype = exp.DataType.Type.UNKNOWN
520
521        self._set_type(expression, datatype)
522        return expression
523
524    def _annotate_bracket(self, expression: exp.Bracket) -> exp.Bracket:
525        self._annotate_args(expression)
526
527        bracket_arg = expression.expressions[0]
528        this = expression.this
529
530        if isinstance(bracket_arg, exp.Slice):
531            self._set_type(expression, this.type)
532        elif this.type.is_type(exp.DataType.Type.ARRAY):
533            self._set_type(expression, seq_get(this.type.expressions, 0))
534        elif isinstance(this, (exp.Map, exp.VarMap)) and bracket_arg in this.keys:
535            index = this.keys.index(bracket_arg)
536            value = seq_get(this.values, index)
537            self._set_type(expression, value.type if value else None)
538        else:
539            self._set_type(expression, exp.DataType.Type.UNKNOWN)
540
541        return expression
542
543    def _annotate_div(self, expression: exp.Div) -> exp.Div:
544        self._annotate_args(expression)
545
546        left_type, right_type = expression.left.type.this, expression.right.type.this  # type: ignore
547
548        if (
549            expression.args.get("typed")
550            and left_type in exp.DataType.INTEGER_TYPES
551            and right_type in exp.DataType.INTEGER_TYPES
552        ):
553            self._set_type(expression, exp.DataType.Type.BIGINT)
554        else:
555            self._set_type(expression, self._maybe_coerce(left_type, right_type))
556            if expression.type and expression.type.this not in exp.DataType.REAL_TYPES:
557                self._set_type(
558                    expression, self._maybe_coerce(expression.type, exp.DataType.Type.DOUBLE)
559                )
560
561        return expression
562
563    def _annotate_dot(self, expression: exp.Dot) -> exp.Dot:
564        self._annotate_args(expression)
565        self._set_type(expression, None)
566        this_type = expression.this.type
567
568        if this_type and this_type.is_type(exp.DataType.Type.STRUCT):
569            for e in this_type.expressions:
570                if e.name == expression.expression.name:
571                    self._set_type(expression, e.kind)
572                    break
573
574        return expression
575
576    def _annotate_explode(self, expression: exp.Explode) -> exp.Explode:
577        self._annotate_args(expression)
578        self._set_type(expression, seq_get(expression.this.type.expressions, 0))
579        return expression
580
581    def _annotate_unnest(self, expression: exp.Unnest) -> exp.Unnest:
582        self._annotate_args(expression)
583        child = seq_get(expression.expressions, 0)
584
585        if child and child.is_type(exp.DataType.Type.ARRAY):
586            expr_type = seq_get(child.type.expressions, 0)
587        else:
588            expr_type = None
589
590        self._set_type(expression, expr_type)
591        return expression
592
593    def _annotate_struct_value(
594        self, expression: exp.Expression
595    ) -> t.Optional[exp.DataType] | exp.ColumnDef:
596        alias = expression.args.get("alias")
597        if alias:
598            return exp.ColumnDef(this=alias.copy(), kind=expression.type)
599
600        # Case: key = value or key := value
601        if expression.expression:
602            return exp.ColumnDef(this=expression.this.copy(), kind=expression.expression.type)
603
604        return expression.type
605
606    def _annotate_struct(self, expression: exp.Struct) -> exp.Struct:
607        self._annotate_args(expression)
608        self._set_type(
609            expression,
610            exp.DataType(
611                this=exp.DataType.Type.STRUCT,
612                expressions=[self._annotate_struct_value(expr) for expr in expression.expressions],
613                nested=True,
614            ),
615        )
616        return expression
617
618    @t.overload
619    def _annotate_map(self, expression: exp.Map) -> exp.Map: ...
620
621    @t.overload
622    def _annotate_map(self, expression: exp.VarMap) -> exp.VarMap: ...
623
624    def _annotate_map(self, expression):
625        self._annotate_args(expression)
626
627        keys = expression.args.get("keys")
628        values = expression.args.get("values")
629
630        map_type = exp.DataType(this=exp.DataType.Type.MAP)
631        if isinstance(keys, exp.Array) and isinstance(values, exp.Array):
632            key_type = seq_get(keys.type.expressions, 0) or exp.DataType.Type.UNKNOWN
633            value_type = seq_get(values.type.expressions, 0) or exp.DataType.Type.UNKNOWN
634
635            if key_type != exp.DataType.Type.UNKNOWN and value_type != exp.DataType.Type.UNKNOWN:
636                map_type.set("expressions", [key_type, value_type])
637                map_type.set("nested", True)
638
639        self._set_type(expression, map_type)
640        return expression
641
642    def _annotate_to_map(self, expression: exp.ToMap) -> exp.ToMap:
643        self._annotate_args(expression)
644
645        map_type = exp.DataType(this=exp.DataType.Type.MAP)
646        arg = expression.this
647        if arg.is_type(exp.DataType.Type.STRUCT):
648            for coldef in arg.type.expressions:
649                kind = coldef.kind
650                if kind != exp.DataType.Type.UNKNOWN:
651                    map_type.set("expressions", [exp.DataType.build("varchar"), kind])
652                    map_type.set("nested", True)
653                    break
654
655        self._set_type(expression, map_type)
656        return expression
657
658    def _annotate_extract(self, expression: exp.Extract) -> exp.Extract:
659        self._annotate_args(expression)
660        part = expression.name
661        if part == "TIME":
662            self._set_type(expression, exp.DataType.Type.TIME)
663        elif part == "DATE":
664            self._set_type(expression, exp.DataType.Type.DATE)
665        else:
666            self._set_type(expression, exp.DataType.Type.INT)
667        return expression
668
669    def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
670        self._annotate_args(expression)
671
672        array_arg = expression.this
673        if array_arg.type.is_type(exp.DataType.Type.ARRAY):
674            element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
675            self._set_type(expression, element_type)
676        else:
677            self._set_type(expression, exp.DataType.Type.UNKNOWN)
678
679        return expression
def annotate_types( expression: ~E, schema: Union[Dict, sqlglot.schema.Schema, NoneType] = None, annotators: Optional[Dict[Type[~E], Callable[[TypeAnnotator, ~E], ~E]]] = None, coerces_to: Optional[Dict[sqlglot.expressions.DataType.Type, Set[sqlglot.expressions.DataType.Type]]] = None, dialect: Union[str, sqlglot.dialects.Dialect, Type[sqlglot.dialects.Dialect], NoneType] = None) -> ~E:
def annotate_types(
    expression: E,
    schema: t.Optional[t.Dict | Schema] = None,
    annotators: t.Optional[AnnotatorsType] = None,
    coerces_to: t.Optional[t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]]] = None,
    dialect: DialectType = None,
) -> E:
    """
    Infers the types of an expression, annotating its AST accordingly.

    Example:
        >>> import sqlglot
        >>> schema = {"y": {"cola": "SMALLINT"}}
        >>> sql = "SELECT x.cola + 2.5 AS cola FROM (SELECT y.cola AS cola FROM y AS y) AS x"
        >>> annotated_expr = annotate_types(sqlglot.parse_one(sql), schema=schema)
        >>> annotated_expr.expressions[0].type.this  # Get the type of "x.cola + 2.5 AS cola"
        <Type.DOUBLE: 'DOUBLE'>

    Args:
        expression: Expression to annotate.
        schema: Database schema.
        annotators: Maps expression type to corresponding annotation function.
        coerces_to: Maps expression type to set of types that it can be coerced into.
        dialect: Dialect forwarded to `ensure_schema` when the schema is normalized.

    Returns:
        The expression annotated with types.
    """
    # Normalize whatever was passed (dict, Schema or None) into a Schema first
    normalized_schema = ensure_schema(schema, dialect=dialect)

    annotator = TypeAnnotator(normalized_schema, annotators, coerces_to)
    return annotator.annotate(expression)

Infers the types of an expression, annotating its AST accordingly.

Example:
>>> import sqlglot
>>> schema = {"y": {"cola": "SMALLINT"}}
>>> sql = "SELECT x.cola + 2.5 AS cola FROM (SELECT y.cola AS cola FROM y AS y) AS x"
>>> annotated_expr = annotate_types(sqlglot.parse_one(sql), schema=schema)
>>> annotated_expr.expressions[0].type.this  # Get the type of "x.cola + 2.5 AS cola"
<Type.DOUBLE: 'DOUBLE'>
Arguments:
  • expression: Expression to annotate.
  • schema: Database schema.
  • annotators: Maps expression type to corresponding annotation function.
  • coerces_to: Maps expression type to set of types that it can be coerced into.
  • dialect: Dialect forwarded to ensure_schema when the schema is normalized.
Returns:

The expression annotated with types.

def swap_args(func: BinaryCoercionFunc) -> BinaryCoercionFunc:
    """Returns a wrapper around `func` that calls it with its two arguments reversed."""

    def _swapped(l: exp.Expression, r: exp.Expression) -> exp.DataType.Type:
        return func(r, l)

    # Copy func's metadata (name, docstring, ...) onto the wrapper, as functools.wraps would
    return functools.update_wrapper(_swapped, func)
def swap_all(coercions: BinaryCoercions) -> BinaryCoercions:
    """
    Returns a new mapping holding every entry of `coercions` plus, for each `(a, b)` key,
    a `(b, a)` key mapped to the argument-swapped function.

    When a mirrored key already exists in `coercions`, the mirrored entry takes over.
    """
    symmetric = dict(coercions)

    for (a, b), func in coercions.items():
        symmetric[(b, a)] = swap_args(func)

    return symmetric
class TypeAnnotator:
142class TypeAnnotator(metaclass=_TypeAnnotator):
143    NESTED_TYPES = {
144        exp.DataType.Type.ARRAY,
145    }
146
147    # Specifies what types a given type can be coerced into (autofilled)
148    COERCES_TO: t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]] = {}
149
150    # Coercion functions for binary operations.
151    # Map of type pairs to a callable that takes both sides of the binary operation and returns the resulting type.
152    BINARY_COERCIONS: BinaryCoercions = {
153        **swap_all(
154            {
155                (t, exp.DataType.Type.INTERVAL): lambda l, r: _coerce_date_literal(
156                    l, r.args.get("unit")
157                )
158                for t in exp.DataType.TEXT_TYPES
159            }
160        ),
161        **swap_all(
162            {
163                # text + numeric will yield the numeric type to match most dialects' semantics
164                (text, numeric): lambda l, r: t.cast(
165                    exp.DataType.Type, l.type if l.type in exp.DataType.NUMERIC_TYPES else r.type
166                )
167                for text in exp.DataType.TEXT_TYPES
168                for numeric in exp.DataType.NUMERIC_TYPES
169            }
170        ),
171        **swap_all(
172            {
173                (exp.DataType.Type.DATE, exp.DataType.Type.INTERVAL): lambda l, r: _coerce_date(
174                    l, r.args.get("unit")
175                ),
176            }
177        ),
178    }
179
180    def __init__(
181        self,
182        schema: Schema,
183        annotators: t.Optional[AnnotatorsType] = None,
184        coerces_to: t.Optional[t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]]] = None,
185        binary_coercions: t.Optional[BinaryCoercions] = None,
186    ) -> None:
187        self.schema = schema
188        self.annotators = annotators or Dialect.get_or_raise(schema.dialect).ANNOTATORS
189        self.coerces_to = (
190            coerces_to or Dialect.get_or_raise(schema.dialect).COERCES_TO or self.COERCES_TO
191        )
192        self.binary_coercions = binary_coercions or self.BINARY_COERCIONS
193
194        # Caches the ids of annotated sub-Expressions, to ensure we only visit them once
195        self._visited: t.Set[int] = set()
196
197        # Caches NULL-annotated expressions to set them to UNKNOWN after type inference is completed
198        self._null_expressions: t.Dict[int, exp.Expression] = {}
199
200        # Databricks and Spark ≥v3 actually support NULL (i.e., VOID) as a type
201        self._supports_null_type = schema.dialect in ("databricks", "spark")
202
203        # Maps an exp.SetOperation's id (e.g. UNION) to its projection types. This is computed if the
204        # exp.SetOperation is the expression of a scope source, as selecting from it multiple times
205        # would reprocess the entire subtree to coerce the types of its operands' projections
206        self._setop_column_types: t.Dict[int, t.Dict[str, exp.DataType | exp.DataType.Type]] = {}
207
208    def _set_type(
209        self, expression: exp.Expression, target_type: t.Optional[exp.DataType | exp.DataType.Type]
210    ) -> None:
211        prev_type = expression.type
212        expression_id = id(expression)
213
214        expression.type = target_type or exp.DataType.Type.UNKNOWN  # type: ignore
215        self._visited.add(expression_id)
216
217        if (
218            not self._supports_null_type
219            and t.cast(exp.DataType, expression.type).this == exp.DataType.Type.NULL
220        ):
221            self._null_expressions[expression_id] = expression
222        elif prev_type and t.cast(exp.DataType, prev_type).this == exp.DataType.Type.NULL:
223            self._null_expressions.pop(expression_id, None)
224
225    def annotate(self, expression: E) -> E:
226        for scope in traverse_scope(expression):
227            self.annotate_scope(scope)
228
229        # This takes care of non-traversable expressions
230        expression = self._maybe_annotate(expression)
231
232        # Replace NULL type with UNKNOWN, since the former is not an actual type;
233        # it is mostly used to aid type coercion, e.g. in query set operations.
234        for expr in self._null_expressions.values():
235            expr.type = exp.DataType.Type.UNKNOWN
236
237        return expression
238
    def annotate_scope(self, scope: Scope) -> None:
        """
        Annotate the expressions of a single scope.

        The work happens in this order: collect the projection types of the scope's
        Scope-typed sources, type the scope's qualified column references, type table
        columns (MappingSchema only), annotate the rest of the scope's expression and,
        for BigQuery queries, record a STRUCT type in the query's `meta`.

        Args:
            scope: Scope to annotate.
        """
        # Maps a source name to a {column name: type} mapping of that source's projections
        selects = {}
        for name, source in scope.sources.items():
            # Only sources that are scopes themselves are handled in this loop
            if not isinstance(source, Scope):
                continue

            expression = source.expression
            if isinstance(expression, exp.UDTF):
                values = []

                if isinstance(expression, exp.Lateral):
                    if isinstance(expression.this, exp.Explode):
                        values = [expression.this.this]
                elif isinstance(expression, exp.Unnest):
                    values = [expression]
                elif not isinstance(expression, exp.TableFromRows):
                    values = expression.expressions[0].expressions

                # No values could be derived for this UDTF, so its columns stay unmapped
                if not values:
                    continue

                # Pair the UDTF's column aliases positionally with the collected values
                selects[name] = {
                    alias: column.type
                    for alias, column in zip(expression.alias_column_names, values)
                }
            elif isinstance(expression, exp.SetOperation) and len(expression.left.selects) == len(
                expression.right.selects
            ):
                # The same dict is stored in the cache and in `selects`, so filling it below
                # updates both; a non-empty cached dict skips the computation entirely
                selects[name] = col_types = self._setop_column_types.setdefault(id(expression), {})

                if not col_types:
                    # Process a chain / sub-tree of set operations
                    for set_op in expression.walk(
                        prune=lambda n: not isinstance(n, (exp.SetOperation, exp.Subquery))
                    ):
                        if not isinstance(set_op, exp.SetOperation):
                            continue

                        if set_op.args.get("by_name"):
                            # Match the operands' projections by name instead of by position
                            r_type_by_select = {
                                s.alias_or_name: s.type for s in set_op.right.selects
                            }
                            setop_cols = {
                                s.alias_or_name: self._maybe_coerce(
                                    t.cast(exp.DataType, s.type),
                                    r_type_by_select.get(s.alias_or_name)
                                    or exp.DataType.Type.UNKNOWN,
                                )
                                for s in set_op.left.selects
                            }
                        else:
                            # Positional matching; result columns are keyed by the left operand's names
                            setop_cols = {
                                ls.alias_or_name: self._maybe_coerce(
                                    t.cast(exp.DataType, ls.type), t.cast(exp.DataType, rs.type)
                                )
                                for ls, rs in zip(set_op.left.selects, set_op.right.selects)
                            }

                        # Coerce intermediate results with the previously registered types, if they exist
                        for col_name, col_type in setop_cols.items():
                            col_types[col_name] = self._maybe_coerce(
                                col_type, col_types.get(col_name, exp.DataType.Type.NULL)
                            )

            else:
                selects[name] = {s.alias_or_name: s.type for s in expression.selects}

        # First annotate the current scope's column references
        for col in scope.columns:
            # Columns without a table qualifier are not typed in this loop
            if not col.table:
                continue

            source = scope.sources.get(col.table)
            if isinstance(source, exp.Table):
                # Table-backed columns get their type from the schema
                self._set_type(col, self.schema.get_column_type(source, col))
            elif source:
                if col.table in selects and col.name in selects[col.table]:
                    self._set_type(col, selects[col.table][col.name])
                elif isinstance(source.expression, exp.Unnest):
                    self._set_type(col, source.expression.type)

        if isinstance(self.schema, MappingSchema):
            for table_column in scope.table_columns:
                source = scope.sources.get(table_column.name)

                if isinstance(source, exp.Table):
                    schema = self.schema.find(
                        source, raise_on_missing=False, ensure_data_types=True
                    )
                    if not isinstance(schema, dict):
                        continue

                    # Type the table reference as a STRUCT with one field per schema entry
                    struct_type = exp.DataType(
                        this=exp.DataType.Type.STRUCT,
                        expressions=[
                            exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
                            for c, kind in schema.items()
                        ],
                        nested=True,
                    )
                    self._set_type(table_column, struct_type)
                elif (
                    isinstance(source, Scope)
                    and isinstance(source.expression, exp.Query)
                    and (
                        source.expression.meta.get("query_type") or exp.DataType.build("UNKNOWN")
                    ).is_type(exp.DataType.Type.STRUCT)
                ):
                    # Reuse the STRUCT type stored in the source query's meta under "query_type"
                    self._set_type(table_column, source.expression.meta["query_type"])

        # Then (possibly) annotate the remaining expressions in the scope
        self._maybe_annotate(scope.expression)

        if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
            # Build a STRUCT type with one field per projection of the query
            struct_type = exp.DataType(
                this=exp.DataType.Type.STRUCT,
                expressions=[
                    exp.ColumnDef(
                        this=exp.to_identifier(select.output_name),
                        kind=select.type.copy() if select.type else None,
                    )
                    for select in scope.expression.selects
                ],
                nested=True,
            )

            # Only store the STRUCT type when none of the typed fields is UNKNOWN
            if not any(
                cd.kind.is_type(exp.DataType.Type.UNKNOWN)
                for cd in struct_type.expressions
                if cd.kind
            ):
                # We don't use `_set_type` on purpose here. If we annotated the query directly, then
                # using it in other contexts (e.g., ARRAY(<query>)) could result in incorrect type
                # annotations, i.e., it shouldn't be interpreted as a STRUCT value.
                scope.expression.meta["query_type"] = struct_type
374
375    def _maybe_annotate(self, expression: E) -> E:
376        if id(expression) in self._visited:
377            return expression  # We've already inferred the expression's type
378
379        annotator = self.annotators.get(expression.__class__)
380
381        return (
382            annotator(self, expression)
383            if annotator
384            else self._annotate_with_type(expression, exp.DataType.Type.UNKNOWN)
385        )
386
387    def _annotate_args(self, expression: E) -> E:
388        for value in expression.iter_expressions():
389            self._maybe_annotate(value)
390
391        return expression
392
393    def _maybe_coerce(
394        self,
395        type1: exp.DataType | exp.DataType.Type,
396        type2: exp.DataType | exp.DataType.Type,
397    ) -> exp.DataType | exp.DataType.Type:
398        """
399        Returns type2 if type1 can be coerced into it, otherwise type1.
400
401        If either type is parameterized (e.g. DECIMAL(18, 2) contains two parameters),
402        we assume type1 does not coerce into type2, so we also return it in this case.
403        """
404        if isinstance(type1, exp.DataType):
405            if type1.expressions:
406                return type1
407            type1_value = type1.this
408        else:
409            type1_value = type1
410
411        if isinstance(type2, exp.DataType):
412            if type2.expressions:
413                return type2
414            type2_value = type2.this
415        else:
416            type2_value = type2
417
418        # We propagate the UNKNOWN type upwards if found
419        if exp.DataType.Type.UNKNOWN in (type1_value, type2_value):
420            return exp.DataType.Type.UNKNOWN
421
422        return type2_value if type2_value in self.coerces_to.get(type1_value, {}) else type1_value
423
424    def _annotate_binary(self, expression: B) -> B:
425        self._annotate_args(expression)
426
427        left, right = expression.left, expression.right
428        left_type, right_type = left.type.this, right.type.this  # type: ignore
429
430        if isinstance(expression, (exp.Connector, exp.Predicate)):
431            self._set_type(expression, exp.DataType.Type.BOOLEAN)
432        elif (left_type, right_type) in self.binary_coercions:
433            self._set_type(expression, self.binary_coercions[(left_type, right_type)](left, right))
434        else:
435            self._set_type(expression, self._maybe_coerce(left_type, right_type))
436
437        return expression
438
439    def _annotate_unary(self, expression: E) -> E:
440        self._annotate_args(expression)
441
442        if isinstance(expression, exp.Not):
443            self._set_type(expression, exp.DataType.Type.BOOLEAN)
444        else:
445            self._set_type(expression, expression.this.type)
446
447        return expression
448
449    def _annotate_literal(self, expression: exp.Literal) -> exp.Literal:
450        if expression.is_string:
451            self._set_type(expression, exp.DataType.Type.VARCHAR)
452        elif expression.is_int:
453            self._set_type(expression, exp.DataType.Type.INT)
454        else:
455            self._set_type(expression, exp.DataType.Type.DOUBLE)
456
457        return expression
458
459    def _annotate_with_type(
460        self, expression: E, target_type: exp.DataType | exp.DataType.Type
461    ) -> E:
462        self._set_type(expression, target_type)
463        return self._annotate_args(expression)
464
465    @t.no_type_check
466    def _annotate_by_args(
467        self,
468        expression: E,
469        *args: str,
470        promote: bool = False,
471        array: bool = False,
472    ) -> E:
473        self._annotate_args(expression)
474
475        expressions: t.List[exp.Expression] = []
476        for arg in args:
477            arg_expr = expression.args.get(arg)
478            expressions.extend(expr for expr in ensure_list(arg_expr) if expr)
479
480        last_datatype = None
481        for expr in expressions:
482            expr_type = expr.type
483
484            # Stop at the first nested data type found - we don't want to _maybe_coerce nested types
485            if expr_type.args.get("nested"):
486                last_datatype = expr_type
487                break
488
489            if not expr_type.is_type(exp.DataType.Type.UNKNOWN):
490                last_datatype = self._maybe_coerce(last_datatype or expr_type, expr_type)
491
492        self._set_type(expression, last_datatype or exp.DataType.Type.UNKNOWN)
493
494        if promote:
495            if expression.type.this in exp.DataType.INTEGER_TYPES:
496                self._set_type(expression, exp.DataType.Type.BIGINT)
497            elif expression.type.this in exp.DataType.FLOAT_TYPES:
498                self._set_type(expression, exp.DataType.Type.DOUBLE)
499
500        if array:
501            self._set_type(
502                expression,
503                exp.DataType(
504                    this=exp.DataType.Type.ARRAY, expressions=[expression.type], nested=True
505                ),
506            )
507
508        return expression
509
510    def _annotate_timeunit(
511        self, expression: exp.TimeUnit | exp.DateTrunc
512    ) -> exp.TimeUnit | exp.DateTrunc:
513        self._annotate_args(expression)
514
515        if expression.this.type.this in exp.DataType.TEXT_TYPES:
516            datatype = _coerce_date_literal(expression.this, expression.unit)
517        elif expression.this.type.this in exp.DataType.TEMPORAL_TYPES:
518            datatype = _coerce_date(expression.this, expression.unit)
519        else:
520            datatype = exp.DataType.Type.UNKNOWN
521
522        self._set_type(expression, datatype)
523        return expression
524
525    def _annotate_bracket(self, expression: exp.Bracket) -> exp.Bracket:
526        self._annotate_args(expression)
527
528        bracket_arg = expression.expressions[0]
529        this = expression.this
530
531        if isinstance(bracket_arg, exp.Slice):
532            self._set_type(expression, this.type)
533        elif this.type.is_type(exp.DataType.Type.ARRAY):
534            self._set_type(expression, seq_get(this.type.expressions, 0))
535        elif isinstance(this, (exp.Map, exp.VarMap)) and bracket_arg in this.keys:
536            index = this.keys.index(bracket_arg)
537            value = seq_get(this.values, index)
538            self._set_type(expression, value.type if value else None)
539        else:
540            self._set_type(expression, exp.DataType.Type.UNKNOWN)
541
542        return expression
543
544    def _annotate_div(self, expression: exp.Div) -> exp.Div:
545        self._annotate_args(expression)
546
547        left_type, right_type = expression.left.type.this, expression.right.type.this  # type: ignore
548
549        if (
550            expression.args.get("typed")
551            and left_type in exp.DataType.INTEGER_TYPES
552            and right_type in exp.DataType.INTEGER_TYPES
553        ):
554            self._set_type(expression, exp.DataType.Type.BIGINT)
555        else:
556            self._set_type(expression, self._maybe_coerce(left_type, right_type))
557            if expression.type and expression.type.this not in exp.DataType.REAL_TYPES:
558                self._set_type(
559                    expression, self._maybe_coerce(expression.type, exp.DataType.Type.DOUBLE)
560                )
561
562        return expression
563
564    def _annotate_dot(self, expression: exp.Dot) -> exp.Dot:
565        self._annotate_args(expression)
566        self._set_type(expression, None)
567        this_type = expression.this.type
568
569        if this_type and this_type.is_type(exp.DataType.Type.STRUCT):
570            for e in this_type.expressions:
571                if e.name == expression.expression.name:
572                    self._set_type(expression, e.kind)
573                    break
574
575        return expression
576
577    def _annotate_explode(self, expression: exp.Explode) -> exp.Explode:
578        self._annotate_args(expression)
579        self._set_type(expression, seq_get(expression.this.type.expressions, 0))
580        return expression
581
582    def _annotate_unnest(self, expression: exp.Unnest) -> exp.Unnest:
583        self._annotate_args(expression)
584        child = seq_get(expression.expressions, 0)
585
586        if child and child.is_type(exp.DataType.Type.ARRAY):
587            expr_type = seq_get(child.type.expressions, 0)
588        else:
589            expr_type = None
590
591        self._set_type(expression, expr_type)
592        return expression
593
594    def _annotate_struct_value(
595        self, expression: exp.Expression
596    ) -> t.Optional[exp.DataType] | exp.ColumnDef:
597        alias = expression.args.get("alias")
598        if alias:
599            return exp.ColumnDef(this=alias.copy(), kind=expression.type)
600
601        # Case: key = value or key := value
602        if expression.expression:
603            return exp.ColumnDef(this=expression.this.copy(), kind=expression.expression.type)
604
605        return expression.type
606
607    def _annotate_struct(self, expression: exp.Struct) -> exp.Struct:
608        self._annotate_args(expression)
609        self._set_type(
610            expression,
611            exp.DataType(
612                this=exp.DataType.Type.STRUCT,
613                expressions=[self._annotate_struct_value(expr) for expr in expression.expressions],
614                nested=True,
615            ),
616        )
617        return expression
618
619    @t.overload
620    def _annotate_map(self, expression: exp.Map) -> exp.Map: ...
621
622    @t.overload
623    def _annotate_map(self, expression: exp.VarMap) -> exp.VarMap: ...
624
625    def _annotate_map(self, expression):
626        self._annotate_args(expression)
627
628        keys = expression.args.get("keys")
629        values = expression.args.get("values")
630
631        map_type = exp.DataType(this=exp.DataType.Type.MAP)
632        if isinstance(keys, exp.Array) and isinstance(values, exp.Array):
633            key_type = seq_get(keys.type.expressions, 0) or exp.DataType.Type.UNKNOWN
634            value_type = seq_get(values.type.expressions, 0) or exp.DataType.Type.UNKNOWN
635
636            if key_type != exp.DataType.Type.UNKNOWN and value_type != exp.DataType.Type.UNKNOWN:
637                map_type.set("expressions", [key_type, value_type])
638                map_type.set("nested", True)
639
640        self._set_type(expression, map_type)
641        return expression
642
643    def _annotate_to_map(self, expression: exp.ToMap) -> exp.ToMap:
644        self._annotate_args(expression)
645
646        map_type = exp.DataType(this=exp.DataType.Type.MAP)
647        arg = expression.this
648        if arg.is_type(exp.DataType.Type.STRUCT):
649            for coldef in arg.type.expressions:
650                kind = coldef.kind
651                if kind != exp.DataType.Type.UNKNOWN:
652                    map_type.set("expressions", [exp.DataType.build("varchar"), kind])
653                    map_type.set("nested", True)
654                    break
655
656        self._set_type(expression, map_type)
657        return expression
658
659    def _annotate_extract(self, expression: exp.Extract) -> exp.Extract:
660        self._annotate_args(expression)
661        part = expression.name
662        if part == "TIME":
663            self._set_type(expression, exp.DataType.Type.TIME)
664        elif part == "DATE":
665            self._set_type(expression, exp.DataType.Type.DATE)
666        else:
667            self._set_type(expression, exp.DataType.Type.INT)
668        return expression
669
670    def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
671        self._annotate_args(expression)
672
673        array_arg = expression.this
674        if array_arg.type.is_type(exp.DataType.Type.ARRAY):
675            element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
676            self._set_type(expression, element_type)
677        else:
678            self._set_type(expression, exp.DataType.Type.UNKNOWN)
679
680        return expression
TypeAnnotator( schema: sqlglot.schema.Schema, annotators: Optional[Dict[Type[~E], Callable[[TypeAnnotator, ~E], ~E]]] = None, coerces_to: Optional[Dict[sqlglot.expressions.DataType.Type, Set[sqlglot.expressions.DataType.Type]]] = None, binary_coercions: Optional[Dict[Tuple[sqlglot.expressions.DataType.Type, sqlglot.expressions.DataType.Type], Callable[[sqlglot.expressions.Expression, sqlglot.expressions.Expression], sqlglot.expressions.DataType.Type]]] = None)
180    def __init__(
181        self,
182        schema: Schema,
183        annotators: t.Optional[AnnotatorsType] = None,
184        coerces_to: t.Optional[t.Dict[exp.DataType.Type, t.Set[exp.DataType.Type]]] = None,
185        binary_coercions: t.Optional[BinaryCoercions] = None,
186    ) -> None:
187        self.schema = schema
188        self.annotators = annotators or Dialect.get_or_raise(schema.dialect).ANNOTATORS
189        self.coerces_to = (
190            coerces_to or Dialect.get_or_raise(schema.dialect).COERCES_TO or self.COERCES_TO
191        )
192        self.binary_coercions = binary_coercions or self.BINARY_COERCIONS
193
194        # Caches the ids of annotated sub-Expressions, to ensure we only visit them once
195        self._visited: t.Set[int] = set()
196
197        # Caches NULL-annotated expressions to set them to UNKNOWN after type inference is completed
198        self._null_expressions: t.Dict[int, exp.Expression] = {}
199
200        # Databricks and Spark ≥v3 actually support NULL (i.e., VOID) as a type
201        self._supports_null_type = schema.dialect in ("databricks", "spark")
202
203        # Maps an exp.SetOperation's id (e.g. UNION) to its projection types. This is computed if the
204        # exp.SetOperation is the expression of a scope source, as selecting from it multiple times
205        # would reprocess the entire subtree to coerce the types of its operands' projections
206        self._setop_column_types: t.Dict[int, t.Dict[str, exp.DataType | exp.DataType.Type]] = {}
NESTED_TYPES = {<Type.ARRAY: 'ARRAY'>}
COERCES_TO: Dict[sqlglot.expressions.DataType.Type, Set[sqlglot.expressions.DataType.Type]] = {<Type.TEXT: 'TEXT'>: set(), <Type.NVARCHAR: 'NVARCHAR'>: {<Type.TEXT: 'TEXT'>}, <Type.VARCHAR: 'VARCHAR'>: {<Type.NVARCHAR: 'NVARCHAR'>, <Type.TEXT: 'TEXT'>}, <Type.NCHAR: 'NCHAR'>: {<Type.NVARCHAR: 'NVARCHAR'>, <Type.TEXT: 'TEXT'>, <Type.VARCHAR: 'VARCHAR'>}, <Type.CHAR: 'CHAR'>: {<Type.NVARCHAR: 'NVARCHAR'>, <Type.TEXT: 'TEXT'>, <Type.VARCHAR: 'VARCHAR'>, <Type.NCHAR: 'NCHAR'>}, <Type.DOUBLE: 'DOUBLE'>: set(), <Type.FLOAT: 'FLOAT'>: {<Type.DOUBLE: 'DOUBLE'>}, <Type.DECIMAL: 'DECIMAL'>: {<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.FLOAT: 'FLOAT'>, <Type.DOUBLE: 'DOUBLE'>}, <Type.BIGINT: 'BIGINT'>: {<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.DECIMAL: 'DECIMAL'>, <Type.FLOAT: 'FLOAT'>, <Type.DOUBLE: 'DOUBLE'>}, <Type.INT: 'INT'>: {<Type.BIGINT: 'BIGINT'>, <Type.DECIMAL: 'DECIMAL'>, <Type.FLOAT: 'FLOAT'>, <Type.DOUBLE: 'DOUBLE'>}, <Type.SMALLINT: 'SMALLINT'>: {<Type.INT: 'INT'>, <Type.BIGINT: 'BIGINT'>, <Type.FLOAT: 'FLOAT'>, <Type.DECIMAL: 'DECIMAL'>, <Type.DOUBLE: 'DOUBLE'>}, <Type.TINYINT: 'TINYINT'>: {<Type.INT: 'INT'>, <Type.BIGINT: 'BIGINT'>, <Type.FLOAT: 'FLOAT'>, <Type.DECIMAL: 'DECIMAL'>, <Type.SMALLINT: 'SMALLINT'>, <Type.DOUBLE: 'DOUBLE'>}, <Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>: set(), <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: {<Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>}, <Type.TIMESTAMP: 'TIMESTAMP'>: {<Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>}, <Type.DATETIME: 'DATETIME'>: {<Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <Type.TIMESTAMP: 'TIMESTAMP'>, <Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>}, <Type.DATE: 'DATE'>: {<Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <Type.TIMESTAMP: 'TIMESTAMP'>, <Type.DATETIME: 'DATETIME'>, <Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>}, <Type.NULL: 'NULL'>: {<Type.DATETIME: 'DATETIME'>, <Type.DATE: 'DATE'>, <Type.NCHAR: 'NCHAR'>, <Type.DECIMAL: 'DECIMAL'>, <Type.SMALLINT: 'SMALLINT'>, <Type.INT: 'INT'>, <Type.CHAR: 'CHAR'>, <Type.TEXT: 'TEXT'>, <Type.BIGINT: 
'BIGINT'>, <Type.TINYINT: 'TINYINT'>, <Type.VARCHAR: 'VARCHAR'>, <Type.FLOAT: 'FLOAT'>, <Type.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>, <Type.TIMESTAMP: 'TIMESTAMP'>, <Type.NVARCHAR: 'NVARCHAR'>, <Type.DOUBLE: 'DOUBLE'>}}
BINARY_COERCIONS: Dict[Tuple[sqlglot.expressions.DataType.Type, sqlglot.expressions.DataType.Type], Callable[[sqlglot.expressions.Expression, sqlglot.expressions.Expression], sqlglot.expressions.DataType.Type]] = {(<Type.NCHAR: 'NCHAR'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UINT128: 'UINT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UDECIMAL: 'UDECIMAL'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.INT128: 'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DOUBLE: 'DOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NCHAR: 'NCHAR'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, 
(<Type.NCHAR: 'NCHAR'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UINT128: 'UINT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UDECIMAL: 'UDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.INT128: 'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DOUBLE: 'DOUBLE'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NAME: 'NAME'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UINT128: 'UINT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UDECIMAL: 'UDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.INT128: 
'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DOUBLE: 'DOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.CHAR: 'CHAR'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UINT128: 'UINT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UDECIMAL: 'UDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, 
(<Type.TEXT: 'TEXT'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.INT128: 'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DOUBLE: 'DOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TEXT: 'TEXT'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UINT128: 'UINT128'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UDECIMAL: 'UDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.INT128: 'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DOUBLE: 'DOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 
'VARCHAR'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.VARCHAR: 'VARCHAR'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UINT128: 'UINT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UDOUBLE: 'UDOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DECIMAL: 'DECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.BIGDECIMAL: 'BIGDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.SMALLINT: 'SMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UDECIMAL: 'UDECIMAL'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.INT: 'INT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.BIGINT: 'BIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.TINYINT: 'TINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DECIMAL64: 'DECIMAL64'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.INT256: 'INT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UTINYINT: 'UTINYINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, 
<Type.FLOAT: 'FLOAT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.INT128: 'INT128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.USMALLINT: 'USMALLINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UINT256: 'UINT256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.MEDIUMINT: 'MEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DECIMAL128: 'DECIMAL128'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.MONEY: 'MONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DOUBLE: 'DOUBLE'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UBIGINT: 'UBIGINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DECIMAL32: 'DECIMAL32'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.SMALLMONEY: 'SMALLMONEY'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UINT: 'UINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.BIT: 'BIT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.DECIMAL256: 'DECIMAL256'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.NVARCHAR: 'NVARCHAR'>, <Type.UMEDIUMINT: 'UMEDIUMINT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.NCHAR: 'NCHAR'>): 
<function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, 
(<Type.DECIMAL256: 'DECIMAL256'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.NCHAR: 'NCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.NAME: 'NAME'>): 
<function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL256: 'DECIMAL256'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.NAME: 'NAME'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, 
<Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL256: 'DECIMAL256'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.CHAR: 'CHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.TEXT: 'TEXT'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL256: 'DECIMAL256'>, <Type.TEXT: 'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.TEXT: 
'TEXT'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.VARCHAR: 'VARCHAR'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL256: 'DECIMAL256'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.VARCHAR: 'VARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT128: 'UINT128'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDOUBLE: 'UDOUBLE'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL: 'DECIMAL'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGDECIMAL: 'BIGDECIMAL'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLINT: 'SMALLINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UDECIMAL: 'UDECIMAL'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT: 'INT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIGINT: 'BIGINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.TINYINT: 'TINYINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL64: 'DECIMAL64'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT256: 'INT256'>, <Type.NVARCHAR: 'NVARCHAR'>): <function 
TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UTINYINT: 'UTINYINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.FLOAT: 'FLOAT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.INT128: 'INT128'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.USMALLINT: 'USMALLINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT256: 'UINT256'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MEDIUMINT: 'MEDIUMINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL128: 'DECIMAL128'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.MONEY: 'MONEY'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DOUBLE: 'DOUBLE'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UBIGINT: 'UBIGINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL32: 'DECIMAL32'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.SMALLMONEY: 'SMALLMONEY'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UINT: 'UINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.BIT: 'BIT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DECIMAL256: 'DECIMAL256'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.UMEDIUMINT: 'UMEDIUMINT'>, <Type.NVARCHAR: 'NVARCHAR'>): <function TypeAnnotator.<dictcomp>.<lambda>>, (<Type.DATE: 'DATE'>, <Type.INTERVAL: 'INTERVAL'>): <function TypeAnnotator.<lambda>>, (<Type.INTERVAL: 'INTERVAL'>, <Type.DATE: 'DATE'>): <function TypeAnnotator.<lambda>>}
schema
annotators
coerces_to
binary_coercions
def annotate(self, expression: ~E) -> ~E:
225    def annotate(self, expression: E) -> E:
           # Annotate `expression` scope by scope, then the expression itself, and
           # return the result.
           #
           # Args:
           #     expression: The root expression to process.
           #
           # Returns:
           #     The value returned by `self._maybe_annotate(expression)`.
           #
           # Step 1: visit every scope that `traverse_scope` yields for the root and
           # let `annotate_scope` handle it.
226        for scope in traverse_scope(expression):
227            self.annotate_scope(scope)
228
229        # This takes care of non-traversable expressions
           # The local name is rebound, so the value returned below is whatever
           # `_maybe_annotate` hands back.
230        expression = self._maybe_annotate(expression)
231
232        # Replace NULL type with UNKNOWN, since the former is not an actual type;
233        # it is mostly used to aid type coercion, e.g. in query set operations.
           # NOTE(review): `_null_expressions` is filled in outside this method —
           # presumably it collects the expressions that were typed NULL during
           # annotation; confirm against where it is populated.
234        for expr in self._null_expressions.values():
235            expr.type = exp.DataType.Type.UNKNOWN
236
237        return expression
def annotate_scope(self, scope: sqlglot.optimizer.scope.Scope) -> None:
239    def annotate_scope(self, scope: Scope) -> None:
           # Annotates a single scope: first its qualified column references, then its
           # table-column references (MappingSchema only), then the rest of the scope's
           # expression via `_maybe_annotate`.
           #
           # Args:
           #     scope: The scope whose column references and expression are annotated.
           #
           # `selects` maps the name of every source that is itself a Scope to a
           # {column name: type} dict; it is consulted below when typing columns that
           # reference such a source.
240        selects = {}
241        for name, source in scope.sources.items():
               # Only Scope sources contribute here; exp.Table sources are resolved
               # against `self.schema` further down.
242            if not isinstance(source, Scope):
243                continue
244
245            expression = source.expression
246            if isinstance(expression, exp.UDTF):
                   # `values` holds the expressions that are zipped with the UDTF's alias
                   # column names to derive one type per alias.
247                values = []
248
249                if isinstance(expression, exp.Lateral):
                       # A Lateral is only handled when it wraps an Explode; the operand of
                       # the Explode is then the single value.
250                    if isinstance(expression.this, exp.Explode):
251                        values = [expression.this.this]
252                elif isinstance(expression, exp.Unnest):
                       # The Unnest expression itself is the single value.
253                    values = [expression]
254                elif not isinstance(expression, exp.TableFromRows):
                       # Any other UDTF except TableFromRows: use the expressions of its first
                       # entry. NOTE(review): presumably the first row of a VALUES-like
                       # clause; this indexes [0] without an emptiness check - confirm.
255                    values = expression.expressions[0].expressions
256
                   # No values means no types can be derived, so this source gets no entry.
257                if not values:
258                    continue
259
                   # zip() stops at the shorter of the two sequences, so surplus aliases or
                   # surplus values are ignored.
260                selects[name] = {
261                    alias: column.type
262                    for alias, column in zip(expression.alias_column_names, values)
263                }
               # Set operations are handled here only when both sides project the same number
               # of selects; otherwise they fall through to the generic `else` branch below.
264            elif isinstance(expression, exp.SetOperation) and len(expression.left.selects) == len(
265                expression.right.selects
266            ):
                   # The per-column types are cached in `_setop_column_types`, keyed by
                   # id(expression). `selects[name]` and `col_types` are the same dict object,
                   # so filling `col_types` below fills both the cache and `selects`.
267                selects[name] = col_types = self._setop_column_types.setdefault(id(expression), {})
268
                   # An empty dict means nothing has been computed for this set operation yet.
269                if not col_types:
270                    # Process a chain / sub-tree of set operations
                       # The prune predicate is true for every node that is neither a
                       # SetOperation nor a Subquery - presumably this stops the walk from
                       # descending below such nodes (confirm against `walk`).
271                    for set_op in expression.walk(
272                        prune=lambda n: not isinstance(n, (exp.SetOperation, exp.Subquery))
273                    ):
                           # Nodes yielded by the walk that are not set operations are skipped.
274                        if not isinstance(set_op, exp.SetOperation):
275                            continue
276
277                        if set_op.args.get("by_name"):
                               # With `by_name` set, right-hand selects are matched to left-hand
                               # selects by alias/name instead of by position.
278                            r_type_by_select = {
279                                s.alias_or_name: s.type for s in set_op.right.selects
280                            }
                               # A left select without a (truthy) right-hand type under the same
                               # name is coerced against UNKNOWN.
281                            setop_cols = {
282                                s.alias_or_name: self._maybe_coerce(
283                                    t.cast(exp.DataType, s.type),
284                                    r_type_by_select.get(s.alias_or_name)
285                                    or exp.DataType.Type.UNKNOWN,
286                                )
287                                for s in set_op.left.selects
288                            }
289                        else:
                               # Positional pairing: the i-th left select is coerced with the
                               # i-th right select and keyed by the left select's alias/name.
290                            setop_cols = {
291                                ls.alias_or_name: self._maybe_coerce(
292                                    t.cast(exp.DataType, ls.type), t.cast(exp.DataType, rs.type)
293                                )
294                                for ls, rs in zip(set_op.left.selects, set_op.right.selects)
295                            }
296
297                        # Coerce intermediate results with the previously registered types, if they exist
                           # A column that has not been registered yet is coerced against NULL.
298                        for col_name, col_type in setop_cols.items():
299                            col_types[col_name] = self._maybe_coerce(
300                                col_type, col_types.get(col_name, exp.DataType.Type.NULL)
301                            )
302
303            else:
                   # Generic case: take the type currently attached to each projected select.
304                selects[name] = {s.alias_or_name: s.type for s in expression.selects}
305
306        # First annotate the current scope's column references
307        for col in scope.columns:
               # Columns without a table qualifier are not annotated in this loop.
308            if not col.table:
309                continue
310
311            source = scope.sources.get(col.table)
312            if isinstance(source, exp.Table):
                   # Table-backed columns get their type from the schema.
313                self._set_type(col, self.schema.get_column_type(source, col))
314            elif source:
                   # Otherwise prefer the type collected in `selects`; if the column is not
                   # found there and the source's expression is an Unnest, use the Unnest's
                   # own type. In any other case the column is left untouched here.
315                if col.table in selects and col.name in selects[col.table]:
316                    self._set_type(col, selects[col.table][col.name])
317                elif isinstance(source.expression, exp.Unnest):
318                    self._set_type(col, source.expression.type)
319
           # Table-column references are only annotated when the schema is a MappingSchema.
320        if isinstance(self.schema, MappingSchema):
321            for table_column in scope.table_columns:
                   # The source is looked up by the table column's name.
322                source = scope.sources.get(table_column.name)
323
324                if isinstance(source, exp.Table):
                       # Look the table up without raising when it is missing; anything other
                       # than a dict result is skipped.
325                    schema = self.schema.find(
326                        source, raise_on_missing=False, ensure_data_types=True
327                    )
328                    if not isinstance(schema, dict):
329                        continue
330
                       # Build a nested STRUCT type with one ColumnDef per (column, kind) item
                       # of the schema mapping and assign it to the table column.
331                    struct_type = exp.DataType(
332                        this=exp.DataType.Type.STRUCT,
333                        expressions=[
334                            exp.ColumnDef(this=exp.to_identifier(c), kind=kind)
335                            for c, kind in schema.items()
336                        ],
337                        nested=True,
338                    )
339                    self._set_type(table_column, struct_type)
                   # For a Scope source whose expression is a Query, reuse the STRUCT stored
                   # under meta["query_type"]. The `or exp.DataType.build("UNKNOWN")` fallback
                   # keeps the `.is_type` call valid when no "query_type" has been stored.
340                elif (
341                    isinstance(source, Scope)
342                    and isinstance(source.expression, exp.Query)
343                    and (
344                        source.expression.meta.get("query_type") or exp.DataType.build("UNKNOWN")
345                    ).is_type(exp.DataType.Type.STRUCT)
346                ):
347                    self._set_type(table_column, source.expression.meta["query_type"])
348
349        # Then (possibly) annotate the remaining expressions in the scope
350        self._maybe_annotate(scope.expression)
351
           # For the "bigquery" schema dialect, describe the query's projection as a nested
           # STRUCT: one ColumnDef per select, named after its output name and typed with a
           # copy of the select's type (or no kind at all when the select has no type).
352        if self.schema.dialect == "bigquery" and isinstance(scope.expression, exp.Query):
353            struct_type = exp.DataType(
354                this=exp.DataType.Type.STRUCT,
355                expressions=[
356                    exp.ColumnDef(
357                        this=exp.to_identifier(select.output_name),
358                        kind=select.type.copy() if select.type else None,
359                    )
360                    for select in scope.expression.selects
361                ],
362                nested=True,
363            )
364
               # The struct is only recorded when none of the column definitions that have a
               # kind is UNKNOWN. Definitions without a kind are skipped by the `if cd.kind`
               # filter, so they do not prevent the struct from being recorded.
365            if not any(
366                cd.kind.is_type(exp.DataType.Type.UNKNOWN)
367                for cd in struct_type.expressions
368                if cd.kind
369            ):
370                # We don't use `_set_type` on purpose here. If we annotated the query directly, then
371                # using it in other contexts (e.g., ARRAY(<query>)) could result in incorrect type
372                # annotations, i.e., it shouldn't be interpreted as a STRUCT value.
373                scope.expression.meta["query_type"] = struct_type