Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 80 additions & 37 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ def __init__(
# until all the files have been added. This means that a
# new file can be processed O(n**2) times. This cache
# avoids most of this redundant work.
self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {}
self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo], str | None]] = {}
# Number of times we used GC optimization hack for fresh SCCs.
self.gc_freeze_cycles = 0
# Mapping from SCC id to corresponding SCC instance. This is populated
Expand Down Expand Up @@ -1039,11 +1039,66 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
as an optimization to parallelize only those parts of the code that can be
parallelized efficiently.
"""
parallel_parsed_states, parallel_parsed_states_set = self.parse_files_threaded_raw(
sequential_states, parallel_states
)

for state in parallel_parsed_states:
# New parser returns serialized ASTs. Deserialize full trees only if not using
# parallel workers.
with state.wrap_context():
assert state.tree is not None
raw_data = state.tree.raw_data
if raw_data is not None:
# Apply inline mypy config before deserialization, since
# some options (e.g. implicit_optional) affect deserialization
state.source_hash = raw_data.source_hash
state.apply_inline_configuration(raw_data.mypy_comments)
state.tree = load_from_raw(
state.xpath,
state.id,
raw_data,
self.errors,
state.options,
imports_only=bool(self.workers),
)
if self.errors.is_blockers():
self.log("Bailing due to parse errors")
self.errors.raise_error()

for state in parallel_states:
assert state.tree is not None
if state in parallel_parsed_states_set:
if state.tree.raw_data is not None:
# source_hash was already extracted above, but raw_data
# may have been preserved for workers (imports_only=True).
pass
elif state.source_hash is None:
# At least namespace packages may not have source.
state.get_source()
state.size_hint = os.path.getsize(state.xpath)
state.early_errors = list(self.errors.error_info_map.get(state.xpath, []))
state.semantic_analysis_pass1()
self.ast_cache[state.id] = (state.tree, state.early_errors, state.source_hash)
self.modules[state.id] = state.tree
state.check_blockers()
state.setup_errors()

def parse_files_threaded_raw(
self, sequential_states: list[State], parallel_states: list[State]
) -> tuple[list[State], set[State]]:
"""Parse files using a thread pool.

Also parse sequential states while waiting for the parallel results.
Trees from the new parser are left in raw (serialized) form.

Return (list, set) of states that were actually parsed (not cached).
"""
futures = []
# Use both list and a set to have more predictable order of errors,
# while also not sacrificing performance.
parallel_parsed_states = []
parallel_parsed_states_set = set()
parallel_parsed_states: list[State] = []
parallel_parsed_states_set: set[State] = set()
# Use at least --num-workers if specified by user.
available_threads = max(get_available_threads(), self.options.num_workers)
# Overhead from trying to parallelize (small) blocking portion of
Expand All @@ -1052,53 +1107,27 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor:
for state in parallel_states:
state.needs_parse = False
# New parser reads source from file directly, we do this only for
# the side effect of parsing inline mypy configurations.
state.get_source()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to (conditionally) remove the same call in State.parse_file(), otherwise the worker will call it when loading the tree (look for state.parse_file(raw_data=raw_data) in worker.py).

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

if state.id not in self.ast_cache:
self.log(f"Parsing {state.xpath} ({state.id})")
ignore_errors = state.ignore_all or state.options.ignore_errors
if ignore_errors:
self.errors.ignored_files.add(state.xpath)
futures.append(executor.submit(state.parse_file_inner, state.source or ""))
futures.append(executor.submit(state.parse_file_inner, ""))
parallel_parsed_states.append(state)
parallel_parsed_states_set.add(state)
else:
self.log(f"Using cached AST for {state.xpath} ({state.id})")
state.tree, state.early_errors = self.ast_cache[state.id]
state.tree, state.early_errors, source_hash = self.ast_cache[state.id]
state.source_hash = source_hash

# Parse sequential before waiting on parallel.
for state in sequential_states:
state.parse_file()

for fut in wait(futures).done:
fut.result()
for state in parallel_parsed_states:
# New parser returns serialized trees that need to be de-serialized.
with state.wrap_context():
assert state.tree is not None
if state.tree.raw_data:
state.tree = load_from_raw(
state.xpath,
state.id,
state.tree.raw_data,
self.errors,
state.options,
imports_only=bool(self.workers),
)
if self.errors.is_blockers():
self.log("Bailing due to parse errors")
self.errors.raise_error()

for state in parallel_states:
assert state.tree is not None
if state in parallel_parsed_states_set:
state.early_errors = list(self.errors.error_info_map.get(state.xpath, []))
state.semantic_analysis_pass1()
self.ast_cache[state.id] = (state.tree, state.early_errors)
self.modules[state.id] = state.tree
state.check_blockers()
state.setup_errors()
return parallel_parsed_states, parallel_parsed_states_set

def post_parse_all(self, states: list[State]) -> None:
for state in states:
Expand Down Expand Up @@ -3090,7 +3119,6 @@ def get_source(self) -> str:
self.source_hash = compute_hash(source)

self.parse_inline_configuration(source)
self.check_for_invalid_options()

self.size_hint = len(source)
self.time_spent_us += time_spent_us(t0)
Expand All @@ -3115,7 +3143,10 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
# The file was already parsed.
return

source = self.get_source()
if raw_data is None:
source = self.get_source()
else:
source = ""
manager = self.manager
# Can we reuse a previously parsed AST? This avoids redundant work in daemon.
if self.id not in manager.ast_cache:
Expand All @@ -3125,6 +3156,12 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
self.manager.errors.ignored_files.add(self.xpath)
with self.wrap_context():
manager.errors.set_file(self.xpath, self.id, options=self.options)
if raw_data is not None:
# Apply inline mypy config before deserialization, since
# some options (e.g. implicit_optional) affect how the
# AST is built during deserialization.
self.source_hash = raw_data.source_hash
self.apply_inline_configuration(raw_data.mypy_comments)
self.parse_file_inner(source, raw_data)
assert self.tree is not None
# New parser returns serialized trees that need to be de-serialized.
Expand All @@ -3149,14 +3186,15 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
else:
# Reuse a cached AST
manager.log(f"Using cached AST for {self.xpath} ({self.id})")
self.tree, self.early_errors = manager.ast_cache[self.id]
self.tree, self.early_errors, source_hash = manager.ast_cache[self.id]
self.source_hash = source_hash

assert self.tree is not None
if not temporary:
manager.modules[self.id] = self.tree
self.check_blockers()

manager.ast_cache[self.id] = (self.tree, self.early_errors)
manager.ast_cache[self.id] = (self.tree, self.early_errors, self.source_hash)
self.setup_errors()

def setup_errors(self) -> None:
Expand All @@ -3169,12 +3207,17 @@ def setup_errors(self) -> None:
def parse_inline_configuration(self, source: str) -> None:
    """Scan *source* for inline "# mypy: ..." option comments and apply them."""
    # Delegate the actual option application so callers that already have the
    # extracted comments (e.g. from raw parse data) can share the same path.
    self.apply_inline_configuration(get_mypy_comments(source))

def apply_inline_configuration(self, flags: list[tuple[int, str]] | None) -> None:
    """Apply inline mypy configuration comments and check for invalid options.

    Each entry in *flags* is a (line number, option text) pair as produced
    by get_mypy_comments(); a falsy value means there is nothing to apply.
    """
    if flags:
        option_changes, config_errors = parse_mypy_comments(flags, self.options)
        self.options = self.options.apply_changes(option_changes)
        # Re-register the file first so any errors below are reported
        # under the updated per-file options.
        self.manager.errors.set_file(self.xpath, self.id, self.options)
        for line_number, message in config_errors:
            self.manager.error(line_number, message)
    self.check_for_invalid_options()

def check_for_invalid_options(self) -> None:
if self.options.mypyc and not self.options.strict_bytes:
Expand Down
26 changes: 21 additions & 5 deletions mypy/nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,27 @@ def native_parse(
node.path = filename
return node, [], []

b, errors, ignores, import_bytes, is_partial_package, uses_template_strings = (
parse_to_binary_ast(filename, options, skip_function_bodies)
)
(
b,
errors,
ignores,
import_bytes,
is_partial_package,
uses_template_strings,
source_hash,
mypy_comments,
) = parse_to_binary_ast(filename, options, skip_function_bodies)
node = MypyFile([], [])
node.path = filename
node.raw_data = FileRawData(
b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings
b,
import_bytes,
errors,
dict(ignores),
is_partial_package,
uses_template_strings,
source_hash,
mypy_comments,
)
return node, errors, ignores

Expand All @@ -243,7 +257,7 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]:

def parse_to_binary_ast(
filename: str, options: Options, skip_function_bodies: bool = False
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool]:
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool, str, list[tuple[int, str]]]:
# This is a horrible hack to work around a mypyc bug where imported
# module may be not ready in a thread sometimes.
t0 = time.time()
Expand All @@ -267,6 +281,8 @@ def parse_to_binary_ast(
import_bytes,
ast_data["is_partial_package"],
ast_data["uses_template_strings"],
ast_data["source_hash"],
ast_data["mypy_comments"],
)


Expand Down
28 changes: 27 additions & 1 deletion mypy/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,8 @@ class FileRawData:
"ignored_lines",
"is_partial_stub_package",
"uses_template_strings",
"source_hash",
"mypy_comments",
)

defs: bytes
Expand All @@ -368,6 +370,8 @@ class FileRawData:
ignored_lines: dict[int, list[str]]
is_partial_stub_package: bool
uses_template_strings: bool
source_hash: str
mypy_comments: list[tuple[int, str]]

def __init__(
self,
Expand All @@ -377,13 +381,17 @@ def __init__(
ignored_lines: dict[int, list[str]],
is_partial_stub_package: bool,
uses_template_strings: bool,
source_hash: str = "",
mypy_comments: list[tuple[int, str]] | None = None,
) -> None:
self.defs = defs
self.imports = imports
self.raw_errors = raw_errors
self.ignored_lines = ignored_lines
self.is_partial_stub_package = is_partial_stub_package
self.uses_template_strings = uses_template_strings
self.source_hash = source_hash
self.mypy_comments = mypy_comments if mypy_comments is not None else []
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these two (or at least the second one) need to be sent to the worker, i.e. you will need to handle them in write() and read(). The worker needs to know the full options, since we don't send options over the socket for each module (it is a big object). I guess tests pass now, because the worker still calls get_source().

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added serialization back (I had it removed since I thought it was not needed).


def write(self, data: WriteBuffer) -> None:
write_bytes(data, self.defs)
Expand All @@ -399,6 +407,12 @@ def write(self, data: WriteBuffer) -> None:
write_str_list(data, codes)
write_bool(data, self.is_partial_stub_package)
write_bool(data, self.uses_template_strings)
write_str(data, self.source_hash)
write_tag(data, LIST_GEN)
write_int_bare(data, len(self.mypy_comments))
for line, text in self.mypy_comments:
write_int(data, line)
write_str(data, text)

@classmethod
def read(cls, data: ReadBuffer) -> FileRawData:
Expand All @@ -408,8 +422,20 @@ def read(cls, data: ReadBuffer) -> FileRawData:
raw_errors = [read_parse_error(data) for _ in range(read_int_bare(data))]
assert read_tag(data) == DICT_INT_GEN
ignored_lines = {read_int(data): read_str_list(data) for _ in range(read_int_bare(data))}
is_partial_stub_package = read_bool(data)
uses_template_strings = read_bool(data)
source_hash = read_str(data)
assert read_tag(data) == LIST_GEN
mypy_comments = [(read_int(data), read_str(data)) for _ in range(read_int_bare(data))]
return FileRawData(
defs, imports, raw_errors, ignored_lines, read_bool(data), read_bool(data)
defs,
imports,
raw_errors,
ignored_lines,
is_partial_stub_package,
uses_template_strings,
source_hash,
mypy_comments,
)


Expand Down
6 changes: 5 additions & 1 deletion mypy/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,11 @@ def load_from_raw(
options: Options,
imports_only: bool = False,
) -> MypyFile:
"""Load AST from parsed binary data and report stored errors."""
"""Load AST from parsed binary data and report stored errors.

If imports_only is true, only deserialize imports and return a mostly
empty AST.
"""
from mypy.nativeparse import State, deserialize_imports, read_statements

state = State(options)
Expand Down
2 changes: 1 addition & 1 deletion mypy/test/test_nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
]

with temp_source("print('hello')") as fnam:
b, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
assert list(b) == (
[LITERAL_INT, 22, nodes.EXPR_STMT, nodes.CALL_EXPR]
+ [nodes.NAME_EXPR, LITERAL_STR]
Expand Down
6 changes: 6 additions & 0 deletions test-data/unit/check-optional.test
Original file line number Diff line number Diff line change
Expand Up @@ -1356,3 +1356,9 @@ def f(x: object) -> None:
with C():
pass
[builtins fixtures/tuple.pyi]

[case testInferOptionalFromDefaultNoneInlineConfig]
# mypy: implicit-optional
def f(x: int = None) -> None:
reveal_type(x) # N: Revealed type is "builtins.int | None"
f(None)
Loading