diff --git a/mypy/build.py b/mypy/build.py index 4e9480d8d3ef..d79f98f7489e 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -953,7 +953,7 @@ def __init__( # until all the files have been added. This means that a # new file can be processed O(n**2) times. This cache # avoids most of this redundant work. - self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo]]] = {} + self.ast_cache: dict[str, tuple[MypyFile, list[ErrorInfo], str | None]] = {} # Number of times we used GC optimization hack for fresh SCCs. self.gc_freeze_cycles = 0 # Mapping from SCC id to corresponding SCC instance. This is populated @@ -1039,11 +1039,66 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S as an optimization to parallelize only those parts of the code that can be parallelized efficiently. """ + parallel_parsed_states, parallel_parsed_states_set = self.parse_files_threaded_raw( + sequential_states, parallel_states + ) + + for state in parallel_parsed_states: + # New parser returns serialized ASTs. Deserialize full trees only if not using + # parallel workers. + with state.wrap_context(): + assert state.tree is not None + raw_data = state.tree.raw_data + if raw_data is not None: + # Apply inline mypy config before deserialization, since + # some options (e.g. implicit_optional) affect deserialization + state.source_hash = raw_data.source_hash + state.apply_inline_configuration(raw_data.mypy_comments) + state.tree = load_from_raw( + state.xpath, + state.id, + raw_data, + self.errors, + state.options, + imports_only=bool(self.workers), + ) + if self.errors.is_blockers(): + self.log("Bailing due to parse errors") + self.errors.raise_error() + + for state in parallel_states: + assert state.tree is not None + if state in parallel_parsed_states_set: + if state.tree.raw_data is not None: + # source_hash was already extracted above, but raw_data + # may have been preserved for workers (imports_only=True). + pass + elif state.source_hash is None: + # At least namespace packages may not have source. + state.get_source() + state.size_hint = os.path.getsize(state.xpath) + state.early_errors = list(self.errors.error_info_map.get(state.xpath, [])) + state.semantic_analysis_pass1() + self.ast_cache[state.id] = (state.tree, state.early_errors, state.source_hash) + self.modules[state.id] = state.tree + state.check_blockers() + state.setup_errors() + + def parse_files_threaded_raw( + self, sequential_states: list[State], parallel_states: list[State] + ) -> tuple[list[State], set[State]]: + """Parse files using a thread pool. + + Also parse sequential states while waiting for the parallel results. + Trees from the new parser are left in raw (serialized) form. + + Return (list, set) of states that were actually parsed (not cached). + """ futures = [] # Use both list and a set to have more predictable order of errors, # while also not sacrificing performance. - parallel_parsed_states = [] - parallel_parsed_states_set = set() + parallel_parsed_states: list[State] = [] + parallel_parsed_states_set: set[State] = set() # Use at least --num-workers if specified by user. available_threads = max(get_available_threads(), self.options.num_workers) # Overhead from trying to parallelize (small) blocking portion of @@ -1052,20 +1107,18 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor: for state in parallel_states: state.needs_parse = False - # New parser reads source from file directly, we do this only for - # the side effect of parsing inline mypy configurations. - state.get_source() if state.id not in self.ast_cache: self.log(f"Parsing {state.xpath} ({state.id})") ignore_errors = state.ignore_all or state.options.ignore_errors if ignore_errors: self.errors.ignored_files.add(state.xpath) - futures.append(executor.submit(state.parse_file_inner, state.source or "")) + futures.append(executor.submit(state.parse_file_inner, "")) parallel_parsed_states.append(state) parallel_parsed_states_set.add(state) else: self.log(f"Using cached AST for {state.xpath} ({state.id})") - state.tree, state.early_errors = self.ast_cache[state.id] + state.tree, state.early_errors, source_hash = self.ast_cache[state.id] + state.source_hash = source_hash # Parse sequential before waiting on parallel. for state in sequential_states: @@ -1073,32 +1126,8 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S for fut in wait(futures).done: fut.result() - for state in parallel_parsed_states: - # New parser returns serialized trees that need to be de-serialized. - with state.wrap_context(): - assert state.tree is not None - if state.tree.raw_data: - state.tree = load_from_raw( - state.xpath, - state.id, - state.tree.raw_data, - self.errors, - state.options, - imports_only=bool(self.workers), - ) - if self.errors.is_blockers(): - self.log("Bailing due to parse errors") - self.errors.raise_error() - for state in parallel_states: - assert state.tree is not None - if state in parallel_parsed_states_set: - state.early_errors = list(self.errors.error_info_map.get(state.xpath, [])) - state.semantic_analysis_pass1() - self.ast_cache[state.id] = (state.tree, state.early_errors) - self.modules[state.id] = state.tree - state.check_blockers() - state.setup_errors() + return parallel_parsed_states, parallel_parsed_states_set def post_parse_all(self, states: list[State]) -> None: for state in states: @@ -3090,7 +3119,6 @@ def get_source(self) -> str: self.source_hash = compute_hash(source) self.parse_inline_configuration(source) - self.check_for_invalid_options() self.size_hint = len(source) self.time_spent_us += time_spent_us(t0) @@ -3115,7 +3143,10 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = # The file was already parsed. return - source = self.get_source() + if raw_data is None: + source = self.get_source() + else: + source = "" manager = self.manager # Can we reuse a previously parsed AST? This avoids redundant work in daemon. if self.id not in manager.ast_cache: @@ -3125,6 +3156,12 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = self.manager.errors.ignored_files.add(self.xpath) with self.wrap_context(): manager.errors.set_file(self.xpath, self.id, options=self.options) + if raw_data is not None: + # Apply inline mypy config before deserialization, since + # some options (e.g. implicit_optional) affect how the + # AST is built during deserialization. + self.source_hash = raw_data.source_hash + self.apply_inline_configuration(raw_data.mypy_comments) self.parse_file_inner(source, raw_data) assert self.tree is not None # New parser returns serialized trees that need to be de-serialized. @@ -3149,14 +3186,15 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = else: # Reuse a cached AST manager.log(f"Using cached AST for {self.xpath} ({self.id})") - self.tree, self.early_errors = manager.ast_cache[self.id] + self.tree, self.early_errors, source_hash = manager.ast_cache[self.id] + self.source_hash = source_hash assert self.tree is not None if not temporary: manager.modules[self.id] = self.tree self.check_blockers() - manager.ast_cache[self.id] = (self.tree, self.early_errors) + manager.ast_cache[self.id] = (self.tree, self.early_errors, self.source_hash) self.setup_errors() def setup_errors(self) -> None: @@ -3169,12 +3207,17 @@ def setup_errors(self) -> None: def parse_inline_configuration(self, source: str) -> None: """Check for inline mypy: options directive and parse them.""" flags = get_mypy_comments(source) + self.apply_inline_configuration(flags) + + def apply_inline_configuration(self, flags: list[tuple[int, str]] | None) -> None: + """Apply inline mypy configuration comments and check for invalid options.""" if flags: changes, config_errors = parse_mypy_comments(flags, self.options) self.options = self.options.apply_changes(changes) self.manager.errors.set_file(self.xpath, self.id, self.options) for lineno, error in config_errors: self.manager.error(lineno, error) + self.check_for_invalid_options() def check_for_invalid_options(self) -> None: if self.options.mypyc and not self.options.strict_bytes: diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index fd90d85fa355..68dea3f44d00 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -210,13 +210,27 @@ def native_parse( node.path = filename return node, [], [] - b, errors, ignores, import_bytes, is_partial_package, uses_template_strings = ( - parse_to_binary_ast(filename, options, skip_function_bodies) - ) + ( + b, + errors, + ignores, + import_bytes, + is_partial_package, + uses_template_strings, + source_hash, + mypy_comments, + ) = parse_to_binary_ast(filename, options, skip_function_bodies) node = MypyFile([], []) node.path = filename node.raw_data = FileRawData( - b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings + b, + import_bytes, + errors, + dict(ignores), + is_partial_package, + uses_template_strings, + source_hash, + mypy_comments, ) return node, errors, ignores @@ -243,7 +257,7 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]: def parse_to_binary_ast( filename: str, options: Options, skip_function_bodies: bool = False -) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool]: +) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool, str, list[tuple[int, str]]]: # This is a horrible hack to work around a mypyc bug where imported # module may be not ready in a thread sometimes. t0 = time.time() @@ -267,6 +281,8 @@ def parse_to_binary_ast( import_bytes, ast_data["is_partial_package"], ast_data["uses_template_strings"], + ast_data["source_hash"], + ast_data["mypy_comments"], ) diff --git a/mypy/nodes.py b/mypy/nodes.py index 4f43ec7eaaa4..3dafffa5570d 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -360,6 +360,8 @@ class FileRawData: "ignored_lines", "is_partial_stub_package", "uses_template_strings", + "source_hash", + "mypy_comments", ) defs: bytes @@ -368,6 +370,8 @@ class FileRawData: ignored_lines: dict[int, list[str]] is_partial_stub_package: bool uses_template_strings: bool + source_hash: str + mypy_comments: list[tuple[int, str]] def __init__( self, @@ -377,6 +381,8 @@ def __init__( ignored_lines: dict[int, list[str]], is_partial_stub_package: bool, uses_template_strings: bool, + source_hash: str = "", + mypy_comments: list[tuple[int, str]] | None = None, ) -> None: self.defs = defs self.imports = imports @@ -384,6 +390,8 @@ def __init__( self.ignored_lines = ignored_lines self.is_partial_stub_package = is_partial_stub_package self.uses_template_strings = uses_template_strings + self.source_hash = source_hash + self.mypy_comments = mypy_comments if mypy_comments is not None else [] def write(self, data: WriteBuffer) -> None: write_bytes(data, self.defs) @@ -399,6 +407,12 @@ def write(self, data: WriteBuffer) -> None: write_str_list(data, codes) write_bool(data, self.is_partial_stub_package) write_bool(data, self.uses_template_strings) + write_str(data, self.source_hash) + write_tag(data, LIST_GEN) + write_int_bare(data, len(self.mypy_comments)) + for line, text in self.mypy_comments: + write_int(data, line) + write_str(data, text) @classmethod def read(cls, data: ReadBuffer) -> FileRawData: @@ -408,8 +422,20 @@ def read(cls, data: ReadBuffer) -> FileRawData: raw_errors = [read_parse_error(data) for _ in range(read_int_bare(data))] assert read_tag(data) == DICT_INT_GEN ignored_lines = {read_int(data): read_str_list(data) for _ in range(read_int_bare(data))} + is_partial_stub_package = read_bool(data) + uses_template_strings = read_bool(data) + source_hash = read_str(data) + assert read_tag(data) == LIST_GEN + mypy_comments = [(read_int(data), read_str(data)) for _ in range(read_int_bare(data))] return FileRawData( - defs, imports, raw_errors, ignored_lines, read_bool(data), read_bool(data) + defs, + imports, + raw_errors, + ignored_lines, + is_partial_stub_package, + uses_template_strings, + source_hash, + mypy_comments, ) diff --git a/mypy/parse.py b/mypy/parse.py index d2626737b8c4..b0901a3a2455 100644 --- a/mypy/parse.py +++ b/mypy/parse.py @@ -64,7 +64,11 @@ def load_from_raw( options: Options, imports_only: bool = False, ) -> MypyFile: - """Load AST from parsed binary data and report stored errors.""" + """Load AST from parsed binary data and report stored errors. + + If imports_only is true, only deserialize imports and return a mostly + empty AST. + """ from mypy.nativeparse import State, deserialize_imports, read_statements state = State(options) diff --git a/mypy/test/test_nativeparse.py b/mypy/test/test_nativeparse.py index f9a18ea992c2..b50da5f5d02c 100644 --- a/mypy/test/test_nativeparse.py +++ b/mypy/test/test_nativeparse.py @@ -251,7 +251,7 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) -> ] with temp_source("print('hello')") as fnam: - b, _, _, _, _, _ = parse_to_binary_ast(fnam, Options()) + b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options()) assert list(b) == ( [LITERAL_INT, 22, nodes.EXPR_STMT, nodes.CALL_EXPR] + [nodes.NAME_EXPR, LITERAL_STR] diff --git a/test-data/unit/check-optional.test b/test-data/unit/check-optional.test index fe8ce9ede9d2..5e3d6e0e58ab 100644 --- a/test-data/unit/check-optional.test +++ b/test-data/unit/check-optional.test @@ -1356,3 +1356,9 @@ def f(x: object) -> None: with C(): pass [builtins fixtures/tuple.pyi] + +[case testInferOptionalFromDefaultNoneInlineConfig] +# mypy: implicit-optional +def f(x: int = None) -> None: + reveal_type(x) # N: Revealed type is "builtins.int | None" +f(None)