From 43902d83018c950c9ac1a97c58abc32838228867 Mon Sep 17 00:00:00 2001
From: Peter Law
Date: Thu, 4 Jul 2024 22:39:29 +0100
Subject: [PATCH 1/2] Merge branch 'ensure-unique-subprocess-reference-ids'

(cherry picked from commit e839683e91b78355f0363bcc6f74f762995344f8)
---
 jedi/api/environment.py                       |  15 +-
 .../inference/compiled/subprocess/__init__.py | 157 ++++++++++++++++--
 2 files changed, 152 insertions(+), 20 deletions(-)

diff --git a/jedi/api/environment.py b/jedi/api/environment.py
index 771a9a83..cfe8cfe3 100644
--- a/jedi/api/environment.py
+++ b/jedi/api/environment.py
@@ -8,6 +8,7 @@ import hashlib
 import filecmp
 from collections import namedtuple
 from shutil import which
+from typing import TYPE_CHECKING
 
 from jedi.cache import memoize_method, time_cache
 from jedi.inference.compiled.subprocess import CompiledSubprocess, \
@@ -15,6 +16,10 @@ from jedi.inference.compiled.subprocess import CompiledSubprocess, \
 
 import parso
 
+if TYPE_CHECKING:
+    from jedi.inference import InferenceState
+
+
 _VersionInfo = namedtuple('VersionInfo', 'major minor micro')  # type: ignore[name-match]
 
 _SUPPORTED_PYTHONS = ['3.12', '3.11', '3.10', '3.9', '3.8', '3.7', '3.6']
@@ -102,7 +107,10 @@
         version = '.'.join(str(i) for i in self.version_info)
         return '<%s: %s in %s>' % (self.__class__.__name__, version, self.path)
 
-    def get_inference_state_subprocess(self, inference_state):
+    def get_inference_state_subprocess(
+        self,
+        inference_state: 'InferenceState',
+    ) -> InferenceStateSubprocess:
         return InferenceStateSubprocess(inference_state, self._get_subprocess())
 
     @memoize_method
@@ -134,7 +142,10 @@ class SameEnvironment(_SameEnvironmentMixin, Environment):
 
 
 class InterpreterEnvironment(_SameEnvironmentMixin, _BaseEnvironment):
-    def get_inference_state_subprocess(self, inference_state):
+    def get_inference_state_subprocess(
+        self,
+        inference_state: 'InferenceState',
+    ) -> InferenceStateSameProcess:
         return InferenceStateSameProcess(inference_state)
 
     def get_sys_path(self):
diff --git a/jedi/inference/compiled/subprocess/__init__.py b/jedi/inference/compiled/subprocess/__init__.py
index cd5fe74c..3a6039f7 100644
--- a/jedi/inference/compiled/subprocess/__init__.py
+++ b/jedi/inference/compiled/subprocess/__init__.py
@@ -5,6 +5,23 @@ goals:
 1. Making it safer - Segfaults and RuntimeErrors as well as stdout/stderr can
    be ignored and dealt with.
 2. Make it possible to handle different Python versions as well as virtualenvs.
+
+The architecture here is briefly:
+ - For each Jedi `Environment` there is a corresponding subprocess which
+   operates within the target environment. If the subprocess dies it is replaced
+   at this level.
+ - `CompiledSubprocess` manages exactly one subprocess and handles communication
+   from the parent side.
+ - `Listener` runs within the subprocess, processing each request and yielding
+   results.
+ - `InterpreterEnvironment` provides an API which matches that of `Environment`,
+   but runs functionality inline rather than within a subprocess. It is thus
+   used both directly in places where a subprocess is unnecessary and/or
+   undesirable and also within subprocesses themselves.
+ - `InferenceStateSubprocess` (or `InferenceStateSameProcess`) provide high
+   level access to functionality within the subprocess from within the parent.
+   Each `InferenceState` has an instance of one of these, provided by its
+   environment.
 """
 
 import collections
@@ -16,6 +33,7 @@ import traceback
 import weakref
 from functools import partial
 from threading import Thread
+from typing import Dict, TYPE_CHECKING
 
 from jedi._compatibility import pickle_dump, pickle_load
 from jedi import debug
@@ -25,6 +43,9 @@ from jedi.inference.compiled.access import DirectObjectAccess, AccessPath, \
     SignatureParam
 from jedi.api.exceptions import InternalError
 
+if TYPE_CHECKING:
+    from jedi.inference import InferenceState
+
 _MAIN_PATH = os.path.join(os.path.dirname(__file__), '__main__.py')
 PICKLE_PROTOCOL = 4
 
@@ -83,10 +104,9 @@ def _cleanup_process(process, thread):
 
 
 class _InferenceStateProcess:
-    def __init__(self, inference_state):
+    def __init__(self, inference_state: 'InferenceState') -> None:
         self._inference_state_weakref = weakref.ref(inference_state)
-        self._inference_state_id = id(inference_state)
-        self._handles = {}
+        self._handles: Dict[int, AccessHandle] = {}
 
     def get_or_create_access_handle(self, obj):
         id_ = id(obj)
@@ -116,11 +136,49 @@ class InferenceStateSameProcess(_InferenceStateProcess):
 
 
 class InferenceStateSubprocess(_InferenceStateProcess):
-    def __init__(self, inference_state, compiled_subprocess):
+    """
+    API to functionality which will run in a subprocess.
+
+    This mediates the interaction between an `InferenceState` and the actual
+    execution of functionality running within a `CompiledSubprocess`. Available
+    functions are defined in `.functions`, though they should be accessed via
+    attributes on this class of the same name.
+
+    This class is responsible for indicating that the `InferenceState` within
+    the subprocess can be removed once the corresponding instance in the parent
+    goes away.
+    """
+
+    def __init__(
+        self,
+        inference_state: 'InferenceState',
+        compiled_subprocess: 'CompiledSubprocess',
+    ) -> None:
         super().__init__(inference_state)
         self._used = False
         self._compiled_subprocess = compiled_subprocess
 
+        # Opaque id we'll pass to the subprocess to identify the context (an
+        # `InferenceState`) which should be used for the request. This allows us
+        # to make subsequent requests which operate on results from previous
+        # ones, while keeping a single subprocess which can work with several
+        # contexts in the parent process. Once it is no longer needed (i.e. when
+        # this class goes away), we also use this id to indicate that the
+        # subprocess can discard the context.
+        #
+        # Note: this id is deliberately coupled to this class (and not to
+        # `InferenceState`) as this class manages access handle mappings which
+        # must correspond to those in the subprocess. This approach also avoids
+        # race conditions from successive `InferenceState`s with the same object
+        # id (as observed while adding support for Python 3.13).
+        #
+        # This value does not need to be the `id()` of this instance; we merely
+        # need to ensure that it enables the (visible) lifetime of the context
+        # within the subprocess to match that of this class. We therefore also
+        # depend on the semantics of `CompiledSubprocess.delete_inference_state`
+        # for correctness.
+        self._inference_state_id = id(self)
+
     def __getattr__(self, name):
         func = _get_function(name)
 
@@ -128,7 +186,7 @@ class InferenceStateSubprocess(_InferenceStateProcess):
             self._used = True
 
             result = self._compiled_subprocess.run(
-                self._inference_state_weakref(),
+                self._inference_state_id,
                 func,
                 args=args,
                 kwargs=kwargs,
@@ -164,6 +222,17 @@ class InferenceStateSubprocess(_InferenceStateProcess):
 
 
 class CompiledSubprocess:
+    """
+    A subprocess which runs inference within a target environment.
+
+    This class manages the interface to a single instance of such a process as
+    well as the lifecycle of the process itself. See `.__main__` and `Listener`
+    for the implementation of the subprocess and details of the protocol.
+
+    A single live instance of this is maintained by `jedi.api.environment.Environment`,
+    so that typically a single subprocess is used at a time.
+    """
+
     is_crashed = False
 
     def __init__(self, executable, env_vars=None):
@@ -213,18 +282,18 @@ class CompiledSubprocess:
                                                   t)
         return process
 
-    def run(self, inference_state, function, args=(), kwargs={}):
+    def run(self, inference_state_id, function, args=(), kwargs={}):
         # Delete old inference_states.
         while True:
             try:
-                inference_state_id = self._inference_state_deletion_queue.pop()
+                delete_id = self._inference_state_deletion_queue.pop()
             except IndexError:
                 break
             else:
-                self._send(inference_state_id, None)
+                self._send(delete_id, None)
 
         assert callable(function)
-        return self._send(id(inference_state), function, args, kwargs)
+        return self._send(inference_state_id, function, args, kwargs)
 
     def get_sys_path(self):
         return self._send(None, functions.get_sys_path, (), {})
@@ -272,21 +341,65 @@ class CompiledSubprocess:
 
     def delete_inference_state(self, inference_state_id):
         """
-        Currently we are not deleting inference_state instantly. They only get
-        deleted once the subprocess is used again. It would probably a better
-        solution to move all of this into a thread. However, the memory usage
-        of a single inference_state shouldn't be that high.
+        Indicate that an inference state (in the subprocess) is no longer
+        needed.
+
+        The state corresponding to the given id will become inaccessible and the
+        id may safely be re-used to refer to a different context.
+
+        Note: it is not guaranteed that the corresponding state will actually be
+        deleted immediately.
         """
-        # With an argument - the inference_state gets deleted.
+        # Warning: if changing the semantics of context deletion see the comment
+        # in `InferenceStateSubprocess.__init__` regarding potential race
+        # conditions.
+
+        # Currently we are not deleting the related states instantly. They only
+        # get deleted once the subprocess is used again. It would probably be a
+        # better solution to move all of this into a thread. However, the memory
+        # usage of a single inference_state shouldn't be that high.
        self._inference_state_deletion_queue.append(inference_state_id)
 
 
 class Listener:
+    """
+    Main loop for the subprocess which actually does the inference.
+
+    This class runs within the target environment. It listens to instructions
+    from the parent process, runs inference and returns the results.
+
+    The subprocess has a long lifetime and is expected to process several
+    requests, including for different `InferenceState` instances in the parent.
+    See `CompiledSubprocess` for the parent half of the system.
+
+    Communication is via pickled data sent serially over stdin and stdout.
+    Stderr is read only if the child process crashes.
+
+    The request protocol is a 4-tuple of:
+     * inference_state_id | None: an opaque identifier of the parent's
+       `InferenceState`. An `InferenceState` operating over an
+       `InterpreterEnvironment` is created within this process for each of
+       these, ensuring that each parent context has a corresponding context
+       here. This allows context to be persisted between requests. Unless
+       `None`, the local `InferenceState` will be passed to the given function
+       as the first positional argument.
+     * function | None: the function to run. This is expected to be a member of
+       `.functions`. `None` indicates that the corresponding inference state is
+       no longer needed and should be dropped.
+     * args: positional arguments to the `function`. If any of these are
+       `AccessHandle` instances they will be adapted to the local
+       `InferenceState` before being passed.
+     * kwargs: keyword arguments to the `function`. If any of these are
+       `AccessHandle` instances they will be adapted to the local
+       `InferenceState` before being passed.
+
+    The result protocol is a 3-tuple of either:
+     * (False, None, function result): if the function returns without error, or
+     * (True, traceback, exception): if the function raises an exception
+    """
+
     def __init__(self):
         self._inference_states = {}
-        # TODO refactor so we don't need to process anymore just handle
-        # controlling.
-        self._process = _InferenceStateProcess(Listener)
 
     def _get_inference_state(self, function, inference_state_id):
         from jedi.inference import InferenceState
@@ -308,6 +421,9 @@ class Listener:
         if inference_state_id is None:
             return function(*args, **kwargs)
         elif function is None:
+            # Warning: if changing the semantics of context deletion see the comment
+            # in `InferenceStateSubprocess.__init__` regarding potential race
+            # conditions.
             del self._inference_states[inference_state_id]
         else:
             inference_state = self._get_inference_state(function, inference_state_id)
@@ -348,7 +464,12 @@ class Listener:
 
 
 class AccessHandle:
-    def __init__(self, subprocess, access, id_):
+    def __init__(
+        self,
+        subprocess: _InferenceStateProcess,
+        access: DirectObjectAccess,
+        id_: int,
+    ) -> None:
         self.access = access
         self._subprocess = subprocess
         self.id = id_
-- 
2.45.2
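For illustration only, and not part of the patch above: a minimal sketch of the request/result protocol that the new `Listener` docstring describes, assuming hypothetical helper names `send_request` and `serve_one`. The real implementation lives in `CompiledSubprocess._send` and the `Listener` main loop, which additionally handle process startup, buffering, crash detection and access-handle translation; this sketch only shows the shape of the pickled 4-tuple request and 3-tuple result.

import pickle
import sys
import traceback

PICKLE_PROTOCOL = 4


def send_request(child_stdin, child_stdout, inference_state_id, function,
                 args=(), kwargs={}):
    # Parent side: pickle the 4-tuple request onto the child's stdin, then
    # read the pickled 3-tuple result back from its stdout.
    pickle.dump((inference_state_id, function, args, kwargs),
                child_stdin, PICKLE_PROTOCOL)
    child_stdin.flush()
    is_exception, tb, result = pickle.load(child_stdout)
    if is_exception:
        print(tb, file=sys.stderr)
        raise result
    return result


def serve_one(stdin, stdout, inference_states):
    # Child side: handle a single request and reply with
    # (is_exception, traceback, result).
    inference_state_id, function, args, kwargs = pickle.load(stdin)
    try:
        if inference_state_id is None:
            result = function(*args, **kwargs)
        elif function is None:
            # The parent no longer needs this context.
            inference_states.pop(inference_state_id, None)
            result = None
        else:
            # Stand-in for the per-parent InferenceState that the real
            # subprocess builds over an InterpreterEnvironment.
            state = inference_states.setdefault(inference_state_id, object())
            result = function(state, *args, **kwargs)
        payload = (False, None, result)
    except Exception as exc:
        payload = (True, traceback.format_exc(), exc)
    pickle.dump(payload, stdout, PICKLE_PROTOCOL)
    stdout.flush()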