You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
python-jedi/SOURCES/0001-Merge-branch-ensure-un...

320 lines
13 KiB

From 43902d83018c950c9ac1a97c58abc32838228867 Mon Sep 17 00:00:00 2001
From: Peter Law <PeterJCLaw@gmail.com>
Date: Thu, 4 Jul 2024 22:39:29 +0100
Subject: [PATCH 1/2] Merge branch 'ensure-unique-subprocess-reference-ids'
(cherry picked from commit e839683e91b78355f0363bcc6f74f762995344f8)
---
jedi/api/environment.py | 15 +-
.../inference/compiled/subprocess/__init__.py | 157 ++++++++++++++++--
2 files changed, 152 insertions(+), 20 deletions(-)
diff --git a/jedi/api/environment.py b/jedi/api/environment.py
index 771a9a83..cfe8cfe3 100644
--- a/jedi/api/environment.py
+++ b/jedi/api/environment.py
@@ -8,6 +8,7 @@ import hashlib
import filecmp
from collections import namedtuple
from shutil import which
+from typing import TYPE_CHECKING
from jedi.cache import memoize_method, time_cache
from jedi.inference.compiled.subprocess import CompiledSubprocess, \
@@ -15,6 +16,10 @@ from jedi.inference.compiled.subprocess import CompiledSubprocess, \
import parso
+if TYPE_CHECKING:
+ from jedi.inference import InferenceState
+
+
_VersionInfo = namedtuple('VersionInfo', 'major minor micro') # type: ignore[name-match]
_SUPPORTED_PYTHONS = ['3.12', '3.11', '3.10', '3.9', '3.8', '3.7', '3.6']
@@ -102,7 +107,10 @@ class Environment(_BaseEnvironment):
version = '.'.join(str(i) for i in self.version_info)
return '<%s: %s in %s>' % (self.__class__.__name__, version, self.path)
- def get_inference_state_subprocess(self, inference_state):
+ def get_inference_state_subprocess(
+ self,
+ inference_state: 'InferenceState',
+ ) -> InferenceStateSubprocess:
return InferenceStateSubprocess(inference_state, self._get_subprocess())
@memoize_method
@@ -134,7 +142,10 @@ class SameEnvironment(_SameEnvironmentMixin, Environment):
class InterpreterEnvironment(_SameEnvironmentMixin, _BaseEnvironment):
- def get_inference_state_subprocess(self, inference_state):
+ def get_inference_state_subprocess(
+ self,
+ inference_state: 'InferenceState',
+ ) -> InferenceStateSameProcess:
return InferenceStateSameProcess(inference_state)
def get_sys_path(self):
diff --git a/jedi/inference/compiled/subprocess/__init__.py b/jedi/inference/compiled/subprocess/__init__.py
index cd5fe74c..3a6039f7 100644
--- a/jedi/inference/compiled/subprocess/__init__.py
+++ b/jedi/inference/compiled/subprocess/__init__.py
@@ -5,6 +5,23 @@ goals:
1. Making it safer - Segfaults and RuntimeErrors as well as stdout/stderr can
be ignored and dealt with.
2. Make it possible to handle different Python versions as well as virtualenvs.
+
+The architecture here is briefly:
+ - For each Jedi `Environment` there is a corresponding subprocess which
+ operates within the target environment. If the subprocess dies it is replaced
+ at this level.
+ - `CompiledSubprocess` manages exactly one subprocess and handles communication
+ from the parent side.
+ - `Listener` runs within the subprocess, processing each request and yielding
+ results.
+ - `InterpreterEnvironment` provides an API which matches that of `Environment`,
+ but runs functionality inline rather than within a subprocess. It is thus
+ used both directly in places where a subprocess is unnecessary and/or
+ undesirable and also within subprocesses themselves.
+ - `InferenceStateSubprocess` (or `InferenceStateSameProcess`) provides
+ high-level access to functionality within the subprocess from within the parent.
+ Each `InferenceState` has an instance of one of these, provided by its
+ environment.
"""
import collections
@@ -16,6 +33,7 @@ import traceback
import weakref
from functools import partial
from threading import Thread
+from typing import Dict, TYPE_CHECKING
from jedi._compatibility import pickle_dump, pickle_load
from jedi import debug
@@ -25,6 +43,9 @@ from jedi.inference.compiled.access import DirectObjectAccess, AccessPath, \
SignatureParam
from jedi.api.exceptions import InternalError
+if TYPE_CHECKING:
+ from jedi.inference import InferenceState
+
_MAIN_PATH = os.path.join(os.path.dirname(__file__), '__main__.py')
PICKLE_PROTOCOL = 4
@@ -83,10 +104,9 @@ def _cleanup_process(process, thread):
class _InferenceStateProcess:
- def __init__(self, inference_state):
+ def __init__(self, inference_state: 'InferenceState') -> None:
self._inference_state_weakref = weakref.ref(inference_state)
- self._inference_state_id = id(inference_state)
- self._handles = {}
+ self._handles: Dict[int, AccessHandle] = {}
def get_or_create_access_handle(self, obj):
id_ = id(obj)
@@ -116,11 +136,49 @@ class InferenceStateSameProcess(_InferenceStateProcess):
class InferenceStateSubprocess(_InferenceStateProcess):
- def __init__(self, inference_state, compiled_subprocess):
+ """
+ API to functionality which will run in a subprocess.
+
+ This mediates the interaction between an `InferenceState` and the actual
+ execution of functionality running within a `CompiledSubprocess`. Available
+ functions are defined in `.functions`, though they should be accessed via
+ attributes of the same name on this class.
+
+ This class is responsible for indicating that the `InferenceState` within
+ the subprocess can be removed once the corresponding instance in the parent
+ goes away.
+ """
+
+ def __init__(
+ self,
+ inference_state: 'InferenceState',
+ compiled_subprocess: 'CompiledSubprocess',
+ ) -> None:
super().__init__(inference_state)
self._used = False
self._compiled_subprocess = compiled_subprocess
+ # Opaque id we'll pass to the subprocess to identify the context (an
+ # `InferenceState`) which should be used for the request. This allows us
+ # to make subsequent requests which operate on results from previous
+ # ones, while keeping a single subprocess which can work with several
+ # contexts in the parent process. Once it is no longer needed (i.e. when
+ # this class goes away), we also use this id to indicate that the
+ # subprocess can discard the context.
+ #
+ # Note: this id is deliberately coupled to this class (and not to
+ # `InferenceState`) as this class manages access handle mappings which
+ # must correspond to those in the subprocess. This approach also avoids
+ # race conditions from successive `InferenceState`s with the same object
+ # id (as observed while adding support for Python 3.13).
+ #
+ # This value does not need to be the `id()` of this instance; we merely
+ # need to ensure that it enables the (visible) lifetime of the context
+ # within the subprocess to match that of this class. We therefore also
+ # depend on the semantics of `CompiledSubprocess.delete_inference_state`
+ # for correctness.
+ self._inference_state_id = id(self)
+
def __getattr__(self, name):
func = _get_function(name)
@@ -128,7 +186,7 @@ class InferenceStateSubprocess(_InferenceStateProcess):
self._used = True
result = self._compiled_subprocess.run(
- self._inference_state_weakref(),
+ self._inference_state_id,
func,
args=args,
kwargs=kwargs,
@@ -164,6 +222,17 @@ class InferenceStateSubprocess(_InferenceStateProcess):
class CompiledSubprocess:
+ """
+ A subprocess which runs inference within a target environment.
+
+ This class manages the interface to a single instance of such a process as
+ well as the lifecycle of the process itself. See `.__main__` and `Listener`
+ for the implementation of the subprocess and details of the protocol.
+
+ A single live instance of this is maintained by `jedi.api.environment.Environment`,
+ so that typically a single subprocess is used at a time.
+ """
+
is_crashed = False
def __init__(self, executable, env_vars=None):
@@ -213,18 +282,18 @@ class CompiledSubprocess:
t)
return process
- def run(self, inference_state, function, args=(), kwargs={}):
+ def run(self, inference_state_id, function, args=(), kwargs={}):
# Delete old inference_states.
while True:
try:
- inference_state_id = self._inference_state_deletion_queue.pop()
+ delete_id = self._inference_state_deletion_queue.pop()
except IndexError:
break
else:
- self._send(inference_state_id, None)
+ self._send(delete_id, None)
assert callable(function)
- return self._send(id(inference_state), function, args, kwargs)
+ return self._send(inference_state_id, function, args, kwargs)
def get_sys_path(self):
return self._send(None, functions.get_sys_path, (), {})
@@ -272,21 +341,65 @@ class CompiledSubprocess:
def delete_inference_state(self, inference_state_id):
"""
- Currently we are not deleting inference_state instantly. They only get
- deleted once the subprocess is used again. It would probably a better
- solution to move all of this into a thread. However, the memory usage
- of a single inference_state shouldn't be that high.
+ Indicate that an inference state (in the subprocess) is no longer
+ needed.
+
+ The state corresponding to the given id will become inaccessible and the
+ id may safely be re-used to refer to a different context.
+
+ Note: it is not guaranteed that the corresponding state will actually be
+ deleted immediately.
"""
- # With an argument - the inference_state gets deleted.
+ # Warning: if changing the semantics of context deletion see the comment
+ # in `InferenceStateSubprocess.__init__` regarding potential race
+ # conditions.
+
+ # Currently we are not deleting the related state instantly. It only
+ # gets deleted once the subprocess is used again. It would probably be a
+ # better solution to move all of this into a thread. However, the memory
+ # usage of a single inference_state shouldn't be that high.
self._inference_state_deletion_queue.append(inference_state_id)
class Listener:
+ """
+ Main loop for the subprocess which actually does the inference.
+
+ This class runs within the target environment. It listens to instructions
+ from the parent process, runs inference and returns the results.
+
+ The subprocess has a long lifetime and is expected to process several
+ requests, including for different `InferenceState` instances in the parent.
+ See `CompiledSubprocess` for the parent half of the system.
+
+ Communication is via pickled data sent serially over stdin and stdout.
+ Stderr is read only if the child process crashes.
+
+ The request protocol is a 4-tuple of:
+ * inference_state_id | None: an opaque identifier of the parent's
+ `InferenceState`. An `InferenceState` operating over an
+ `InterpreterEnvironment` is created within this process for each of
+ these, ensuring that each parent context has a corresponding context
+ here. This allows context to be persisted between requests. Unless
+ `None`, the local `InferenceState` will be passed to the given function
+ as the first positional argument.
+ * function | None: the function to run. This is expected to be a member of
+ `.functions`. `None` indicates that the corresponding inference state is
+ no longer needed and should be dropped.
+ * args: positional arguments to the `function`. If any of these are
+ `AccessHandle` instances they will be adapted to the local
+ `InferenceState` before being passed.
+ * kwargs: keyword arguments to the `function`. If any of these are
+ `AccessHandle` instances they will be adapted to the local
+ `InferenceState` before being passed.
+
+ The result protocol is a 3-tuple of either:
+ * (False, None, function result): if the function returns without error, or
+ * (True, traceback, exception): if the function raises an exception
+ """
+
def __init__(self):
self._inference_states = {}
- # TODO refactor so we don't need to process anymore just handle
- # controlling.
- self._process = _InferenceStateProcess(Listener)
def _get_inference_state(self, function, inference_state_id):
from jedi.inference import InferenceState
@@ -308,6 +421,9 @@ class Listener:
if inference_state_id is None:
return function(*args, **kwargs)
elif function is None:
+ # Warning: if changing the semantics of context deletion see the comment
+ # in `InferenceStateSubprocess.__init__` regarding potential race
+ # conditions.
del self._inference_states[inference_state_id]
else:
inference_state = self._get_inference_state(function, inference_state_id)
@@ -348,7 +464,12 @@ class Listener:
class AccessHandle:
- def __init__(self, subprocess, access, id_):
+ def __init__(
+ self,
+ subprocess: _InferenceStateProcess,
+ access: DirectObjectAccess,
+ id_: int,
+ ) -> None:
self.access = access
self._subprocess = subprocess
self.id = id_
--
2.45.2