From a5a7f5e16d8c3938d266703ea8fba8ffee3e3ae5 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 16 Oct 2024 16:50:46 -0600 Subject: [PATCH] gh-124694: Add concurrent.futures.InterpreterPoolExecutor (gh-124548) This is an implementation of InterpreterPoolExecutor that builds on ThreadPoolExecutor. (Note that this is not tied to PEP 734, which is strictly about adding a new stdlib module.) Possible future improvements: * support passing a script for the initializer or to submit() * support passing (most) arbitrary functions without pickling * support passing closures * optionally exec functions against __main__ instead of the their original module --- Doc/library/asyncio-dev.rst | 6 +- Doc/library/asyncio-eventloop.rst | 9 +- Doc/library/asyncio-llapi-index.rst | 2 +- Doc/library/concurrent.futures.rst | 135 ++++++- Doc/whatsnew/3.14.rst | 8 + Lib/concurrent/futures/__init__.py | 12 +- Lib/concurrent/futures/interpreter.py | 241 ++++++++++++ Lib/concurrent/futures/thread.py | 90 +++-- Lib/test/test_concurrent_futures/executor.py | 4 +- .../test_interpreter_pool.py | 346 ++++++++++++++++++ Lib/test/test_concurrent_futures/util.py | 5 + ...-09-27-15-42-55.gh-issue-124694.uUy32y.rst | 6 + 12 files changed, 826 insertions(+), 38 deletions(-) create mode 100644 Lib/concurrent/futures/interpreter.py create mode 100644 Lib/test/test_concurrent_futures/test_interpreter_pool.py create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst diff --git a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index a9c3a0183bb72d..44b507a9811116 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -103,7 +103,8 @@ To handle signals the event loop must be run in the main thread. The :meth:`loop.run_in_executor` method can be used with a -:class:`concurrent.futures.ThreadPoolExecutor` to execute +:class:`concurrent.futures.ThreadPoolExecutor` or +:class:`~concurrent.futures.InterpreterPoolExecutor` to execute blocking code in a different OS thread without blocking the OS thread that the event loop runs in. @@ -128,7 +129,8 @@ if a function performs a CPU-intensive calculation for 1 second, all concurrent asyncio Tasks and IO operations would be delayed by 1 second. -An executor can be used to run a task in a different thread or even in +An executor can be used to run a task in a different thread, +including in a different interpreter, or even in a different process to avoid blocking the OS thread with the event loop. See the :meth:`loop.run_in_executor` method for more details. diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 943683f6b8a7f6..14fd153f640f05 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -1305,6 +1305,12 @@ Executing code in thread or process pools pool, cpu_bound) print('custom process pool', result) + # 4. Run in a custom interpreter pool: + with concurrent.futures.InterpreterPoolExecutor() as pool: + result = await loop.run_in_executor( + pool, cpu_bound) + print('custom interpreter pool', result) + if __name__ == '__main__': asyncio.run(main()) @@ -1329,7 +1335,8 @@ Executing code in thread or process pools Set *executor* as the default executor used by :meth:`run_in_executor`. *executor* must be an instance of - :class:`~concurrent.futures.ThreadPoolExecutor`. + :class:`~concurrent.futures.ThreadPoolExecutor`, which includes + :class:`~concurrent.futures.InterpreterPoolExecutor`. .. versionchanged:: 3.11 *executor* must be an instance of diff --git a/Doc/library/asyncio-llapi-index.rst b/Doc/library/asyncio-llapi-index.rst index 3e21054aa4fe9e..f5af888f31f186 100644 --- a/Doc/library/asyncio-llapi-index.rst +++ b/Doc/library/asyncio-llapi-index.rst @@ -96,7 +96,7 @@ See also the main documentation section about the - Invoke a callback *at* the given time. -.. rubric:: Thread/Process Pool +.. rubric:: Thread/Interpreter/Process Pool .. list-table:: :widths: 50 50 :class: full-width-table diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index ce72127127c7a6..45a73705f10e92 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -15,9 +15,10 @@ The :mod:`concurrent.futures` module provides a high-level interface for asynchronously executing callables. The asynchronous execution can be performed with threads, using -:class:`ThreadPoolExecutor`, or separate processes, using -:class:`ProcessPoolExecutor`. Both implement the same interface, which is -defined by the abstract :class:`Executor` class. +:class:`ThreadPoolExecutor` or :class:`InterpreterPoolExecutor`, +or separate processes, using :class:`ProcessPoolExecutor`. +Each implements the same interface, which is defined +by the abstract :class:`Executor` class. .. include:: ../includes/wasm-notavail.rst @@ -63,7 +64,8 @@ Executor Objects setting *chunksize* to a positive integer. For very long iterables, using a large value for *chunksize* can significantly improve performance compared to the default size of 1. With - :class:`ThreadPoolExecutor`, *chunksize* has no effect. + :class:`ThreadPoolExecutor` and :class:`InterpreterPoolExecutor`, + *chunksize* has no effect. .. versionchanged:: 3.5 Added the *chunksize* argument. @@ -227,6 +229,111 @@ ThreadPoolExecutor Example print('%r page is %d bytes' % (url, len(data))) +InterpreterPoolExecutor +----------------------- + +The :class:`InterpreterPoolExecutor` class uses a pool of interpreters +to execute calls asynchronously. It is a :class:`ThreadPoolExecutor` +subclass, which means each worker is running in its own thread. +The difference here is that each worker has its own interpreter, +and runs each task using that interpreter. + +The biggest benefit to using interpreters instead of only threads +is true multi-core parallelism. Each interpreter has its own +:term:`Global Interpreter Lock `, so code +running in one interpreter can run on one CPU core, while code in +another interpreter runs unblocked on a different core. + +The tradeoff is that writing concurrent code for use with multiple +interpreters can take extra effort. However, this is because it +forces you to be deliberate about how and when interpreters interact, +and to be explicit about what data is shared between interpreters. +This results in several benefits that help balance the extra effort, +including true multi-core parallelism, For example, code written +this way can make it easier to reason about concurrency. Another +major benefit is that you don't have to deal with several of the +big pain points of using threads, like nrace conditions. + +Each worker's interpreter is isolated from all the other interpreters. +"Isolated" means each interpreter has its own runtime state and +operates completely independently. For example, if you redirect +:data:`sys.stdout` in one interpreter, it will not be automatically +redirected any other interpreter. If you import a module in one +interpreter, it is not automatically imported in any other. You +would need to import the module separately in interpreter where +you need it. In fact, each module imported in an interpreter is +a completely separate object from the same module in a different +interpreter, including :mod:`sys`, :mod:`builtins`, +and even ``__main__``. + +Isolation means a mutable object, or other data, cannot be used +by more than one interpreter at the same time. That effectively means +interpreters cannot actually share such objects or data. Instead, +each interpreter must have its own copy, and you will have to +synchronize any changes between the copies manually. Immutable +objects and data, like the builtin singletons, strings, and tuples +of immutable objects, don't have these limitations. + +Communicating and synchronizing between interpreters is most effectively +done using dedicated tools, like those proposed in :pep:`734`. One less +efficient alternative is to serialize with :mod:`pickle` and then send +the bytes over a shared :mod:`socket ` or +:func:`pipe `. + +.. class:: InterpreterPoolExecutor(max_workers=None, thread_name_prefix='', initializer=None, initargs=(), shared=None) + + A :class:`ThreadPoolExecutor` subclass that executes calls asynchronously + using a pool of at most *max_workers* threads. Each thread runs + tasks in its own interpreter. The worker interpreters are isolated + from each other, which means each has its own runtime state and that + they can't share any mutable objects or other data. Each interpreter + has its own :term:`Global Interpreter Lock `, + which means code run with this executor has true multi-core parallelism. + + The optional *initializer* and *initargs* arguments have the same + meaning as for :class:`!ThreadPoolExecutor`: the initializer is run + when each worker is created, though in this case it is run.in + the worker's interpreter. The executor serializes the *initializer* + and *initargs* using :mod:`pickle` when sending them to the worker's + interpreter. + + .. note:: + Functions defined in the ``__main__`` module cannot be pickled + and thus cannot be used. + + .. note:: + The executor may replace uncaught exceptions from *initializer* + with :class:`~concurrent.futures.interpreter.ExecutionFailed`. + + The optional *shared* argument is a :class:`dict` of objects that all + interpreters in the pool share. The *shared* items are added to each + interpreter's ``__main__`` module. Not all objects are shareable. + Shareable objects include the builtin singletons, :class:`str` + and :class:`bytes`, and :class:`memoryview`. See :pep:`734` + for more info. + + Other caveats from parent :class:`ThreadPoolExecutor` apply here. + +:meth:`~Executor.submit` and :meth:`~Executor.map` work like normal, +except the worker serializes the callable and arguments using +:mod:`pickle` when sending them to its interpreter. The worker +likewise serializes the return value when sending it back. + +.. note:: + Functions defined in the ``__main__`` module cannot be pickled + and thus cannot be used. + +When a worker's current task raises an uncaught exception, the worker +always tries to preserve the exception as-is. If that is successful +then it also sets the ``__cause__`` to a corresponding +:class:`~concurrent.futures.interpreter.ExecutionFailed` +instance, which contains a summary of the original exception. +In the uncommon case that the worker is not able to preserve the +original as-is then it directly preserves the corresponding +:class:`~concurrent.futures.interpreter.ExecutionFailed` +instance instead. + + ProcessPoolExecutor ------------------- @@ -574,6 +681,26 @@ Exception classes .. versionadded:: 3.7 +.. currentmodule:: concurrent.futures.interpreter + +.. exception:: BrokenInterpreterPool + + Derived from :exc:`~concurrent.futures.thread.BrokenThreadPool`, + this exception class is raised when one of the workers + of a :class:`~concurrent.futures.InterpreterPoolExecutor` + has failed initializing. + + .. versionadded:: next + +.. exception:: ExecutionFailed + + Raised from :class:`~concurrent.futures.InterpreterPoolExecutor` when + the given initializer fails or from + :meth:`~concurrent.futures.Executor.submit` when there's an uncaught + exception from the submitted task. + + .. versionadded:: next + .. currentmodule:: concurrent.futures.process .. exception:: BrokenProcessPool diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b106578fe9e8b0..9543af3c7ca225 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -225,6 +225,14 @@ ast * The ``repr()`` output for AST nodes now includes more information. (Contributed by Tomas R in :gh:`116022`.) +concurrent.futures +------------------ + +* Add :class:`~concurrent.futures.InterpreterPoolExecutor`, + which exposes "subinterpreters (multiple Python interpreters in the + same process) to Python code. This is separate from the proposed API + in :pep:`734`. + (Contributed by Eric Snow in :gh:`124548`.) ctypes ------ diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index 72de617a5b6f61..7ada7431c1ab8c 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -29,6 +29,7 @@ 'Executor', 'wait', 'as_completed', + 'InterpreterPoolExecutor', 'ProcessPoolExecutor', 'ThreadPoolExecutor', ) @@ -39,7 +40,7 @@ def __dir__(): def __getattr__(name): - global ProcessPoolExecutor, ThreadPoolExecutor + global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': from .process import ProcessPoolExecutor as pe @@ -51,4 +52,13 @@ def __getattr__(name): ThreadPoolExecutor = te return te + if name == 'InterpreterPoolExecutor': + try: + from .interpreter import InterpreterPoolExecutor as ie + except ModuleNotFoundError: + ie = InterpreterPoolExecutor = None + else: + InterpreterPoolExecutor = ie + return ie + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py new file mode 100644 index 00000000000000..fd7941adb766bb --- /dev/null +++ b/Lib/concurrent/futures/interpreter.py @@ -0,0 +1,241 @@ +"""Implements InterpreterPoolExecutor.""" + +import contextlib +import pickle +import textwrap +from . import thread as _thread +import _interpreters +import _interpqueues + + +class ExecutionFailed(_interpreters.InterpreterError): + """An unhandled exception happened during execution.""" + + def __init__(self, excinfo): + msg = excinfo.formatted + if not msg: + if excinfo.type and excinfo.msg: + msg = f'{excinfo.type.__name__}: {excinfo.msg}' + else: + msg = excinfo.type.__name__ or excinfo.msg + super().__init__(msg) + self.excinfo = excinfo + + def __str__(self): + try: + formatted = self.excinfo.errdisplay + except Exception: + return super().__str__() + else: + return textwrap.dedent(f""" +{super().__str__()} + +Uncaught in the interpreter: + +{formatted} + """.strip()) + + +UNBOUND = 2 # error; this should not happen. + + +class WorkerContext(_thread.WorkerContext): + + @classmethod + def prepare(cls, initializer, initargs, shared): + def resolve_task(fn, args, kwargs): + if isinstance(fn, str): + # XXX Circle back to this later. + raise TypeError('scripts not supported') + if args or kwargs: + raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') + data = textwrap.dedent(fn) + kind = 'script' + # Make sure the script compiles. + # Ideally we wouldn't throw away the resulting code + # object. However, there isn't much to be done until + # code objects are shareable and/or we do a better job + # of supporting code objects in _interpreters.exec(). + compile(data, '', 'exec') + else: + # Functions defined in the __main__ module can't be pickled, + # so they can't be used here. In the future, we could possibly + # borrow from multiprocessing to work around this. + data = pickle.dumps((fn, args, kwargs)) + kind = 'function' + return (data, kind) + + if initializer is not None: + try: + initdata = resolve_task(initializer, initargs, {}) + except ValueError: + if isinstance(initializer, str) and initargs: + raise ValueError(f'an initializer script does not take args, got {initargs!r}') + raise # re-raise + else: + initdata = None + def create_context(): + return cls(initdata, shared) + return create_context, resolve_task + + @classmethod + @contextlib.contextmanager + def _capture_exc(cls, resultsid): + try: + yield + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. + err = pickle.dumps(exc) + _interpqueues.put(resultsid, (None, err), 1, UNBOUND) + raise # re-raise + + @classmethod + def _send_script_result(cls, resultsid): + _interpqueues.put(resultsid, (None, None), 0, UNBOUND) + + @classmethod + def _call(cls, func, args, kwargs, resultsid): + with cls._capture_exc(resultsid): + res = func(*args or (), **kwargs or {}) + # Send the result back. + try: + _interpqueues.put(resultsid, (res, None), 0, UNBOUND) + except _interpreters.NotShareableError: + res = pickle.dumps(res) + _interpqueues.put(resultsid, (res, None), 1, UNBOUND) + + @classmethod + def _call_pickled(cls, pickled, resultsid): + fn, args, kwargs = pickle.loads(pickled) + cls._call(fn, args, kwargs, resultsid) + + def __init__(self, initdata, shared=None): + self.initdata = initdata + self.shared = dict(shared) if shared else None + self.interpid = None + self.resultsid = None + + def __del__(self): + if self.interpid is not None: + self.finalize() + + def _exec(self, script): + assert self.interpid is not None + excinfo = _interpreters.exec(self.interpid, script, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + + def initialize(self): + assert self.interpid is None, self.interpid + self.interpid = _interpreters.create(reqrefs=True) + try: + _interpreters.incref(self.interpid) + + maxsize = 0 + fmt = 0 + self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) + + self._exec(f'from {__name__} import WorkerContext') + + if self.shared: + _interpreters.set___main___attrs( + self.interpid, self.shared, restrict=True) + + if self.initdata: + self.run(self.initdata) + except BaseException: + self.finalize() + raise # re-raise + + def finalize(self): + interpid = self.interpid + resultsid = self.resultsid + self.resultsid = None + self.interpid = None + if resultsid is not None: + try: + _interpqueues.destroy(resultsid) + except _interpqueues.QueueNotFoundError: + pass + if interpid is not None: + try: + _interpreters.decref(interpid) + except _interpreters.InterpreterNotFoundError: + pass + + def run(self, task): + data, kind = task + if kind == 'script': + raise NotImplementedError('script kind disabled') + script = f""" +with WorkerContext._capture_exc({self.resultsid}): +{textwrap.indent(data, ' ')} +WorkerContext._send_script_result({self.resultsid})""" + elif kind == 'function': + script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' + else: + raise NotImplementedError(kind) + + try: + self._exec(script) + except ExecutionFailed as exc: + exc_wrapper = exc + else: + exc_wrapper = None + + # Return the result, or raise the exception. + while True: + try: + obj = _interpqueues.get(self.resultsid) + except _interpqueues.QueueNotFoundError: + raise # re-raise + except _interpqueues.QueueError: + continue + except ModuleNotFoundError: + # interpreters.queues doesn't exist, which means + # QueueEmpty doesn't. Act as though it does. + continue + else: + break + (res, excdata), pickled, unboundop = obj + assert unboundop is None, unboundop + if excdata is not None: + assert res is None, res + assert pickled + assert exc_wrapper is not None + exc = pickle.loads(excdata) + raise exc from exc_wrapper + return pickle.loads(res) if pickled else res + + +class BrokenInterpreterPool(_thread.BrokenThreadPool): + """ + Raised when a worker thread in an InterpreterPoolExecutor failed initializing. + """ + + +class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): + + BROKEN = BrokenInterpreterPool + + @classmethod + def prepare_context(cls, initializer, initargs, shared): + return WorkerContext.prepare(initializer, initargs, shared) + + def __init__(self, max_workers=None, thread_name_prefix='', + initializer=None, initargs=(), shared=None): + """Initializes a new InterpreterPoolExecutor instance. + + Args: + max_workers: The maximum number of interpreters that can be used to + execute the given calls. + thread_name_prefix: An optional name prefix to give our threads. + initializer: A callable or script used to initialize + each worker interpreter. + initargs: A tuple of arguments to pass to the initializer. + shared: A mapping of shareabled objects to be inserted into + each worker interpreter. + """ + super().__init__(max_workers, thread_name_prefix, + initializer, initargs, shared=shared) diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py index a024033f35fb54..16cc5533d429ef 100644 --- a/Lib/concurrent/futures/thread.py +++ b/Lib/concurrent/futures/thread.py @@ -43,19 +43,46 @@ def _python_exit(): after_in_parent=_global_shutdown_lock.release) +class WorkerContext: + + @classmethod + def prepare(cls, initializer, initargs): + if initializer is not None: + if not callable(initializer): + raise TypeError("initializer must be a callable") + def create_context(): + return cls(initializer, initargs) + def resolve_task(fn, args, kwargs): + return (fn, args, kwargs) + return create_context, resolve_task + + def __init__(self, initializer, initargs): + self.initializer = initializer + self.initargs = initargs + + def initialize(self): + if self.initializer is not None: + self.initializer(*self.initargs) + + def finalize(self): + pass + + def run(self, task): + fn, args, kwargs = task + return fn(*args, **kwargs) + + class _WorkItem: - def __init__(self, future, fn, args, kwargs): + def __init__(self, future, task): self.future = future - self.fn = fn - self.args = args - self.kwargs = kwargs + self.task = task - def run(self): + def run(self, ctx): if not self.future.set_running_or_notify_cancel(): return try: - result = self.fn(*self.args, **self.kwargs) + result = ctx.run(self.task) except BaseException as exc: self.future.set_exception(exc) # Break a reference cycle with the exception 'exc' @@ -66,16 +93,15 @@ def run(self): __class_getitem__ = classmethod(types.GenericAlias) -def _worker(executor_reference, work_queue, initializer, initargs): - if initializer is not None: - try: - initializer(*initargs) - except BaseException: - _base.LOGGER.critical('Exception in initializer:', exc_info=True) - executor = executor_reference() - if executor is not None: - executor._initializer_failed() - return +def _worker(executor_reference, ctx, work_queue): + try: + ctx.initialize() + except BaseException: + _base.LOGGER.critical('Exception in initializer:', exc_info=True) + executor = executor_reference() + if executor is not None: + executor._initializer_failed() + return try: while True: try: @@ -89,7 +115,7 @@ def _worker(executor_reference, work_queue, initializer, initargs): work_item = work_queue.get(block=True) if work_item is not None: - work_item.run() + work_item.run(ctx) # Delete references to object. See GH-60488 del work_item continue @@ -110,6 +136,8 @@ def _worker(executor_reference, work_queue, initializer, initargs): del executor except BaseException: _base.LOGGER.critical('Exception in worker', exc_info=True) + finally: + ctx.finalize() class BrokenThreadPool(_base.BrokenExecutor): @@ -120,11 +148,17 @@ class BrokenThreadPool(_base.BrokenExecutor): class ThreadPoolExecutor(_base.Executor): + BROKEN = BrokenThreadPool + # Used to assign unique thread names when thread_name_prefix is not supplied. _counter = itertools.count().__next__ + @classmethod + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) + def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=()): + initializer=None, initargs=(), **ctxkwargs): """Initializes a new ThreadPoolExecutor instance. Args: @@ -133,6 +167,7 @@ def __init__(self, max_workers=None, thread_name_prefix='', thread_name_prefix: An optional name prefix to give our threads. initializer: A callable used to initialize worker threads. initargs: A tuple of arguments to pass to the initializer. + ctxkwargs: Additional arguments to cls.prepare_context(). """ if max_workers is None: # ThreadPoolExecutor is often used to: @@ -146,8 +181,9 @@ def __init__(self, max_workers=None, thread_name_prefix='', if max_workers <= 0: raise ValueError("max_workers must be greater than 0") - if initializer is not None and not callable(initializer): - raise TypeError("initializer must be a callable") + (self._create_worker_context, + self._resolve_work_item_task, + ) = type(self).prepare_context(initializer, initargs, **ctxkwargs) self._max_workers = max_workers self._work_queue = queue.SimpleQueue() @@ -158,13 +194,11 @@ def __init__(self, max_workers=None, thread_name_prefix='', self._shutdown_lock = threading.Lock() self._thread_name_prefix = (thread_name_prefix or ("ThreadPoolExecutor-%d" % self._counter())) - self._initializer = initializer - self._initargs = initargs def submit(self, fn, /, *args, **kwargs): with self._shutdown_lock, _global_shutdown_lock: if self._broken: - raise BrokenThreadPool(self._broken) + raise self.BROKEN(self._broken) if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') @@ -173,7 +207,8 @@ def submit(self, fn, /, *args, **kwargs): 'interpreter shutdown') f = _base.Future() - w = _WorkItem(f, fn, args, kwargs) + task = self._resolve_work_item_task(fn, args, kwargs) + w = _WorkItem(f, task) self._work_queue.put(w) self._adjust_thread_count() @@ -196,9 +231,8 @@ def weakref_cb(_, q=self._work_queue): num_threads) t = threading.Thread(name=thread_name, target=_worker, args=(weakref.ref(self, weakref_cb), - self._work_queue, - self._initializer, - self._initargs)) + self._create_worker_context(), + self._work_queue)) t.start() self._threads.add(t) _threads_queues[t] = self._work_queue @@ -214,7 +248,7 @@ def _initializer_failed(self): except queue.Empty: break if work_item is not None: - work_item.future.set_exception(BrokenThreadPool(self._broken)) + work_item.future.set_exception(self.BROKEN(self._broken)) def shutdown(self, wait=True, *, cancel_futures=False): with self._shutdown_lock: diff --git a/Lib/test/test_concurrent_futures/executor.py b/Lib/test/test_concurrent_futures/executor.py index 4160656cb133ab..b97d9ffd94b1f8 100644 --- a/Lib/test/test_concurrent_futures/executor.py +++ b/Lib/test/test_concurrent_futures/executor.py @@ -23,6 +23,7 @@ def make_dummy_object(_): class ExecutorTest: + # Executor.shutdown() and context manager usage is tested by # ExecutorShutdownTest. def test_submit(self): @@ -52,7 +53,8 @@ def test_map_exception(self): i = self.executor.map(divmod, [1, 1, 1, 1], [2, 3, 0, 5]) self.assertEqual(i.__next__(), (0, 1)) self.assertEqual(i.__next__(), (0, 1)) - self.assertRaises(ZeroDivisionError, i.__next__) + with self.assertRaises(ZeroDivisionError): + i.__next__() @support.requires_resource('walltime') def test_map_timeout(self): diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py new file mode 100644 index 00000000000000..0de03c0d669399 --- /dev/null +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -0,0 +1,346 @@ +import asyncio +import contextlib +import io +import os +import pickle +import sys +import time +import unittest +from concurrent.futures.interpreter import ( + ExecutionFailed, BrokenInterpreterPool, +) +import _interpreters +from test import support +import test.test_asyncio.utils as testasyncio_utils +from test.support.interpreters import queues + +from .executor import ExecutorTest, mul +from .util import BaseTestCase, InterpreterPoolMixin, setup_module + + +def noop(): + pass + + +def write_msg(fd, msg): + os.write(fd, msg + b'\0') + + +def read_msg(fd): + msg = b'' + while ch := os.read(fd, 1): + if ch == b'\0': + return msg + msg += ch + + +def get_current_name(): + return __name__ + + +def fail(exctype, msg=None): + raise exctype(msg) + + +def get_current_interpid(*extra): + interpid, _ = _interpreters.get_current() + return (interpid, *extra) + + +class InterpretersMixin(InterpreterPoolMixin): + + def pipe(self): + r, w = os.pipe() + self.addCleanup(lambda: os.close(r)) + self.addCleanup(lambda: os.close(w)) + return r, w + + +class InterpreterPoolExecutorTest( + InterpretersMixin, ExecutorTest, BaseTestCase): + + @unittest.expectedFailure + def test_init_script(self): + msg1 = b'step: init' + msg2 = b'step: run' + r, w = self.pipe() + initscript = f""" + import os + msg = {msg2!r} + os.write({w}, {msg1!r} + b'\\0') + """ + script = f""" + os.write({w}, msg + b'\\0') + """ + os.write(w, b'\0') + + executor = self.executor_type(initializer=initscript) + before_init = os.read(r, 100) + fut = executor.submit(script) + after_init = read_msg(r) + fut.result() + after_run = read_msg(r) + + self.assertEqual(before_init, b'\0') + self.assertEqual(after_init, msg1) + self.assertEqual(after_run, msg2) + + @unittest.expectedFailure + def test_init_script_args(self): + with self.assertRaises(ValueError): + self.executor_type(initializer='pass', initargs=('spam',)) + + def test_init_func(self): + msg = b'step: init' + r, w = self.pipe() + os.write(w, b'\0') + + executor = self.executor_type( + initializer=write_msg, initargs=(w, msg)) + before = os.read(r, 100) + executor.submit(mul, 10, 10) + after = read_msg(r) + + self.assertEqual(before, b'\0') + self.assertEqual(after, msg) + + def test_init_closure(self): + count = 0 + def init1(): + assert count == 0, count + def init2(): + nonlocal count + count += 1 + + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=init1) + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=init2) + + def test_init_instance_method(self): + class Spam: + def initializer(self): + raise NotImplementedError + spam = Spam() + + with self.assertRaises(pickle.PicklingError): + self.executor_type(initializer=spam.initializer) + + def test_init_shared(self): + msg = b'eggs' + r, w = self.pipe() + script = f"""if True: + import os + if __name__ != '__main__': + import __main__ + spam = __main__.spam + os.write({w}, spam + b'\\0') + """ + + executor = self.executor_type(shared={'spam': msg}) + fut = executor.submit(exec, script) + fut.result() + after = read_msg(r) + + self.assertEqual(after, msg) + + @unittest.expectedFailure + def test_init_exception_in_script(self): + executor = self.executor_type(initializer='raise Exception("spam")') + with executor: + with contextlib.redirect_stderr(io.StringIO()) as stderr: + fut = executor.submit('pass') + with self.assertRaises(BrokenInterpreterPool): + fut.result() + stderr = stderr.getvalue() + self.assertIn('ExecutionFailed: Exception: spam', stderr) + self.assertIn('Uncaught in the interpreter:', stderr) + self.assertIn('The above exception was the direct cause of the following exception:', + stderr) + + def test_init_exception_in_func(self): + executor = self.executor_type(initializer=fail, + initargs=(Exception, 'spam')) + with executor: + with contextlib.redirect_stderr(io.StringIO()) as stderr: + fut = executor.submit(noop) + with self.assertRaises(BrokenInterpreterPool): + fut.result() + stderr = stderr.getvalue() + self.assertIn('ExecutionFailed: Exception: spam', stderr) + self.assertIn('Uncaught in the interpreter:', stderr) + self.assertIn('The above exception was the direct cause of the following exception:', + stderr) + + @unittest.expectedFailure + def test_submit_script(self): + msg = b'spam' + r, w = self.pipe() + script = f""" + import os + os.write({w}, __name__.encode('utf-8') + b'\\0') + """ + executor = self.executor_type() + + fut = executor.submit(script) + res = fut.result() + after = read_msg(r) + + self.assertEqual(after, b'__main__') + self.assertIs(res, None) + + def test_submit_closure(self): + spam = True + def task1(): + return spam + def task2(): + nonlocal spam + spam += 1 + return spam + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(task1) + with self.assertRaises(pickle.PicklingError): + executor.submit(task2) + + def test_submit_local_instance(self): + class Spam: + def __init__(self): + self.value = True + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(Spam) + + def test_submit_instance_method(self): + class Spam: + def run(self): + return True + spam = Spam() + + executor = self.executor_type() + with self.assertRaises(pickle.PicklingError): + executor.submit(spam.run) + + def test_submit_func_globals(self): + executor = self.executor_type() + fut = executor.submit(get_current_name) + name = fut.result() + + self.assertEqual(name, __name__) + self.assertNotEqual(name, '__main__') + + @unittest.expectedFailure + def test_submit_exception_in_script(self): + fut = self.executor.submit('raise Exception("spam")') + with self.assertRaises(Exception) as captured: + fut.result() + self.assertIs(type(captured.exception), Exception) + self.assertEqual(str(captured.exception), 'spam') + cause = captured.exception.__cause__ + self.assertIs(type(cause), ExecutionFailed) + for attr in ('__name__', '__qualname__', '__module__'): + self.assertEqual(getattr(cause.excinfo.type, attr), + getattr(Exception, attr)) + self.assertEqual(cause.excinfo.msg, 'spam') + + def test_submit_exception_in_func(self): + fut = self.executor.submit(fail, Exception, 'spam') + with self.assertRaises(Exception) as captured: + fut.result() + self.assertIs(type(captured.exception), Exception) + self.assertEqual(str(captured.exception), 'spam') + cause = captured.exception.__cause__ + self.assertIs(type(cause), ExecutionFailed) + for attr in ('__name__', '__qualname__', '__module__'): + self.assertEqual(getattr(cause.excinfo.type, attr), + getattr(Exception, attr)) + self.assertEqual(cause.excinfo.msg, 'spam') + + def test_saturation(self): + blocker = queues.create() + executor = self.executor_type(4, shared=dict(blocker=blocker)) + + for i in range(15 * executor._max_workers): + executor.submit(exec, 'import __main__; __main__.blocker.get()') + #executor.submit('blocker.get()') + self.assertEqual(len(executor._threads), executor._max_workers) + for i in range(15 * executor._max_workers): + blocker.put_nowait(None) + executor.shutdown(wait=True) + + @support.requires_gil_enabled("gh-117344: test is flaky without the GIL") + def test_idle_thread_reuse(self): + executor = self.executor_type() + executor.submit(mul, 21, 2).result() + executor.submit(mul, 6, 7).result() + executor.submit(mul, 3, 14).result() + self.assertEqual(len(executor._threads), 1) + executor.shutdown(wait=True) + + +class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): + + def setUp(self): + super().setUp() + self.loop = asyncio.new_event_loop() + self.set_event_loop(self.loop) + + self.executor = self.executor_type() + self.addCleanup(lambda: self.executor.shutdown()) + + def tearDown(self): + if not self.loop.is_closed(): + testasyncio_utils.run_briefly(self.loop) + + self.doCleanups() + support.gc_collect() + super().tearDown() + + def test_run_in_executor(self): + unexpected, _ = _interpreters.get_current() + + func = get_current_interpid + fut = self.loop.run_in_executor(self.executor, func, 'yo') + interpid, res = self.loop.run_until_complete(fut) + + self.assertEqual(res, 'yo') + self.assertNotEqual(interpid, unexpected) + + def test_run_in_executor_cancel(self): + executor = self.executor_type() + + called = False + + def patched_call_soon(*args): + nonlocal called + called = True + + func = time.sleep + fut = self.loop.run_in_executor(self.executor, func, 0.05) + fut.cancel() + self.loop.run_until_complete( + self.loop.shutdown_default_executor()) + self.loop.close() + self.loop.call_soon = patched_call_soon + self.loop.call_soon_threadsafe = patched_call_soon + time.sleep(0.4) + self.assertFalse(called) + + def test_default_executor(self): + unexpected, _ = _interpreters.get_current() + + self.loop.set_default_executor(self.executor) + fut = self.loop.run_in_executor(None, get_current_interpid) + interpid, = self.loop.run_until_complete(fut) + + self.assertNotEqual(interpid, unexpected) + + +def setUpModule(): + setup_module() + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_concurrent_futures/util.py b/Lib/test/test_concurrent_futures/util.py index 3b8ec3e205d5aa..52baab51340fc9 100644 --- a/Lib/test/test_concurrent_futures/util.py +++ b/Lib/test/test_concurrent_futures/util.py @@ -74,6 +74,10 @@ class ThreadPoolMixin(ExecutorMixin): executor_type = futures.ThreadPoolExecutor +class InterpreterPoolMixin(ExecutorMixin): + executor_type = futures.InterpreterPoolExecutor + + class ProcessPoolForkMixin(ExecutorMixin): executor_type = futures.ProcessPoolExecutor ctx = "fork" @@ -120,6 +124,7 @@ def get_context(self): def create_executor_tests(remote_globals, mixin, bases=(BaseTestCase,), executor_mixins=(ThreadPoolMixin, + InterpreterPoolMixin, ProcessPoolForkMixin, ProcessPoolForkserverMixin, ProcessPoolSpawnMixin)): diff --git a/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst b/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst new file mode 100644 index 00000000000000..1aa1a463b0c63a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-15-42-55.gh-issue-124694.uUy32y.rst @@ -0,0 +1,6 @@ +We've added :class:`concurrent.futures.InterpreterPoolExecutor`, which +allows you to run code in multiple isolated interpreters. This allows you +to circumvent the limitations of CPU-bound threads (due to the GIL). Patch +by Eric Snow. + +This addition is unrelated to :pep:`734`.