From 31f6d82210d9ca0acf30d3182409a5d1b2bee0fd Mon Sep 17 00:00:00 2001 From: Joshua Kriegshauser Date: Wed, 13 Mar 2024 18:36:11 -0700 Subject: [PATCH 1/6] Unit test (fails about 50% of the time, 80% at 60 seconds) --- Lib/test/test_asyncio/test_windows_events.py | 49 ++++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py index 6e6c90a247b291..b770c71acbbb51 100644 --- a/Lib/test/test_asyncio/test_windows_events.py +++ b/Lib/test/test_asyncio/test_windows_events.py @@ -36,7 +36,23 @@ def data_received(self, data): self.trans.close() -class ProactorLoopCtrlC(test_utils.TestCase): +class WindowsEventsTestCase(test_utils.TestCase): + def _unraisablehook(self, unraisable): + # Storing unraisable.object can resurrect an object which is being + # finalized. Storing unraisable.exc_value creates a reference cycle. + self._unraisable = unraisable + print(unraisable) + + def setUp(self): + self._prev_unraisablehook = sys.unraisablehook + self._unraisable = None + sys.unraisablehook = self._unraisablehook + + def tearDown(self): + sys.unraisablehook = self._prev_unraisablehook + self.assertIsNone(self._unraisable) + +class ProactorLoopCtrlC(WindowsEventsTestCase): def test_ctrl_c(self): @@ -58,7 +74,7 @@ def SIGINT_after_delay(): thread.join() -class ProactorMultithreading(test_utils.TestCase): +class ProactorMultithreading(WindowsEventsTestCase): def test_run_from_nonmain_thread(self): finished = False @@ -79,7 +95,7 @@ def func(): self.assertTrue(finished) -class ProactorTests(test_utils.TestCase): +class ProactorTests(WindowsEventsTestCase): def setUp(self): super().setUp() @@ -283,8 +299,31 @@ async def probe(): return "done" - -class WinPolicyTests(test_utils.TestCase): + def test_loop_restart(self): + # We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX> still has pending operation at deallocation, the process may crash" error + stop = threading.Event() + def threadMain(): + while not stop.is_set(): + self.loop.call_soon_threadsafe(lambda: None) + time.sleep(0.01) + thr = threading.Thread(target=threadMain) + + # In 10 60-second runs of this test prior to the fix: + # time in seconds until failure: (none), 15.0, 6.4, (none), 7.6, 8.3, 1.7, 22.2, 23.5, 8.3 + # 10 seconds had a 50% failure rate but longer would be more costly + end_time = time.time() + 10 # Run for 10 seconds + self.loop.call_soon(thr.start) + while not self._unraisable: # Stop if we got an unraisable exc + self.loop.stop() + self.loop.run_forever() + if time.time() >= end_time: + break + + stop.set() + thr.join() + + +class WinPolicyTests(WindowsEventsTestCase): def test_selector_win_policy(self): async def main(): From 4c994131aa85438129e595c533e5e94888edd2cd Mon Sep 17 00:00:00 2001 From: Joshua Kriegshauser Date: Wed, 13 Mar 2024 18:40:00 -0700 Subject: [PATCH 2/6] Fix --- Lib/asyncio/windows_events.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index b62ea75fee3858..aca063e54c73f6 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -324,13 +324,13 @@ def _run_forever_cleanup(self): if self._self_reading_future is not None: ov = self._self_reading_future._ov self._self_reading_future.cancel() - # self_reading_future was just cancelled so if it hasn't been - # finished yet, it never will be (it's possible that it has - # already finished and its callback is waiting in the queue, - # where it could still happen if the event loop is restarted). - # Unregister it otherwise IocpProactor.close will wait for it - # forever - if ov is not None: + # self_reading_future always uses IOCP, so even though it's + # been cancelled, we need to make sure that the IOCP message + # is received so that the kernel is not holding on to the + # memory, possibly causing memory corruption later. Only + # unregister it if IO is complete in all respects. Otherwise + # we need another _poll() later to complete the IO. + if ov is not None and not ov.pending: self._proactor._unregister(ov) self._self_reading_future = None From c05511e58a7bdc5e1e402fd965fb1cd0c577899b Mon Sep 17 00:00:00 2001 From: Joshua Kriegshauser Date: Wed, 13 Mar 2024 18:40:04 -0700 Subject: [PATCH 3/6] Comment --- Modules/overlapped.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Modules/overlapped.c b/Modules/overlapped.c index fd40e91d0f50c4..4c6c7531cdeab0 100644 --- a/Modules/overlapped.c +++ b/Modules/overlapped.c @@ -723,6 +723,24 @@ Overlapped_dealloc(OverlappedObject *self) if (!HasOverlappedIoCompleted(&self->overlapped) && self->type != TYPE_NOT_STARTED) { + // NOTE: We should not get here, if we do then something is wrong in + // the IocpProactor or ProactorEventLoop. Since everything uses IOCP if + // the overlapped IO hasn't completed yet then we should not be + // deallocating! + // + // The problem is likely that this OverlappedObject was removed from + // the IocpProactor._cache before it was complete. The _cache holds a + // reference while IO is pending so that it does not get deallocated + // while the kernel has retained the OVERLAPPED structure. + // + // CancelIoEx (likely called from self.cancel()) may have successfully + // completed, but the OVERLAPPED is still in use until either + // HasOverlappedIoCompleted() is true or GetQueuedCompletionStatus has + // returned this OVERLAPPED object. + // + // NOTE: Waiting when IOCP is in use can hang indefinitely, but this + // CancelIoEx is superfluous in that self.cancel() was already called, + // so I've only ever seen this return FALSE with GLE=ERROR_NOT_FOUND Py_BEGIN_ALLOW_THREADS if (CancelIoEx(self->handle, &self->overlapped)) wait = TRUE; From 1829c448bf0e7ce7410809c1e4c04c5ce9f3e65b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 14 Mar 2024 01:58:23 +0000 Subject: [PATCH 4/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst diff --git a/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst new file mode 100644 index 00000000000000..582fdb00e57a25 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst @@ -0,0 +1 @@ +Fix instances of `<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash`. From 76e7dd8f0e0d9d68e562a03301f3e4c3e91039a3 Mon Sep 17 00:00:00 2001 From: Joshua Kriegshauser Date: Wed, 13 Mar 2024 19:03:52 -0700 Subject: [PATCH 5/6] Fixes --- Lib/test/test_asyncio/test_windows_events.py | 4 ++-- .../Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py index b770c71acbbb51..a3ca5d0e17e046 100644 --- a/Lib/test/test_asyncio/test_windows_events.py +++ b/Lib/test/test_asyncio/test_windows_events.py @@ -318,10 +318,10 @@ def threadMain(): self.loop.run_forever() if time.time() >= end_time: break - + stop.set() thr.join() - + class WinPolicyTests(WindowsEventsTestCase): diff --git a/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst index 582fdb00e57a25..8fc3fe80041d26 100644 --- a/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst +++ b/Misc/NEWS.d/next/Windows/2024-03-14-01-58-22.gh-issue-116773.H2UldY.rst @@ -1 +1 @@ -Fix instances of `<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash`. +Fix instances of ``<_overlapped.Overlapped object at 0xXXX> still has pending operation at deallocation, the process may crash``. From 3d4bc0efc52e7ac5fbb250a392b6409eb886a6f2 Mon Sep 17 00:00:00 2001 From: jkriegshauser Date: Mon, 18 Mar 2024 09:42:16 -0700 Subject: [PATCH 6/6] PR feedback Co-authored-by: Steve Dower --- Lib/test/test_asyncio/test_windows_events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_asyncio/test_windows_events.py b/Lib/test/test_asyncio/test_windows_events.py index a3ca5d0e17e046..0c128c599ba011 100644 --- a/Lib/test/test_asyncio/test_windows_events.py +++ b/Lib/test/test_asyncio/test_windows_events.py @@ -300,7 +300,8 @@ async def probe(): return "done" def test_loop_restart(self): - # We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX> still has pending operation at deallocation, the process may crash" error + # We're fishing for the "RuntimeError: <_overlapped.Overlapped object at XXX> + # still has pending operation at deallocation, the process may crash" error stop = threading.Event() def threadMain(): while not stop.is_set():