From 42abb05326d67e50b089f60c39461983d3e942fe Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Jun 2025 15:22:17 -0700 Subject: [PATCH 01/12] Make the GC clear weakrefs later. Clear the weakrefs to unreachable objects after finalizers are called. --- Lib/_threading_local.py | 2 +- Lib/test/test_finalization.py | 11 ++- Lib/test/test_gc.py | 34 ++++--- Lib/test/test_io.py | 22 ++++- Lib/test/test_weakref.py | 2 +- Modules/_datetimemodule.c | 2 +- Modules/gc_weakref.txt | 10 ++ Python/gc.c | 169 ++++++++++++++++++++++------------ Python/gc_free_threading.c | 88 ++++++++++++++---- 9 files changed, 238 insertions(+), 102 deletions(-) diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index 0b9e5d3bbf6ef6..bd57c3b33efc64 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -49,7 +49,7 @@ def local_deleted(_, key=key): # When the localimpl is deleted, remove the thread attribute. thread = wrthread() if thread is not None: - del thread.__dict__[key] + thread.__dict__.pop(key, None) def thread_deleted(_, idt=idt): # When the thread is deleted, remove the local dict. # Note that this is suboptimal if the thread object gets diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 42871f8a09b16b..1d838b9ed9187e 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -280,8 +280,9 @@ def test_simple_resurrect(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - # XXX is this desirable? - self.assertIs(wr(), None) + # This used to be None because weakrefs were cleared before + # calling finalizers. Now they are cleared after. + self.assertIsNot(wr(), None) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() @@ -388,8 +389,10 @@ def check_resurrecting_chain(self, classes): gc.collect() self.assert_del_calls(ids) self.assert_survivors(survivor_ids) - # XXX desirable? - self.assertEqual([wr() for wr in wrs], [None] * N) + for wr in wrs: + # These values used to be None because weakrefs were cleared + # before calling finalizers. Now they are cleared after. + self.assertIsNotNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..6ec0c6414fd090 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,14 +295,18 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) @refcount_test @@ -652,9 +658,11 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # In previous versions of Python, the weakrefs are cleared + # before calling finalizers. Now they are cleared after. + # So when the object is resurrected, the weakref is not + # cleared since it is no longer unreachable. + self.assertIsInstance(x, C1055820) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -1041,8 +1049,8 @@ class Z: # release references and create trash del a, wr_cycle gc.collect() - # if called, it means there is a bug in the GC. The weakref should be - # cleared before Z dies. + # In older versions of Python, the weakref was cleared by the + # gc. Now it is not cleared and so the callback is run. callback.assert_not_called() gc.enable() @@ -1324,6 +1332,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820c(self): # Corresponds to temp2c.py in the bug report. This is pretty # elaborate. @@ -1399,6 +1408,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0c921ffbc2576a..0efe7afe3a5499 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -807,7 +807,12 @@ def test_closefd_attr(self): def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to disk. - with warnings_helper.check_warnings(('', ResourceWarning)): + # + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) f = self.FileIO(os_helper.TESTFN, "wb") f.write(b"abcxxx") f.f = f @@ -1808,7 +1813,11 @@ def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.f = f @@ -2157,7 +2166,11 @@ def test_garbage_collection(self): # all data to disk. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.write(b"123xxx") @@ -4079,7 +4092,8 @@ def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. # The Python version has __del__, so it ends in gc.garbage instead. - with warnings_helper.check_warnings(('', ResourceWarning)): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "wb") b = self.BufferedWriter(rawio) t = self.TextIOWrapper(b, encoding="ascii") diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 4c7c900eb56ae1..2f8679487134ee 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1314,7 +1314,7 @@ def check_len_cycles(self, dict_type, cons): n2 = len(dct) # one item may be kept alive inside the iterator self.assertIn(n1, (0, 1)) - self.assertEqual(n2, 0) + self.assertIn(n2, (0, 1)) def test_weak_keyed_len_cycles(self): self.check_len_cycles(weakref.WeakKeyDictionary, lambda k: (k, 1)) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index eb90be81c8d34c..00bd1dcf2ce8df 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -218,7 +218,7 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { goto error; } - if (ref != NULL) { + if (ref != NULL && ref != Py_None) { PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); /* We only need "current" for pointer comparison. */ diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index f53fb99dd6cdcb..c3b8cc743ccd21 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,6 +1,16 @@ Intro ===== +************************************************************************** +Note: this document was written long ago, before PEP 442 (safe object +finalization) was implemented. While that has changed some things, this +document is still mostly accurate. Just note that the rules being discussed +here apply to the unreachable set of objects *after* non-legacy finalizers +have been called. Also, the clearing of weakrefs has been changed to happen +later in the collection (after running finalizers but before tp_clear is +called). +************************************************************************** + The basic rule for dealing with weakref callbacks (and __del__ methods too, for that matter) during cyclic gc: diff --git a/Python/gc.c b/Python/gc.c index 7b0e6d6e803504..4cbca252a02cda 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -858,58 +858,31 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) } } -/* Clear all weakrefs to unreachable objects, and if such a weakref has a - * callback, invoke it if necessary. Note that it's possible for such - * weakrefs to be outside the unreachable set -- indeed, those are precisely - * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for - * overview & some details. Some weakrefs with callbacks may be reclaimed - * directly by this routine; the number reclaimed is the return value. Other - * weakrefs with callbacks may be moved into the `old` generation. Objects - * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in - * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, - * no object in `unreachable` is weakly referenced anymore. +/* Handle weakref callbacks. Note that it's possible for such weakrefs to be + * outside the unreachable set -- indeed, those are precisely the weakrefs + * whose callbacks must be invoked. See gc_weakref.txt for overview & some + * details. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) { PyGC_Head *gc; - PyObject *op; /* generally FROM_GC(gc) */ - PyWeakReference *wr; /* generally a cast of op */ PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ PyGC_Head *next; int num_freed = 0; gc_list_init(&wrcb_to_call); - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot invoke any callbacks until - * all weakrefs to unreachable objects are cleared, lest the callback - * resurrect an unreachable object via a still-active weakref. We - * make another pass over wrcb_to_call, invoking callbacks, after this - * pass completes. + /* Find all weakrefs with callbacks and move into `wrcb_to_call` if the + * callback needs to be invoked. We make another pass over wrcb_to_call, + * invoking callbacks, after this pass completes. */ for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); next = GC_NEXT(gc); - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { continue; } @@ -921,20 +894,13 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) */ wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs, and move the weakrefs with callbacks - * that must be called into wrcb_to_call. + /* `op` may have some weakrefs. March over the list and move the + * weakrefs with callbacks that must be called into wrcb_to_call. */ - for (wr = *wrlist; wr != NULL; wr = *wrlist) { - PyGC_Head *wrasgc; /* AS_GC(wr) */ + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + next_wr = wr->wr_next; - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -955,10 +921,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * outside the current generation, CT may be reachable from the * callback. Then the callback could resurrect insane objects. * - * Since the callback is never needed and may be unsafe in this case, - * wr is simply left in the unreachable set. Note that because we - * already called _PyWeakref_ClearRef(wr), its callback will never - * trigger. + * Since the callback is never needed and may be unsafe in this + * case, wr is simply left in the unreachable set. Note that + * clear_weakrefs() will ensure its callback will not trigger + * inside delete_garbage(). * * OTOH, if wr isn't part of CT, we should invoke the callback: the * weakref outlived the trash. Note that since wr isn't CT in this @@ -969,8 +935,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) * is moved to wrcb_to_call in this case. */ if (gc_is_collecting(AS_GC((PyObject *)wr))) { - /* it should already have been cleared above */ - _PyObject_ASSERT((PyObject*)wr, wr->wr_object == Py_None); + continue; + } + + if (_PyGC_FINALIZED((PyObject *)wr)) { + /* Callback was already run (weakref must have been resurrected). */ continue; } @@ -980,7 +949,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) Py_INCREF(wr); /* Move wr to wrcb_to_call, for the next pass. */ - wrasgc = AS_GC((PyObject *)wr); + PyGC_Head *wrasgc = AS_GC((PyObject *)wr); // wrasgc is reachable, but next isn't, so they can't be the same _PyObject_ASSERT((PyObject *)wr, wrasgc != next); gc_list_move(wrasgc, &wrcb_to_call); @@ -996,12 +965,16 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyObject *callback; gc = (PyGC_Head*)wrcb_to_call._gc_next; - op = FROM_GC(gc); + PyObject *op = FROM_GC(gc); _PyObject_ASSERT(op, PyWeakref_Check(op)); - wr = (PyWeakReference *)op; + PyWeakReference *wr = (PyWeakReference *)op; callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); + /* Ensure we don't execute the callback again if the weakref is + * resurrected. */ + _PyGC_SET_FINALIZED(op); + /* copy-paste of weakrefobject.c's handle_callback() */ temp = PyObject_CallOneArg(callback, (PyObject *)wr); if (temp == NULL) { @@ -1037,6 +1010,68 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + /* Clear all weakrefs to the objects in unreachable. If such a weakref + * also has a callback, move it into `wrcb_to_call` if the callback + * needs to be invoked. Note that we cannot call `tp_clear` until + * all weakrefs to unreachable objects are cleared, lest finalizers + * resurrect an unreachable object via a still-active weakref. + */ + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1736,8 +1771,8 @@ gc_collect_region(PyThreadState *tstate, } } - /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); + /* Invoke weakref callbacks as necessary. */ + stats->collected += handle_weakref_callbacks(&unreachable, to); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1751,6 +1786,22 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + /* Clear weakrefs to objects in the unreachable set. No Python-level + * code must be allowed to access those unreachable objects. During + * delete_garbage(), finalizers outside the unreachable set might run + * and if those weakrefs were not cleared, that could reveal unreachable + * objects. + * + * We used to clear weakrefs earlier, before calling finalizers. That + * causes at least two problems. First, the finalizers could create + * new weakrefs, that refer to unreachable objects. Those would not be + * cleared and could cause the problem described above (see GH-91636 as + * an example). Second, we need the weakrefs in the tp_subclasses to + * *not* be cleared so that caches based on the type version are correctly + * invalidated (see GH-135552 as a bug caused by this). + */ + clear_weakrefs(&final_unreachable); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5aaa68c5b51f95..67ddcf1db1128c 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1491,8 +1491,53 @@ move_legacy_finalizer_reachable(struct collection_state *state) return 0; } -// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// enqueued in `wrcb_to_call`, but not invoked yet. +// Weakrefs with callbacks are enqueued in `wrcb_to_call`, but not invoked +// yet. +static void +find_weakref_callbacks(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs, and enqueue the weakrefs with callbacks + // that must be called into wrcb_to_call. + PyWeakReference *next_wr; + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + next_wr = wr->wr_next; + + // We do not invoke callbacks for weakrefs that are themselves + // unreachable. This is partly for historical reasons: weakrefs + // predate safe object finalization, and a weakref that is itself + // unreachable may have a callback that resurrects other + // unreachable objects. + if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { + continue; + } + + if (_PyGC_FINALIZED((PyObject *)wr)) { + // Callback was already run (weakref must have been resurrected). + continue; + } + + // Create a new reference so that wr can't go away before we can + // process it again. + merge_refcount((PyObject *)wr, 1); + + // Enqueue weakref to be called later. + worklist_push(&state->wrcb_to_call, (PyObject *)wr); + } + } +} + +// Clear all weakrefs to unreachable objects. static void clear_weakrefs(struct collection_state *state) { @@ -1525,22 +1570,6 @@ clear_weakrefs(struct collection_state *state) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - - // We do not invoke callbacks for weakrefs that are themselves - // unreachable. This is partly for historical reasons: weakrefs - // predate safe object finalization, and a weakref that is itself - // unreachable may have a callback that resurrects other - // unreachable objects. - if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { - continue; - } - - // Create a new reference so that wr can't go away before we can - // process it again. - merge_refcount((PyObject *)wr, 1); - - // Enqueue weakref to be called later. - worklist_push(&state->wrcb_to_call, (PyObject *)wr); } } } @@ -1557,6 +1586,10 @@ call_weakref_callbacks(struct collection_state *state) PyObject *callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); + /* Ensure we don't execute the callback again if the weakref is + * resurrected. */ + _PyGC_SET_FINALIZED(op); + /* copy-paste of weakrefobject.c's handle_callback() */ PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr); if (temp == NULL) { @@ -2210,8 +2243,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // Record the number of live GC objects interp->gc.long_lived_total = state->long_lived_total; - // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state); + // Find weakref callbacks we will honor (but do not call them). + find_weakref_callbacks(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2238,6 +2271,21 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, return; } + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might run + // and if those weakrefs were not cleared, that could reveal unreachable + // objects. + // + // We used to clear weakrefs earlier, before calling finalizers. That + // causes at least two problems. First, the finalizers could create + // new weakrefs, that refer to unreachable objects. Those would not be + // cleared and could cause the problem described above (see GH-91636 as + // an example). Second, we need the weakrefs in the tp_subclasses to + // *not* be cleared so that caches based on the type version are correctly + // invalidated (see GH-135552 as a bug caused by this). + clear_weakrefs(state); + // Call tp_clear on objects in the unreachable set. This will cause // the reference cycles to be broken. It may also cause some objects // to be freed. From 17a4f9eff1d65fb20504192de05bb95770aedc56 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 1 Jul 2025 13:49:15 -0700 Subject: [PATCH 02/12] Remove inaccurate comment. --- Python/gc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 4cbca252a02cda..74bb3590e89f29 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1019,12 +1019,6 @@ clear_weakrefs(PyGC_Head *unreachable) PyGC_Head *gc; PyGC_Head *next; - /* Clear all weakrefs to the objects in unreachable. If such a weakref - * also has a callback, move it into `wrcb_to_call` if the callback - * needs to be invoked. Note that we cannot call `tp_clear` until - * all weakrefs to unreachable objects are cleared, lest finalizers - * resurrect an unreachable object via a still-active weakref. - */ for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { PyWeakReference **wrlist; From 12f0b5c6a495c830dede2e1638f0eab0c9750de9 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 1 Jul 2025 15:05:25 -0700 Subject: [PATCH 03/12] Run clear_weakrefs() with world stopped. --- Python/gc_free_threading.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 67ddcf1db1128c..230f214ec288dc 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2255,11 +2255,28 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, call_weakref_callbacks(state); finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. _PyEval_StopTheWorld(interp); + // Handle any objects that may have resurrected after the finalization. err = handle_resurrected_objects(state); // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); + if (err == 0) { + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might + // run and if those weakrefs were not cleared, that could reveal + // unreachable objects. + // + // We used to clear weakrefs earlier, before calling finalizers. + // That causes at least two problems. First, the finalizers could + // create new weakrefs, that refer to unreachable objects. Those + // would not be cleared and could cause the problem described above + // (see GH-91636 as an example). Second, we need the weakrefs in the + // tp_subclasses to *not* be cleared so that caches based on the type + // version are correctly invalidated (see GH-135552 as a bug caused by + // this). + clear_weakrefs(state); + } _PyEval_StartTheWorld(interp); if (err < 0) { @@ -2271,21 +2288,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, return; } - // Clear weakrefs to objects in the unreachable set. No Python-level - // code must be allowed to access those unreachable objects. During - // delete_garbage(), finalizers outside the unreachable set might run - // and if those weakrefs were not cleared, that could reveal unreachable - // objects. - // - // We used to clear weakrefs earlier, before calling finalizers. That - // causes at least two problems. First, the finalizers could create - // new weakrefs, that refer to unreachable objects. Those would not be - // cleared and could cause the problem described above (see GH-91636 as - // an example). Second, we need the weakrefs in the tp_subclasses to - // *not* be cleared so that caches based on the type version are correctly - // invalidated (see GH-135552 as a bug caused by this). - clear_weakrefs(state); - // Call tp_clear on objects in the unreachable set. This will cause // the reference cycles to be broken. It may also cause some objects // to be freed. From 2f3dabab662dabe7d4975eede0369a8e66d85fb9 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 2 Jul 2025 16:09:20 -0700 Subject: [PATCH 04/12] Defer weakref clears only for refs to classes. This avoids breaking tests while fixing the issue with tp_subclasses. In the long term, it would be better to defer the clear of all weakrefs, not just the ones referring to classes. However, that is a more distruptive change and would seem to have a higher chance of breaking user code. So, it would not be something to do in a bugfix release. --- Lib/_threading_local.py | 2 +- Lib/test/test_finalization.py | 11 ++++------- Lib/test/test_gc.py | 14 +++++--------- Lib/test/test_io.py | 22 ++++------------------ Lib/test/test_weakref.py | 2 +- Modules/_datetimemodule.c | 2 +- Modules/gc_weakref.txt | 10 ---------- Python/gc.c | 24 ++++++++++++++++++++++++ Python/gc_free_threading.c | 24 ++++++++++++++++++++++++ 9 files changed, 64 insertions(+), 47 deletions(-) diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index bd57c3b33efc64..0b9e5d3bbf6ef6 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -49,7 +49,7 @@ def local_deleted(_, key=key): # When the localimpl is deleted, remove the thread attribute. thread = wrthread() if thread is not None: - thread.__dict__.pop(key, None) + del thread.__dict__[key] def thread_deleted(_, idt=idt): # When the thread is deleted, remove the local dict. # Note that this is suboptimal if the thread object gets diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 1d838b9ed9187e..42871f8a09b16b 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -280,9 +280,8 @@ def test_simple_resurrect(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - # This used to be None because weakrefs were cleared before - # calling finalizers. Now they are cleared after. - self.assertIsNot(wr(), None) + # XXX is this desirable? + self.assertIs(wr(), None) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() @@ -389,10 +388,8 @@ def check_resurrecting_chain(self, classes): gc.collect() self.assert_del_calls(ids) self.assert_survivors(survivor_ids) - for wr in wrs: - # These values used to be None because weakrefs were cleared - # before calling finalizers. Now they are cleared after. - self.assertIsNotNone(wr()) + # XXX desirable? + self.assertEqual([wr() for wr in wrs], [None] * N) self.clear_survivors() gc.collect() self.assert_del_calls(ids) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 6ec0c6414fd090..85c43055d0dcec 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -658,11 +658,9 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # In previous versions of Python, the weakrefs are cleared - # before calling finalizers. Now they are cleared after. - # So when the object is resurrected, the weakref is not - # cleared since it is no longer unreachable. - self.assertIsInstance(x, C1055820) + # If the callback resurrected one of these guys, the instance + # would be damaged, with an empty __dict__. + self.assertEqual(x, None) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -1049,8 +1047,8 @@ class Z: # release references and create trash del a, wr_cycle gc.collect() - # In older versions of Python, the weakref was cleared by the - # gc. Now it is not cleared and so the callback is run. + # if called, it means there is a bug in the GC. The weakref should be + # cleared before Z dies. callback.assert_not_called() gc.enable() @@ -1332,7 +1330,6 @@ def setUp(self): def tearDown(self): gc.disable() - @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820c(self): # Corresponds to temp2c.py in the bug report. This is pretty # elaborate. @@ -1408,7 +1405,6 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) - @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0efe7afe3a5499..0c921ffbc2576a 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -807,12 +807,7 @@ def test_closefd_attr(self): def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to disk. - # - # Note that using warnings_helper.check_warnings() will keep the - # file alive due to the `source` argument to warn(). So, use - # catch_warnings() instead. - with warnings.catch_warnings(): - warnings.simplefilter("ignore", ResourceWarning) + with warnings_helper.check_warnings(('', ResourceWarning)): f = self.FileIO(os_helper.TESTFN, "wb") f.write(b"abcxxx") f.f = f @@ -1813,11 +1808,7 @@ def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - # Note that using warnings_helper.check_warnings() will keep the - # file alive due to the `source` argument to warn(). So, use - # catch_warnings() instead. - with warnings.catch_warnings(): - warnings.simplefilter("ignore", ResourceWarning) + with warnings_helper.check_warnings(('', ResourceWarning)): rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.f = f @@ -2166,11 +2157,7 @@ def test_garbage_collection(self): # all data to disk. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - # Note that using warnings_helper.check_warnings() will keep the - # file alive due to the `source` argument to warn(). So, use - # catch_warnings() instead. - with warnings.catch_warnings(): - warnings.simplefilter("ignore", ResourceWarning) + with warnings_helper.check_warnings(('', ResourceWarning)): rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.write(b"123xxx") @@ -4092,8 +4079,7 @@ def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. # The Python version has __del__, so it ends in gc.garbage instead. - with warnings.catch_warnings(): - warnings.simplefilter("ignore", ResourceWarning) + with warnings_helper.check_warnings(('', ResourceWarning)): rawio = self.FileIO(os_helper.TESTFN, "wb") b = self.BufferedWriter(rawio) t = self.TextIOWrapper(b, encoding="ascii") diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 2f8679487134ee..4c7c900eb56ae1 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1314,7 +1314,7 @@ def check_len_cycles(self, dict_type, cons): n2 = len(dct) # one item may be kept alive inside the iterator self.assertIn(n1, (0, 1)) - self.assertIn(n2, (0, 1)) + self.assertEqual(n2, 0) def test_weak_keyed_len_cycles(self): self.check_len_cycles(weakref.WeakKeyDictionary, lambda k: (k, 1)) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 00bd1dcf2ce8df..eb90be81c8d34c 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -218,7 +218,7 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { goto error; } - if (ref != NULL && ref != Py_None) { + if (ref != NULL) { PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); /* We only need "current" for pointer comparison. */ diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index c3b8cc743ccd21..f53fb99dd6cdcb 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,16 +1,6 @@ Intro ===== -************************************************************************** -Note: this document was written long ago, before PEP 442 (safe object -finalization) was implemented. While that has changed some things, this -document is still mostly accurate. Just note that the rules being discussed -here apply to the unreachable set of objects *after* non-legacy finalizers -have been called. Also, the clearing of weakrefs has been changed to happen -later in the collection (after running finalizers but before tp_clear is -called). -************************************************************************** - The basic rule for dealing with weakref callbacks (and __del__ methods too, for that matter) during cyclic gc: diff --git a/Python/gc.c b/Python/gc.c index 74bb3590e89f29..65355b8352316a 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -901,6 +901,30 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { next_wr = wr->wr_next; + // Since Python 2.3, weakrefs to cyclic garbage have been cleared + // *before* calling finalizers. However, since tp_subclasses + // started being necessary to invalidate caches (e.g. by + // PyType_Modified()), that clearing has created a bug. If the + // weakref to the subclass is cleared before a finalizer is + // called, the cache may not be correctly invalidated. That can + // lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. However, that can + // introduce other problems since some finalizer code expects that + // the weakrefs will be cleared first. The "multiprocessing" + // package contains finalizer logic like this, for example. So, + // we have the PyType_Check() test above and only defer the clear + // of types. That solves the issue for tp_subclasses. In a + // future version of Python, we should likely defer the weakref + // clear for all objects, not just types. + if (!PyType_Check(wr->wr_object)) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + if (wr->wr_callback == NULL) { /* no callback */ continue; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 230f214ec288dc..3220f326077bbf 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1513,6 +1513,30 @@ find_weakref_callbacks(struct collection_state *state) for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { next_wr = wr->wr_next; + // Since Python 2.3, weakrefs to cyclic garbage have been cleared + // *before* calling finalizers. However, since tp_subclasses + // started being necessary to invalidate caches (e.g. by + // PyType_Modified()), that clearing has created a bug. If the + // weakref to the subclass is cleared before a finalizer is + // called, the cache may not be correctly invalidated. That can + // lead to segfaults since the caches can refer to deallocated + // objects. Delaying the clear of weakrefs until *after* + // finalizers have been called fixes that bug. However, that can + // introduce other problems since some finalizer code expects that + // the weakrefs will be cleared first. The "multiprocessing" + // package contains finalizer logic like this, for example. So, + // we have the PyType_Check() test above and only defer the clear + // of types. That solves the issue for tp_subclasses. In a + // future version of Python, we should likely defer the weakref + // clear for all objects, not just types. + if (!PyType_Check(wr->wr_object)) { + // _PyWeakref_ClearRef clears the weakref but leaves the + // callback pointer intact. Obscure: it also changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + // We do not invoke callbacks for weakrefs that are themselves // unreachable. This is partly for historical reasons: weakrefs // predate safe object finalization, and a weakref that is itself From 123bc251b60de9029e653d6a30e8d917a399b8fa Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 2 Jul 2025 21:51:32 -0700 Subject: [PATCH 05/12] Ensure weakrefs with callbacks are cleared early. We need to clear those before executing the callback. Since this ensures they can't run a second time, we don't need _PyGC_SET_FINALIZED(). Revise comments. --- Python/gc.c | 48 ++++++++++++++++++-------------------- Python/gc_free_threading.c | 48 ++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 50 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 65355b8352316a..21a653b40ecec1 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -901,23 +901,30 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { next_wr = wr->wr_next; - // Since Python 2.3, weakrefs to cyclic garbage have been cleared - // *before* calling finalizers. However, since tp_subclasses - // started being necessary to invalidate caches (e.g. by - // PyType_Modified()), that clearing has created a bug. If the - // weakref to the subclass is cleared before a finalizer is - // called, the cache may not be correctly invalidated. That can - // lead to segfaults since the caches can refer to deallocated + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated // objects. Delaying the clear of weakrefs until *after* - // finalizers have been called fixes that bug. However, that can - // introduce other problems since some finalizer code expects that - // the weakrefs will be cleared first. The "multiprocessing" - // package contains finalizer logic like this, for example. So, - // we have the PyType_Check() test above and only defer the clear - // of types. That solves the issue for tp_subclasses. In a - // future version of Python, we should likely defer the weakref - // clear for all objects, not just types. - if (!PyType_Check(wr->wr_object)) { + // finalizers have been called fixes that bug. However, that + // deferral could introduce other problems if some finalizer + // code expects that the weakrefs will be cleared first. So, we + // have the PyType_Check() test below to only defer the clear of + // weakrefs to types. That solves the issue for tp_subclasses. + // In a future version of Python, we should likely defer the + // weakref clear for all objects, not just types. + if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { // _PyWeakref_ClearRef clears the weakref but leaves the // callback pointer intact. Obscure: it also changes *wrlist. _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); @@ -962,11 +969,6 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) continue; } - if (_PyGC_FINALIZED((PyObject *)wr)) { - /* Callback was already run (weakref must have been resurrected). */ - continue; - } - /* Create a new reference so that wr can't go away * before we can process it again. */ @@ -995,10 +997,6 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); - /* Ensure we don't execute the callback again if the weakref is - * resurrected. */ - _PyGC_SET_FINALIZED(op); - /* copy-paste of weakrefobject.c's handle_callback() */ temp = PyObject_CallOneArg(callback, (PyObject *)wr); if (temp == NULL) { diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 3220f326077bbf..f2ad0d799e06ca 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1513,23 +1513,30 @@ find_weakref_callbacks(struct collection_state *state) for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { next_wr = wr->wr_next; - // Since Python 2.3, weakrefs to cyclic garbage have been cleared - // *before* calling finalizers. However, since tp_subclasses - // started being necessary to invalidate caches (e.g. by - // PyType_Modified()), that clearing has created a bug. If the - // weakref to the subclass is cleared before a finalizer is - // called, the cache may not be correctly invalidated. That can - // lead to segfaults since the caches can refer to deallocated + // Weakrefs with callbacks always need to be cleared before + // executing the callback. Sometimes the callback will call + // the ref object, to check if it's actually a dead reference + // (KeyedRef does this, for example). We want to indicate that it + // is dead, even though it is possible a finalizer might resurrect + // it. Clearing also prevents the callback from being executing + // more than once. + // + // Since Python 2.3, all weakrefs to cyclic garbage have + // been cleared *before* calling finalizers. However, since + // tp_subclasses started being necessary to invalidate caches + // (e.g. by PyType_Modified()), that clearing has created a bug. + // If the weakref to the subclass is cleared before a finalizer + // is called, the cache may not be correctly invalidated. That + // can lead to segfaults since the caches can refer to deallocated // objects. Delaying the clear of weakrefs until *after* - // finalizers have been called fixes that bug. However, that can - // introduce other problems since some finalizer code expects that - // the weakrefs will be cleared first. The "multiprocessing" - // package contains finalizer logic like this, for example. So, - // we have the PyType_Check() test above and only defer the clear - // of types. That solves the issue for tp_subclasses. In a - // future version of Python, we should likely defer the weakref - // clear for all objects, not just types. - if (!PyType_Check(wr->wr_object)) { + // finalizers have been called fixes that bug. However, that + // deferral could introduce other problems if some finalizer + // code expects that the weakrefs will be cleared first. So, we + // have the PyType_Check() test below to only defer the clear of + // weakrefs to types. That solves the issue for tp_subclasses. + // In a future version of Python, we should likely defer the + // weakref clear for all objects, not just types. + if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { // _PyWeakref_ClearRef clears the weakref but leaves the // callback pointer intact. Obscure: it also changes *wrlist. _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); @@ -1546,11 +1553,6 @@ find_weakref_callbacks(struct collection_state *state) continue; } - if (_PyGC_FINALIZED((PyObject *)wr)) { - // Callback was already run (weakref must have been resurrected). - continue; - } - // Create a new reference so that wr can't go away before we can // process it again. merge_refcount((PyObject *)wr, 1); @@ -1610,10 +1612,6 @@ call_weakref_callbacks(struct collection_state *state) PyObject *callback = wr->wr_callback; _PyObject_ASSERT(op, callback != NULL); - /* Ensure we don't execute the callback again if the weakref is - * resurrected. */ - _PyGC_SET_FINALIZED(op); - /* copy-paste of weakrefobject.c's handle_callback() */ PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr); if (temp == NULL) { From 496c0b17fa98e673a45f67e60fe443144b35e8f0 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Jul 2025 06:04:45 -0700 Subject: [PATCH 06/12] Add NEWS. --- ...025-07-03-06-04-42.gh-issue-135552.CbBQof.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst new file mode 100644 index 00000000000000..af1c8cac3e547a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst @@ -0,0 +1,16 @@ +Fix two bugs related to the interaction of weakrefs and the garbage +collector. The weakrefs in the ``tp_subclasses`` dictionary are needed in +order to correctly invalidate type caches (for example, by calling +``PyType_Modified()``). Clearing weakrefs before calling finalizers causes +the caches to not be correctly invalidated. That can cause crashes since the +caches can refer to invalid objects. This is fixed by deferring the +clearing of weakrefs to classes and without callbacks until after finalizers +are executed. + +The second bug is caused by weakrefs created while running finalizers. Those +weakrefs can be outside the set of unreachable garbage and therefore survive +the ``delete_garbage()`` phase (where ``tp_clear()`` is called on objects). +Those weakrefs can expose to Python-level code objects that have had +``tp_clear()`` called on them. See GH-91636 as an example of this kind of +bug. This is fixes be clearing all weakrefs to unreachable objects after +finalizers have been executed. From 8a553d14a7fe71a303390b18357a54e6088770ae Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Jul 2025 06:07:03 -0700 Subject: [PATCH 07/12] Add comment about wrlist iteration. This is a bit trickly because the wrlist can be changing as we iterate over it. --- Python/gc.c | 3 +++ Python/gc_free_threading.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Python/gc.c b/Python/gc.c index 21a653b40ecec1..c8a09e22a910c9 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -899,6 +899,9 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) */ PyWeakReference *next_wr; for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). next_wr = wr->wr_next; // Weakrefs with callbacks always need to be cleared before diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index f2ad0d799e06ca..0a2d75836a217b 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1511,6 +1511,9 @@ find_weakref_callbacks(struct collection_state *state) // that must be called into wrcb_to_call. PyWeakReference *next_wr; for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) { + // Get the next list element to get iterator progress if we omit + // clearing of the weakref (because _PyWeakref_ClearRef changes + // next pointer in the wrlist). next_wr = wr->wr_next; // Weakrefs with callbacks always need to be cleared before From 900022b72f4dc5e49a6ad7367b704763894eff35 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 12:39:12 -0700 Subject: [PATCH 08/12] Revise NEWS. --- ...-07-03-06-04-42.gh-issue-135552.CbBQof.rst | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst index af1c8cac3e547a..ea30a43fc25d41 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-03-06-04-42.gh-issue-135552.CbBQof.rst @@ -1,16 +1,7 @@ -Fix two bugs related to the interaction of weakrefs and the garbage -collector. The weakrefs in the ``tp_subclasses`` dictionary are needed in -order to correctly invalidate type caches (for example, by calling -``PyType_Modified()``). Clearing weakrefs before calling finalizers causes -the caches to not be correctly invalidated. That can cause crashes since the -caches can refer to invalid objects. This is fixed by deferring the -clearing of weakrefs to classes and without callbacks until after finalizers -are executed. - -The second bug is caused by weakrefs created while running finalizers. Those -weakrefs can be outside the set of unreachable garbage and therefore survive -the ``delete_garbage()`` phase (where ``tp_clear()`` is called on objects). -Those weakrefs can expose to Python-level code objects that have had -``tp_clear()`` called on them. See GH-91636 as an example of this kind of -bug. This is fixes be clearing all weakrefs to unreachable objects after -finalizers have been executed. +Fix a bug caused by the garbage collector clearing weakrefs too early. The +weakrefs in the ``tp_subclasses`` dictionary are needed in order to correctly +invalidate type caches (for example, by calling ``PyType_Modified()``). +Clearing weakrefs before calling finalizers causes the caches to not be +correctly invalidated. That can cause crashes since the caches can refer to +invalid objects. Defer the clearing of weakrefs without callbacks until after +finalizers are executed. From 84bd12347c873f43ae03b076402ef6f253ccadbd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 14:25:13 -0700 Subject: [PATCH 09/12] Defer clear for weakrefs without callbacks. This fixes GH-132413 and GH-135552. --- Lib/test/test_finalization.py | 11 +++++++---- Lib/test/test_gc.py | 11 ++++++----- Lib/test/test_io.py | 22 ++++++++++++++++++---- Lib/test/test_weakref.py | 2 +- Modules/_datetimemodule.c | 2 +- Modules/gc_weakref.txt | 10 ++++++++++ Python/gc.c | 10 ++-------- Python/gc_free_threading.c | 10 ++-------- 8 files changed, 47 insertions(+), 31 deletions(-) diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 42871f8a09b16b..1d838b9ed9187e 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -280,8 +280,9 @@ def test_simple_resurrect(self): gc.collect() self.assert_del_calls(ids) self.assert_survivors(ids) - # XXX is this desirable? - self.assertIs(wr(), None) + # This used to be None because weakrefs were cleared before + # calling finalizers. Now they are cleared after. + self.assertIsNot(wr(), None) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() @@ -388,8 +389,10 @@ def check_resurrecting_chain(self, classes): gc.collect() self.assert_del_calls(ids) self.assert_survivors(survivor_ids) - # XXX desirable? - self.assertEqual([wr() for wr in wrs], [None] * N) + for wr in wrs: + # These values used to be None because weakrefs were cleared + # before calling finalizers. Now they are cleared after. + self.assertIsNotNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 85c43055d0dcec..2e8627e9597653 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -658,9 +658,8 @@ def callback(ignored): gc.collect() self.assertEqual(len(ouch), 2) # else the callbacks didn't run for x in ouch: - # If the callback resurrected one of these guys, the instance - # would be damaged, with an empty __dict__. - self.assertEqual(x, None) + # The weakref should be cleared before executing the callback. + self.assertIsNone(x) def test_bug21435(self): # This is a poor test - its only virtue is that it happened to @@ -1047,8 +1046,8 @@ class Z: # release references and create trash del a, wr_cycle gc.collect() - # if called, it means there is a bug in the GC. The weakref should be - # cleared before Z dies. + # In older versions of Python, the weakref was cleared by the + # gc. Now it is not cleared and so the callback is run. callback.assert_not_called() gc.enable() @@ -1330,6 +1329,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820c(self): # Corresponds to temp2c.py in the bug report. This is pretty # elaborate. @@ -1405,6 +1405,7 @@ def callback(ignored): self.assertEqual(x, None) @gc_threshold(1000, 0, 0) + @unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments") def test_bug1055820d(self): # Corresponds to temp2d.py in the bug report. This is very much like # test_bug1055820c, but uses a __del__ method instead of a weakref diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index b487bcabf01ca4..92be2763e5ed1e 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -808,7 +808,12 @@ def test_closefd_attr(self): def test_garbage_collection(self): # FileIO objects are collected, and collecting them flushes # all data to disk. - with warnings_helper.check_warnings(('', ResourceWarning)): + # + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) f = self.FileIO(os_helper.TESTFN, "wb") f.write(b"abcxxx") f.f = f @@ -1809,7 +1814,11 @@ def test_garbage_collection(self): # C BufferedReader objects are collected. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.f = f @@ -2158,7 +2167,11 @@ def test_garbage_collection(self): # all data to disk. # The Python version has __del__, so it ends into gc.garbage instead self.addCleanup(os_helper.unlink, os_helper.TESTFN) - with warnings_helper.check_warnings(('', ResourceWarning)): + # Note that using warnings_helper.check_warnings() will keep the + # file alive due to the `source` argument to warn(). So, use + # catch_warnings() instead. + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "w+b") f = self.tp(rawio) f.write(b"123xxx") @@ -4080,7 +4093,8 @@ def test_garbage_collection(self): # C TextIOWrapper objects are collected, and collecting them flushes # all data to disk. # The Python version has __del__, so it ends in gc.garbage instead. - with warnings_helper.check_warnings(('', ResourceWarning)): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", ResourceWarning) rawio = self.FileIO(os_helper.TESTFN, "wb") b = self.BufferedWriter(rawio) t = self.TextIOWrapper(b, encoding="ascii") diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 4c7c900eb56ae1..2f8679487134ee 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1314,7 +1314,7 @@ def check_len_cycles(self, dict_type, cons): n2 = len(dct) # one item may be kept alive inside the iterator self.assertIn(n1, (0, 1)) - self.assertEqual(n2, 0) + self.assertIn(n2, (0, 1)) def test_weak_keyed_len_cycles(self): self.check_len_cycles(weakref.WeakKeyDictionary, lambda k: (k, 1)) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 7a6426593d021f..b9f16f3210dc8d 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -218,7 +218,7 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { goto error; } - if (ref != NULL) { + if (ref != NULL && ref != Py_None) { PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); /* We only need "current" for pointer comparison. */ diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index f53fb99dd6cdcb..c3b8cc743ccd21 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,6 +1,16 @@ Intro ===== +************************************************************************** +Note: this document was written long ago, before PEP 442 (safe object +finalization) was implemented. While that has changed some things, this +document is still mostly accurate. Just note that the rules being discussed +here apply to the unreachable set of objects *after* non-legacy finalizers +have been called. Also, the clearing of weakrefs has been changed to happen +later in the collection (after running finalizers but before tp_clear is +called). +************************************************************************** + The basic rule for dealing with weakref callbacks (and __del__ methods too, for that matter) during cyclic gc: diff --git a/Python/gc.c b/Python/gc.c index 4471cc3f220adf..09c2da64414847 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -920,14 +920,8 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old) // is called, the cache may not be correctly invalidated. That // can lead to segfaults since the caches can refer to deallocated // objects. Delaying the clear of weakrefs until *after* - // finalizers have been called fixes that bug. However, that - // deferral could introduce other problems if some finalizer - // code expects that the weakrefs will be cleared first. So, we - // have the PyType_Check() test below to only defer the clear of - // weakrefs to types. That solves the issue for tp_subclasses. - // In a future version of Python, we should likely defer the - // weakref clear for all objects, not just types. - if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { + // finalizers have been called fixes that bug. + if (wr->wr_callback != NULL) { // _PyWeakref_ClearRef clears the weakref but leaves the // callback pointer intact. Obscure: it also changes *wrlist. _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 0a2d75836a217b..876bd29e803f47 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1532,14 +1532,8 @@ find_weakref_callbacks(struct collection_state *state) // is called, the cache may not be correctly invalidated. That // can lead to segfaults since the caches can refer to deallocated // objects. Delaying the clear of weakrefs until *after* - // finalizers have been called fixes that bug. However, that - // deferral could introduce other problems if some finalizer - // code expects that the weakrefs will be cleared first. So, we - // have the PyType_Check() test below to only defer the clear of - // weakrefs to types. That solves the issue for tp_subclasses. - // In a future version of Python, we should likely defer the - // weakref clear for all objects, not just types. - if (wr->wr_callback != NULL || !PyType_Check(wr->wr_object)) { + // finalizers have been called fixes that bug. + if (wr->wr_callback != NULL) { // _PyWeakref_ClearRef clears the weakref but leaves the // callback pointer intact. Obscure: it also changes *wrlist. _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); From bb29ea19438ceb44e0154d04d636b881cfa039a5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 14:40:45 -0700 Subject: [PATCH 10/12] Add unit test for GH-132413. --- Lib/test/test_gc.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 2e8627e9597653..4d6ef012f72c39 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -309,6 +309,26 @@ def func(): self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) + def test_datetime_weakref_cycle(self): + # https://github.com/python/cpython/issues/132413 + # If the weakref used by the datetime extension gets cleared by the GC (due to being + # in an unreachable cycle) then datetime functions would crash (get_module_state() + # was returning a NULL pointer). This bug is fixed by clearing weakrefs without + # callbacks *after* running finalizers. + code = """if 1: + import _datetime + class C: + def __del__(self): + print('__del__ called') + _datetime.timedelta(days=1) # crash? + + l = [C()] + l.append(l) + """ + rc, stdout, stderr = assert_python_ok("-c", code) + self.assertEqual(rc, 0) + self.assertEqual(stdout.strip(), b'__del__ called') + @refcount_test def test_frame(self): def f(): From 073409b66d91e89f1cbeb945cbe7d5f199c34c38 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 9 Jul 2025 13:42:10 -0700 Subject: [PATCH 11/12] Add additional tests for weakref clearing. --- Lib/test/test_finalization.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/test_finalization.py b/Lib/test/test_finalization.py index 1d838b9ed9187e..f8f20d4ebd6bc0 100644 --- a/Lib/test/test_finalization.py +++ b/Lib/test/test_finalization.py @@ -276,6 +276,7 @@ def test_simple_resurrect(self): s = SelfCycleResurrector() ids = [id(s)] wr = weakref.ref(s) + wrc = weakref.ref(s, lambda x: None) del s gc.collect() self.assert_del_calls(ids) @@ -283,6 +284,9 @@ def test_simple_resurrect(self): # This used to be None because weakrefs were cleared before # calling finalizers. Now they are cleared after. self.assertIsNot(wr(), None) + # A weakref with a callback is still cleared before calling + # finalizers. + self.assertIsNone(wrc()) # When trying to destroy the object a second time, __del__ # isn't called anymore (and the object isn't resurrected). self.clear_survivors() @@ -379,12 +383,15 @@ def check_non_resurrecting_chain(self, classes): def check_resurrecting_chain(self, classes): N = len(classes) + def dummy_callback(ref): + pass with SimpleBase.test(): nodes = self.build_chain(classes) N = len(nodes) ids = [id(s) for s in nodes] survivor_ids = [id(s) for s in nodes if isinstance(s, SimpleResurrector)] wrs = [weakref.ref(s) for s in nodes] + wrcs = [weakref.ref(s, dummy_callback) for s in nodes] del nodes gc.collect() self.assert_del_calls(ids) @@ -393,6 +400,10 @@ def check_resurrecting_chain(self, classes): # These values used to be None because weakrefs were cleared # before calling finalizers. Now they are cleared after. self.assertIsNotNone(wr()) + for wr in wrcs: + # Weakrefs with callbacks are still cleared before calling + # finalizers. + self.assertIsNone(wr()) self.clear_survivors() gc.collect() self.assert_del_calls(ids) From 135223e6c76d5c3e41165de4691cbc5f5fb5834f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 9 Jul 2025 13:54:27 -0700 Subject: [PATCH 12/12] Revert unneeded changes to unit tests. --- Lib/test/test_gc.py | 4 ++-- Lib/test/test_weakref.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 4d6ef012f72c39..ff60d7c7a8447d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1066,8 +1066,8 @@ class Z: # release references and create trash del a, wr_cycle gc.collect() - # In older versions of Python, the weakref was cleared by the - # gc. Now it is not cleared and so the callback is run. + # if called, it means there is a bug in the GC. The weakref should be + # cleared before Z dies. callback.assert_not_called() gc.enable() diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 2f8679487134ee..4c7c900eb56ae1 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1314,7 +1314,7 @@ def check_len_cycles(self, dict_type, cons): n2 = len(dct) # one item may be kept alive inside the iterator self.assertIn(n1, (0, 1)) - self.assertIn(n2, (0, 1)) + self.assertEqual(n2, 0) def test_weak_keyed_len_cycles(self): self.check_len_cycles(weakref.WeakKeyDictionary, lambda k: (k, 1)) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy