Skip to content

Commit c66b54f

Browse files
authored
[3.14] gh-91048: Fix external inspection multi-threaded performance (GH-136005) (#136080)
(cherry picked from commit 5334732)
1 parent 42e13b8 commit c66b54f

9 files changed

+619
-494
lines changed

Doc/data/python3.14.abi

Lines changed: 393 additions & 390 deletions
Large diffs are not rendered by default.

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ struct _Py_global_strings {
631631
STRUCT_FOR_ID(offset_src)
632632
STRUCT_FOR_ID(on_type_read)
633633
STRUCT_FOR_ID(onceregistry)
634+
STRUCT_FOR_ID(only_active_thread)
634635
STRUCT_FOR_ID(only_keys)
635636
STRUCT_FOR_ID(oparg)
636637
STRUCT_FOR_ID(opcode)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_external_inspection.py

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import threading
88
from asyncio import staggered, taskgroups, base_events, tasks
99
from unittest.mock import ANY
10-
from test.support import os_helper, SHORT_TIMEOUT, busy_retry
10+
from test.support import os_helper, SHORT_TIMEOUT, busy_retry, requires_gil_enabled
1111
from test.support.script_helper import make_script
1212
from test.support.socket_helper import find_unused_port
1313

@@ -876,6 +876,126 @@ def test_self_trace(self):
876876
],
877877
)
878878

879+
@skip_if_not_supported
880+
@unittest.skipIf(
881+
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
882+
"Test only runs on Linux with process_vm_readv support",
883+
)
884+
@requires_gil_enabled("Free threaded builds don't have an 'active thread'")
885+
def test_only_active_thread(self):
886+
# Test that only_active_thread parameter works correctly
887+
port = find_unused_port()
888+
script = textwrap.dedent(
889+
f"""\
890+
import time, sys, socket, threading
891+
892+
# Connect to the test process
893+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
894+
sock.connect(('localhost', {port}))
895+
896+
def worker_thread(name, barrier, ready_event):
897+
barrier.wait() # Synchronize thread start
898+
ready_event.wait() # Wait for main thread signal
899+
# Sleep to keep thread alive
900+
time.sleep(10_000)
901+
902+
def main_work():
903+
# Do busy work to hold the GIL
904+
sock.sendall(b"working\\n")
905+
count = 0
906+
while count < 100000000:
907+
count += 1
908+
if count % 10000000 == 0:
909+
pass # Keep main thread busy
910+
sock.sendall(b"done\\n")
911+
912+
# Create synchronization primitives
913+
num_threads = 3
914+
barrier = threading.Barrier(num_threads + 1) # +1 for main thread
915+
ready_event = threading.Event()
916+
917+
# Start worker threads
918+
threads = []
919+
for i in range(num_threads):
920+
t = threading.Thread(target=worker_thread, args=(f"Worker-{{i}}", barrier, ready_event))
921+
t.start()
922+
threads.append(t)
923+
924+
# Wait for all threads to be ready
925+
barrier.wait()
926+
927+
# Signal ready to parent process
928+
sock.sendall(b"ready\\n")
929+
930+
# Signal threads to start waiting
931+
ready_event.set()
932+
933+
# Give threads time to start sleeping
934+
time.sleep(0.1)
935+
936+
# Now do busy work to hold the GIL
937+
main_work()
938+
"""
939+
)
940+
941+
with os_helper.temp_dir() as work_dir:
942+
script_dir = os.path.join(work_dir, "script_pkg")
943+
os.mkdir(script_dir)
944+
945+
# Create a socket server to communicate with the target process
946+
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
947+
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
948+
server_socket.bind(("localhost", port))
949+
server_socket.settimeout(SHORT_TIMEOUT)
950+
server_socket.listen(1)
951+
952+
script_name = _make_test_script(script_dir, "script", script)
953+
client_socket = None
954+
try:
955+
p = subprocess.Popen([sys.executable, script_name])
956+
client_socket, _ = server_socket.accept()
957+
server_socket.close()
958+
959+
# Wait for ready signal
960+
response = b""
961+
while b"ready" not in response:
962+
response += client_socket.recv(1024)
963+
964+
# Wait for the main thread to start its busy work
965+
while b"working" not in response:
966+
response += client_socket.recv(1024)
967+
968+
# Get stack trace with all threads
969+
unwinder_all = RemoteUnwinder(p.pid, all_threads=True)
970+
all_traces = unwinder_all.get_stack_trace()
971+
972+
# Get stack trace with only GIL holder
973+
unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True)
974+
gil_traces = unwinder_gil.get_stack_trace()
975+
976+
except PermissionError:
977+
self.skipTest(
978+
"Insufficient permissions to read the stack trace"
979+
)
980+
finally:
981+
if client_socket is not None:
982+
client_socket.close()
983+
p.kill()
984+
p.terminate()
985+
p.wait(timeout=SHORT_TIMEOUT)
986+
987+
# Verify we got multiple threads in all_traces
988+
self.assertGreater(len(all_traces), 1, "Should have multiple threads")
989+
990+
# Verify we got exactly one thread in gil_traces
991+
self.assertEqual(len(gil_traces), 1, "Should have exactly one GIL holder")
992+
993+
# The GIL holder should be in the all_traces list
994+
gil_thread_id = gil_traces[0][0]
995+
all_thread_ids = [trace[0] for trace in all_traces]
996+
self.assertIn(gil_thread_id, all_thread_ids,
997+
"GIL holder should be among all threads")
998+
879999

8801000
if __name__ == "__main__":
8811001
unittest.main()

Modules/_remote_debugging_module.c

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,14 @@
6464
#endif
6565

6666
#ifdef Py_GIL_DISABLED
67-
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68-
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
67+
#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68+
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
70+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7071
#else
71-
#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
72-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
72+
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
73+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
74+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7375
#endif
7476
#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
7577

@@ -206,6 +208,7 @@ typedef struct {
206208
uint64_t code_object_generation;
207209
_Py_hashtable_t *code_object_cache;
208210
int debug;
211+
int only_active_thread;
209212
RemoteDebuggingState *cached_state; // Cached module state
210213
#ifdef Py_GIL_DISABLED
211214
// TLBC cache invalidation tracking
@@ -2496,6 +2499,7 @@ _remote_debugging.RemoteUnwinder.__init__
24962499
pid: int
24972500
*
24982501
all_threads: bool = False
2502+
only_active_thread: bool = False
24992503
debug: bool = False
25002504
25012505
Initialize a new RemoteUnwinder object for debugging a remote Python process.
@@ -2504,6 +2508,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
25042508
pid: Process ID of the target Python process to debug
25052509
all_threads: If True, initialize state for all threads in the process.
25062510
If False, only initialize for the main thread.
2511+
only_active_thread: If True, only sample the thread holding the GIL.
2512+
Cannot be used together with all_threads=True.
25072513
debug: If True, chain exceptions to explain the sequence of events that
25082514
lead to the exception.
25092515
@@ -2514,15 +2520,33 @@ process, including examining thread states, stack frames and other runtime data.
25142520
PermissionError: If access to the target process is denied
25152521
OSError: If unable to attach to the target process or access its memory
25162522
RuntimeError: If unable to read debug information from the target process
2523+
ValueError: If both all_threads and only_active_thread are True
25172524
[clinic start generated code]*/
25182525

25192526
static int
25202527
_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
25212528
int pid, int all_threads,
2529+
int only_active_thread,
25222530
int debug)
2523-
/*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/
2531+
/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/
25242532
{
2533+
// Validate that all_threads and only_active_thread are not both True
2534+
if (all_threads && only_active_thread) {
2535+
PyErr_SetString(PyExc_ValueError,
2536+
"all_threads and only_active_thread cannot both be True");
2537+
return -1;
2538+
}
2539+
2540+
#ifdef Py_GIL_DISABLED
2541+
if (only_active_thread) {
2542+
PyErr_SetString(PyExc_ValueError,
2543+
"only_active_thread is not supported when Py_GIL_DISABLED is not defined");
2544+
return -1;
2545+
}
2546+
#endif
2547+
25252548
self->debug = debug;
2549+
self->only_active_thread = only_active_thread;
25262550
self->cached_state = NULL;
25272551
if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
25282552
set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
@@ -2602,13 +2626,18 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
26022626
@critical_section
26032627
_remote_debugging.RemoteUnwinder.get_stack_trace
26042628
2605-
Returns a list of stack traces for all threads in the target process.
2629+
Returns a list of stack traces for threads in the target process.
26062630
26072631
Each element in the returned list is a tuple of (thread_id, frame_list), where:
26082632
- thread_id is the OS thread identifier
26092633
- frame_list is a list of tuples (function_name, filename, line_number) representing
26102634
the Python stack frames for that thread, ordered from most recent to oldest
26112635
2636+
The threads returned depend on the initialization parameters:
2637+
- If only_active_thread was True: returns only the thread holding the GIL
2638+
- If all_threads was True: returns all threads
2639+
- Otherwise: returns only the main thread
2640+
26122641
Example:
26132642
[
26142643
(1234, [
@@ -2632,7 +2661,7 @@ Each element in the returned list is a tuple of (thread_id, frame_list), where:
26322661

26332662
static PyObject *
26342663
_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self)
2635-
/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/
2664+
/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/
26362665
{
26372666
PyObject* result = NULL;
26382667
// Read interpreter state into opaque buffer
@@ -2655,6 +2684,28 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26552684
_Py_hashtable_clear(self->code_object_cache);
26562685
}
26572686

2687+
// If only_active_thread is true, we need to determine which thread holds the GIL
2688+
PyThreadState* gil_holder = NULL;
2689+
if (self->only_active_thread) {
2690+
// The GIL state is already in interp_state_buffer, just read from there
2691+
// Check if GIL is locked
2692+
int gil_locked = GET_MEMBER(int, interp_state_buffer,
2693+
self->debug_offsets.interpreter_state.gil_runtime_state_locked);
2694+
2695+
if (gil_locked) {
2696+
// Get the last holder (current holder when GIL is locked)
2697+
gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer,
2698+
self->debug_offsets.interpreter_state.gil_runtime_state_holder);
2699+
} else {
2700+
// GIL is not locked, return empty list
2701+
result = PyList_New(0);
2702+
if (!result) {
2703+
set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list");
2704+
}
2705+
goto exit;
2706+
}
2707+
}
2708+
26582709
#ifdef Py_GIL_DISABLED
26592710
// Check TLBC generation and invalidate cache if needed
26602711
uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
@@ -2666,7 +2717,10 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26662717
#endif
26672718

26682719
uintptr_t current_tstate;
2669-
if (self->tstate_addr == 0) {
2720+
if (self->only_active_thread && gil_holder != NULL) {
2721+
// We have the GIL holder, process only that thread
2722+
current_tstate = (uintptr_t)gil_holder;
2723+
} else if (self->tstate_addr == 0) {
26702724
// Get threads head from buffer
26712725
current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
26722726
self->debug_offsets.interpreter_state.threads_head);
@@ -2700,10 +2754,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
27002754
if (self->tstate_addr) {
27012755
break;
27022756
}
2757+
2758+
// If we're only processing the GIL holder, we're done after one iteration
2759+
if (self->only_active_thread && gil_holder != NULL) {
2760+
break;
2761+
}
27032762
}
27042763

27052764
exit:
2706-
_Py_RemoteDebug_ClearCache(&self->handle);
27072765
return result;
27082766
}
27092767

@@ -2827,11 +2885,9 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s
28272885
goto result_err;
28282886
}
28292887

2830-
_Py_RemoteDebug_ClearCache(&self->handle);
28312888
return result;
28322889

28332890
result_err:
2834-
_Py_RemoteDebug_ClearCache(&self->handle);
28352891
Py_XDECREF(result);
28362892
return NULL;
28372893
}
@@ -2898,11 +2954,9 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject
28982954
goto cleanup;
28992955
}
29002956

2901-
_Py_RemoteDebug_ClearCache(&self->handle);
29022957
return result;
29032958

29042959
cleanup:
2905-
_Py_RemoteDebug_ClearCache(&self->handle);
29062960
Py_XDECREF(result);
29072961
return NULL;
29082962
}
@@ -2928,7 +2982,6 @@ RemoteUnwinder_dealloc(PyObject *op)
29282982
}
29292983
#endif
29302984
if (self->handle.pid != 0) {
2931-
_Py_RemoteDebug_ClearCache(&self->handle);
29322985
_Py_RemoteDebug_CleanupProcHandle(&self->handle);
29332986
}
29342987
PyObject_Del(self);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy