Skip to content

Commit f8abfa3

Browse files
gh-103323: Get the "Current" Thread State from a Thread-Local Variable (gh-103324)
We replace _PyRuntime.tstate_current with a thread-local variable. As part of this change, we add a _Py_thread_local macro in pyport.h (only for the core runtime) to smooth out the compiler differences. The main motivation here is in support of a per-interpreter GIL, but this change also provides some performance improvement opportunities. Note that we do not provide a fallback for compilers that lack thread-local variable support: we fall back neither to the old tstate_current nor to thread-specific storage (PyThread_tss_*()). If that proves problematic then we can circle back. I consider it unlikely, but will run the buildbots to double-check. Also note that this does not change any of the code related to the GILState API, which uses a thread state stored in thread-specific storage. I suspect we can combine that with _Py_tss_tstate (from here). However, that can be addressed separately and is neither urgent nor critical. (While this change was mostly done independently, I did take some inspiration from earlier (~2020) work by @markshannon (main...markshannon:threadstate_in_tls) and @vstinner (#23976).)
1 parent 7ef614c commit f8abfa3

File tree

5 files changed

+73
-18
lines changed

5 files changed

+73
-18
lines changed

Include/internal/pycore_pystate.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -64,27 +64,35 @@ _Py_ThreadCanHandlePendingCalls(void)
6464
/* Variable and macro for in-line access to current thread
6565
and interpreter state */
6666

67-
static inline PyThreadState*
68-
_PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
69-
{
70-
return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
71-
}
67+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
68+
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
69+
#endif
70+
/* Out-of-line accessor for the current thread state.  _PyThreadState_GET()
   calls this when it cannot read the _Py_tss_tstate thread-local directly.
   This is a function, so it is exported with PyAPI_FUNC (PyAPI_DATA is for
   data symbols only). */
PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void);
7271

7372
/* Get the current Python thread state.
7473
75-
Efficient macro reading directly the 'tstate_current' atomic
76-
variable. The macro is unsafe: it does not check for error and it can
77-
return NULL.
74+
This function is unsafe: it does not check for error and it can return NULL.
7875
7976
The caller must hold the GIL.
8077
8178
See also PyThreadState_Get() and _PyThreadState_UncheckedGet(). */
8279
static inline PyThreadState*
8380
_PyThreadState_GET(void)
8481
{
85-
return _PyRuntimeState_GetThreadState(&_PyRuntime);
82+
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
83+
return _Py_tss_tstate;
84+
#else
85+
return _PyThreadState_GetCurrent();
86+
#endif
87+
}
88+
89+
static inline PyThreadState*
90+
_PyRuntimeState_GetThreadState(_PyRuntimeState *Py_UNUSED(runtime))
91+
{
92+
return _PyThreadState_GET();
8693
}
8794

95+
8896
static inline void
8997
_Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate)
9098
{

Include/internal/pycore_runtime.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,6 @@ typedef struct pyruntimestate {
119119

120120
unsigned long main_thread;
121121

122-
/* Assuming the current thread holds the GIL, this is the
123-
PyThreadState for the current thread. */
124-
_Py_atomic_address tstate_current;
125122
/* Used for the thread state bound to the current thread. */
126123
Py_tss_t autoTSSkey;
127124

Include/pyport.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,27 @@ extern char * _getpty(int *, int, mode_t, int);
662662
# define WITH_THREAD
663663
#endif
664664

665+
#ifdef WITH_THREAD
666+
# ifdef Py_BUILD_CORE
667+
# ifdef HAVE_THREAD_LOCAL
668+
# error "HAVE_THREAD_LOCAL is already defined"
669+
# endif
670+
# define HAVE_THREAD_LOCAL 1
671+
# ifdef thread_local
672+
# define _Py_thread_local thread_local
673+
# elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
674+
# define _Py_thread_local _Thread_local
675+
# elif defined(_MSC_VER) /* AKA NT_THREADS */
676+
# define _Py_thread_local __declspec(thread)
677+
# elif defined(__GNUC__) /* includes clang */
678+
# define _Py_thread_local __thread
679+
# else
680+
// fall back to the PyThread_tss_*() API, or ignore.
681+
# undef HAVE_THREAD_LOCAL
682+
# endif
683+
# endif
684+
#endif
685+
665686
/* Check that ALT_SOABI is consistent with Py_TRACE_REFS:
666687
./configure --with-trace-refs must be used to define Py_TRACE_REFS */
667688
#if defined(ALT_SOABI) && defined(Py_TRACE_REFS)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
We've replaced our use of ``_PyRuntime.tstate_current`` with a thread-local
2+
variable. This is a fairly low-level implementation detail, and there
3+
should be no change in behavior.

Python/pystate.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,30 +60,56 @@ extern "C" {
6060
For each of these functions, the GIL must be held by the current thread.
6161
*/
6262

63+
64+
#ifdef HAVE_THREAD_LOCAL
65+
_Py_thread_local PyThreadState *_Py_tss_tstate = NULL;
66+
#endif
67+
6368
static inline PyThreadState *
64-
current_fast_get(_PyRuntimeState *runtime)
69+
current_fast_get(_PyRuntimeState *Py_UNUSED(runtime))
6570
{
66-
return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
71+
#ifdef HAVE_THREAD_LOCAL
72+
return _Py_tss_tstate;
73+
#else
74+
// XXX Fall back to the PyThread_tss_*() API.
75+
# error "no supported thread-local variable storage classifier"
76+
#endif
6777
}
6878

6979
static inline void
70-
current_fast_set(_PyRuntimeState *runtime, PyThreadState *tstate)
80+
current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
7181
{
7282
assert(tstate != NULL);
73-
_Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)tstate);
83+
#ifdef HAVE_THREAD_LOCAL
84+
_Py_tss_tstate = tstate;
85+
#else
86+
// XXX Fall back to the PyThread_tss_*() API.
87+
# error "no supported thread-local variable storage classifier"
88+
#endif
7489
}
7590

7691
static inline void
77-
current_fast_clear(_PyRuntimeState *runtime)
92+
current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
7893
{
79-
_Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)NULL);
94+
#ifdef HAVE_THREAD_LOCAL
95+
_Py_tss_tstate = NULL;
96+
#else
97+
// XXX Fall back to the PyThread_tss_*() API.
98+
# error "no supported thread-local variable storage classifier"
99+
#endif
80100
}
81101

82102
#define tstate_verify_not_active(tstate) \
83103
if (tstate == current_fast_get((tstate)->interp->runtime)) { \
84104
_Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \
85105
}
86106

107+
PyThreadState *
108+
_PyThreadState_GetCurrent(void)
109+
{
110+
return current_fast_get(&_PyRuntime);
111+
}
112+
87113

88114
//------------------------------------------------
89115
// the thread state bound to the current OS thread

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy