Skip to content

Commit 49d7236

Browse files
encukoufthainvstinnerglaubitz
authored
gh-127545: Add _Py_ALIGNED_DEF(N, T) and use it for PyObject (GH-135209)
* Replace _Py_ALIGN_AS(V) by _Py_ALIGNED_DEF(N, T) This is now a common façade for the various `_Alignas` alternatives, which behave in interesting ways -- see the source comment. The new macro (and MSVC's `__declspec(align)`) should not be used on a variable/member declaration that includes a struct declaraton. A workaround is to separate the struct definition. Do that for `PyASCIIObject.state`. * Specify minimum PyGC_Head and PyObject alignment As documented in InternalDocs/garbage_collector.md, the garbage collector stores flags in the least significant two bits of the _gc_prev pointer in struct PyGC_Head. Consequently, this pointer is only capable of storing a location that's aligned to a 4-byte boundary. Encode this requirement using _Py_ALIGNED_DEF. This patch fixes a segfault in m68k, which was previously investigated by Adrian Glaubitz here: https://lists.debian.org/debian-68k/2024/11/msg00020.html https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1087600 Original patch (using the GCC-only Py_ALIGNED) by Finn Thain. Co-authored-by: Finn Thain <fthain@linux-m68k.org> Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
1 parent 2b8b477 commit 49d7236

File tree

6 files changed

+122
-106
lines changed

6 files changed

+122
-106
lines changed

Include/Python.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,6 @@
5959
# include <intrin.h> // __readgsqword()
6060
#endif
6161

62-
// Suppress known warnings in Python header files.
63-
#if defined(_MSC_VER)
64-
// Warning that alignas behaviour has changed. Doesn't affect us, because we
65-
// never relied on the old behaviour.
66-
#pragma warning(push)
67-
#pragma warning(disable: 5274)
68-
#endif
69-
7062
// Include Python header files
7163
#include "pyport.h"
7264
#include "pymacro.h"
@@ -146,9 +138,4 @@
146138
#include "cpython/pyfpe.h"
147139
#include "cpython/tracemalloc.h"
148140

149-
// Restore warning filter
150-
#ifdef _MSC_VER
151-
#pragma warning(pop)
152-
#endif
153-
154141
#endif /* !Py_PYTHON_H */

Include/cpython/unicodeobject.h

Lines changed: 59 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,63 @@ static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) {
4747

4848
/* --- Unicode Type ------------------------------------------------------- */
4949

50+
struct _PyUnicodeObject_state {
51+
/* If interned is non-zero, the two references from the
52+
dictionary to this object are *not* counted in ob_refcnt.
53+
The possible values here are:
54+
0: Not Interned
55+
1: Interned
56+
2: Interned and Immortal
57+
3: Interned, Immortal, and Static
58+
This categorization allows the runtime to determine the right
59+
cleanup mechanism at runtime shutdown. */
60+
#ifdef Py_GIL_DISABLED
61+
// Needs to be accessed atomically, so can't be a bit field.
62+
unsigned char interned;
63+
#else
64+
unsigned int interned:2;
65+
#endif
66+
/* Character size:
67+
68+
- PyUnicode_1BYTE_KIND (1):
69+
70+
* character type = Py_UCS1 (8 bits, unsigned)
71+
* all characters are in the range U+0000-U+00FF (latin1)
72+
* if ascii is set, all characters are in the range U+0000-U+007F
73+
(ASCII), otherwise at least one character is in the range
74+
U+0080-U+00FF
75+
76+
- PyUnicode_2BYTE_KIND (2):
77+
78+
* character type = Py_UCS2 (16 bits, unsigned)
79+
* all characters are in the range U+0000-U+FFFF (BMP)
80+
* at least one character is in the range U+0100-U+FFFF
81+
82+
- PyUnicode_4BYTE_KIND (4):
83+
84+
* character type = Py_UCS4 (32 bits, unsigned)
85+
* all characters are in the range U+0000-U+10FFFF
86+
* at least one character is in the range U+10000-U+10FFFF
87+
*/
88+
unsigned int kind:3;
89+
/* Compact is with respect to the allocation scheme. Compact unicode
90+
objects only require one memory block while non-compact objects use
91+
one block for the PyUnicodeObject struct and another for its data
92+
buffer. */
93+
unsigned int compact:1;
94+
/* The string only contains characters in the range U+0000-U+007F (ASCII)
95+
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
96+
set, use the PyASCIIObject structure. */
97+
unsigned int ascii:1;
98+
/* The object is statically allocated. */
99+
unsigned int statically_allocated:1;
100+
#ifndef Py_GIL_DISABLED
101+
/* Historical: padding to ensure that PyUnicode_DATA() is always aligned to
102+
4 bytes (see issue gh-63736 on m68k) */
103+
unsigned int :24;
104+
#endif
105+
};
106+
50107
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
51108
structure. state.ascii and state.compact are set, and the data
52109
immediately follow the structure. utf8_length can be found
@@ -99,67 +156,8 @@ typedef struct {
99156
PyObject_HEAD
100157
Py_ssize_t length; /* Number of code points in the string */
101158
Py_hash_t hash; /* Hash value; -1 if not set */
102-
#ifdef Py_GIL_DISABLED
103-
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k.
104-
In the non-free-threaded build, we'll use explicit padding instead */
105-
_Py_ALIGN_AS(4)
106-
#endif
107-
struct {
108-
/* If interned is non-zero, the two references from the
109-
dictionary to this object are *not* counted in ob_refcnt.
110-
The possible values here are:
111-
0: Not Interned
112-
1: Interned
113-
2: Interned and Immortal
114-
3: Interned, Immortal, and Static
115-
This categorization allows the runtime to determine the right
116-
cleanup mechanism at runtime shutdown. */
117-
#ifdef Py_GIL_DISABLED
118-
// Needs to be accessed atomically, so can't be a bit field.
119-
unsigned char interned;
120-
#else
121-
unsigned int interned:2;
122-
#endif
123-
/* Character size:
124-
125-
- PyUnicode_1BYTE_KIND (1):
126-
127-
* character type = Py_UCS1 (8 bits, unsigned)
128-
* all characters are in the range U+0000-U+00FF (latin1)
129-
* if ascii is set, all characters are in the range U+0000-U+007F
130-
(ASCII), otherwise at least one character is in the range
131-
U+0080-U+00FF
132-
133-
- PyUnicode_2BYTE_KIND (2):
134-
135-
* character type = Py_UCS2 (16 bits, unsigned)
136-
* all characters are in the range U+0000-U+FFFF (BMP)
137-
* at least one character is in the range U+0100-U+FFFF
138-
139-
- PyUnicode_4BYTE_KIND (4):
140-
141-
* character type = Py_UCS4 (32 bits, unsigned)
142-
* all characters are in the range U+0000-U+10FFFF
143-
* at least one character is in the range U+10000-U+10FFFF
144-
*/
145-
unsigned int kind:3;
146-
/* Compact is with respect to the allocation scheme. Compact unicode
147-
objects only require one memory block while non-compact objects use
148-
one block for the PyUnicodeObject struct and another for its data
149-
buffer. */
150-
unsigned int compact:1;
151-
/* The string only contains characters in the range U+0000-U+007F (ASCII)
152-
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
153-
set, use the PyASCIIObject structure. */
154-
unsigned int ascii:1;
155-
/* The object is statically allocated. */
156-
unsigned int statically_allocated:1;
157-
#ifndef Py_GIL_DISABLED
158-
/* Padding to ensure that PyUnicode_DATA() is always aligned to
159-
4 bytes (see issue gh-63736 on m68k) */
160-
unsigned int :24;
161-
#endif
162-
} state;
159+
/* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. */
160+
_Py_ALIGNED_DEF(4, struct _PyUnicodeObject_state) state;
163161
} PyASCIIObject;
164162

165163
/* Non-ASCII strings allocated through PyUnicode_New use the

Include/internal/pycore_interp_structs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,11 @@ struct atexit_state {
159159
typedef struct {
160160
// Tagged pointer to next object in the list.
161161
// 0 means the object is not tracked
162-
uintptr_t _gc_next;
162+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) _gc_next;
163163

164164
// Tagged pointer to previous object in the list.
165165
// Lowest two bits are used for flags documented later.
166+
// Those bits are made available by the struct's minimum alignment.
166167
uintptr_t _gc_prev;
167168
} PyGC_Head;
168169

Include/object.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ whose size is determined when the object is allocated.
101101
#define PyObject_VAR_HEAD PyVarObject ob_base;
102102
#define Py_INVALID_SIZE (Py_ssize_t)-1
103103

104+
/* PyObjects are given a minimum alignment so that the least significant bits
105+
* of an object pointer become available for other purposes.
106+
* This must be an integer literal with the value (1 << _PyGC_PREV_SHIFT), number of bytes.
107+
*/
108+
#define _PyObject_MIN_ALIGNMENT 4
109+
104110
/* Nothing is actually declared to be a PyObject, but every pointer to
105111
* a Python object can be cast to a PyObject*. This is inheritance built
106112
* by hand. Similarly every pointer to a variable-size Python object can,
@@ -136,6 +142,7 @@ struct _object {
136142
#else
137143
Py_ssize_t ob_refcnt;
138144
#endif
145+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner;
139146
};
140147
#ifdef _MSC_VER
141148
__pragma(warning(pop))
@@ -153,7 +160,7 @@ struct _object {
153160
// ob_tid stores the thread id (or zero). It is also used by the GC and the
154161
// trashcan mechanism as a linked list pointer and by the GC to store the
155162
// computed "gc_refs" refcount.
156-
uintptr_t ob_tid;
163+
_Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, uintptr_t) ob_tid;
157164
uint16_t ob_flags;
158165
PyMutex ob_mutex; // per-object lock
159166
uint8_t ob_gc_bits; // gc-related state

Include/pymacro.h

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,44 +24,66 @@
2424
#endif
2525

2626

27-
// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword,
28-
// We currently use alignas for free-threaded builds only; additional compat
29-
// checking would be great before we add it to the default build.
30-
// Standards/compiler support:
27+
// _Py_ALIGNED_DEF(N, T): Define a variable/member with increased alignment
28+
//
29+
// `N`: the desired minimum alignment, an integer literal, number of bytes
30+
// `T`: the type of the defined variable
31+
// (or a type with at least the defined variable's alignment)
32+
//
33+
// May not be used on a struct definition.
34+
//
35+
// Standards/compiler support for `alignas` alternatives:
3136
// - `alignas` is a keyword in C23 and C++11.
3237
// - `_Alignas` is a keyword in C11
3338
// - GCC & clang has __attribute__((aligned))
3439
// (use that for older standards in pedantic mode)
3540
// - MSVC has __declspec(align)
3641
// - `_Alignas` is common C compiler extension
37-
// Older compilers may name it differently; to allow compilation on such
38-
// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already
42+
// Older compilers may name `alignas` differently; to allow compilation on such
43+
// unsupported platforms, we don't redefine _Py_ALIGNED_DEF if it's already
3944
// defined. Note that defining it wrong (including defining it to nothing) will
4045
// cause ABI incompatibilities.
41-
#ifdef Py_GIL_DISABLED
42-
# ifndef _Py_ALIGN_AS
43-
# ifdef __cplusplus
44-
# if __cplusplus >= 201103L
45-
# define _Py_ALIGN_AS(V) alignas(V)
46-
# elif defined(__GNUC__) || defined(__clang__)
47-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
48-
# elif defined(_MSC_VER)
49-
# define _Py_ALIGN_AS(V) __declspec(align(V))
50-
# else
51-
# define _Py_ALIGN_AS(V) alignas(V)
52-
# endif
53-
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
54-
# define _Py_ALIGN_AS(V) alignas(V)
55-
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
56-
# define _Py_ALIGN_AS(V) _Alignas(V)
57-
# elif (defined(__GNUC__) || defined(__clang__))
58-
# define _Py_ALIGN_AS(V) __attribute__((aligned(V)))
59-
# elif defined(_MSC_VER)
60-
# define _Py_ALIGN_AS(V) __declspec(align(V))
61-
# else
62-
# define _Py_ALIGN_AS(V) _Alignas(V)
63-
# endif
64-
# endif
46+
//
47+
// Behavior of `alignas` alternatives:
48+
// - `alignas` & `_Alignas`:
49+
// - Can be used multiple times; the greatest alignment applies.
50+
// - It is an *error* if the combined effect of all `alignas` modifiers would
51+
// decrease the alignment.
52+
// - Takes types or numbers.
53+
// - May not be used on a struct definition, unless also defining a variable.
54+
// - `__declspec(align)`:
55+
// - Has no effect if it would decrease alignment.
56+
// - Only takes an integer literal.
57+
// - May be used on struct or variable definitions.
58+
// However, when defining both the struct and the variable at once,
59+
// `declspec(aligned)` causes compiler warning 5274 and possible ABI
60+
// incompatibility.
61+
// - ` __attribute__((aligned))`:
62+
// - Has no effect if it would decrease alignment.
63+
// - Takes types or numbers
64+
// - May be used on struct or variable definitions.
65+
#ifndef _Py_ALIGNED_DEF
66+
# ifdef __cplusplus
67+
# if __cplusplus >= 201103L
68+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
69+
# elif defined(__GNUC__) || defined(__clang__)
70+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
71+
# elif defined(_MSC_VER)
72+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
73+
# else
74+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
75+
# endif
76+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
77+
# define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T
78+
# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
79+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
80+
# elif (defined(__GNUC__) || defined(__clang__))
81+
# define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T
82+
# elif defined(_MSC_VER)
83+
# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T
84+
# else
85+
# define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T
86+
# endif
6587
#endif
6688

6789
/* Minimum value between x and y */
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix crash when building on Linux/m68k.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy