From 9a6ccb54c1052aa7fd2cd8d486e703c8da848d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Oct 2024 20:15:28 +0200 Subject: [PATCH 01/78] support simple SIMD detection --- Include/internal/pycore_cpuinfo.h | 26 ++ Makefile.pre.in | 2 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/cpuinfo.c | 110 ++++++++ configure | 395 +++++++++++++++++++++++++++++ configure.ac | 30 +++ pyconfig.h.in | 24 ++ 8 files changed, 595 insertions(+) create mode 100644 Include/internal/pycore_cpuinfo.h create mode 100644 Python/cpuinfo.c diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h new file mode 100644 index 00000000000000..d4e9428dfb49dd --- /dev/null +++ b/Include/internal/pycore_cpuinfo.h @@ -0,0 +1,26 @@ +#ifndef Py_INTERNAL_CPUINFO_H +#define Py_INTERNAL_CPUINFO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include + +typedef struct { + bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; + bool done; +} cpu_simd_flags; + +extern void +detect_cpu_simd_features(cpu_simd_flags *flags); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_INTERNAL_CPUINFO_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index 07c8a4d20142db..f3640921a501b6 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -434,6 +434,7 @@ PYTHON_OBJS= \ Python/context.o \ Python/critical_section.o \ Python/crossinterp.o \ + Python/cpuinfo.o \ Python/dynamic_annotations.o \ Python/errors.o \ Python/flowgraph.o \ @@ -1191,6 +1192,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_complexobject.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_cpuinfo.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ 
$(srcdir)/Include/internal/pycore_descrobject.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 3b33c6bf6bb91d..989c82e396128c 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -225,6 +225,7 @@ + @@ -584,6 +585,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index ee2930b10439a9..d60294818c8fb8 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -597,6 +597,9 @@ Include\internal + + Include\cpython + Include\internal @@ -1304,6 +1307,9 @@ Python + + Source Files + Python diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c new file mode 100644 index 00000000000000..2eaafe1380b418 --- /dev/null +++ b/Python/cpuinfo.c @@ -0,0 +1,110 @@ +/* + * Naive CPU SIMD features detection. + * + * See Modules/black2module.c. + */ + +#include "Python.h" +#include "pycore_cpuinfo.h" + +#include + +#if defined(__x86_64__) && defined(__GNUC__) +#include +#elif defined(_M_X64) +#include +#endif + +// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). +// However, since autoconf incorrectly assumes so when compiling a universal2 +// binary, we disable all AVX-related instructions. 
+#if defined(__APPLE__) && defined(__arm64__) +# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#endif + +#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 +#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 +#define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 +#define ECX1_SSE4_1 (1 << 19) // sse4.1, ECX, page 1, bit 19 +#define ECX1_SSE4_2 (1 << 20) // sse4.2, ECX, page 1, bit 20 +#define ECX1_AVX (1 << 28) // avx, ECX, page 1, bit 28 +#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 +#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 + +void +detect_cpu_simd_features(cpu_simd_flags *flags) +{ + if (flags->done) { + return; + } + + int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; + int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +#if defined(__x86_64__) && defined(__GNUC__) + __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); + __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); +#elif defined(_M_X64) + int info1[4] = {0}; + __cpuidex(info1, 1, 0); + eax1 = info1[0]; + ebx1 = info1[1]; + ecx1 = info1[2]; + edx1 = info1[3]; + + int info7[4] = {0}; + __cpuidex(info7, 7, 0); + eax7 = info7[0]; + ebx7 = info7[1]; + ecx7 = info7[2]; + edx7 = info7[3]; +#else + // use (void) expressions to avoid warnings + (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; + (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; +#endif + +#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + flags->sse = (edx1 & EDX1_SSE) != 0; +#else + flags->sse = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + flags->sse2 = (edx1 & EDX1_SSE2) != 0; +#else + flags->sse2 = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + flags->sse3 = (ecx1 & ECX1_SSE3) != 0; + #else +#endif + flags->sse3 = false; +#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; +#else + flags->sse41 = false; +#endif +#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + flags->sse42 = 
(ecx1 & ECX1_SSE4_2) != 0; +#else + flags->sse42 = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + flags->avx = (ecx1 & ECX1_AVX) != 0; +#else + flags->avx = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + flags->avx2 = (ebx7 & EBX7_AVX2) != 0; +#else + flags->avx2 = false; +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; +#else + flags->avx512vbmi = false; +#endif + + flags->done = true; +} diff --git a/configure b/configure index 0cc73e4e66552d..8899ad2eb4f5de 100755 --- a/configure +++ b/configure @@ -30617,6 +30617,401 @@ fi printf "%s\n" "$py_cv_module__blake2" >&6; } + + +# Detection of suported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# Detection for more instruction sets can be added. By default, we detect +# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 +printf %s "checking whether C compiler accepts -msse... " >&6; } +if test ${ax_cv_check_cflags___msse+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse=yes +else $as_nop + ax_cv_check_cflags___msse=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } +if test "x$ax_cv_check_cflags___msse" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 +printf %s "checking whether C compiler accepts -msse2... " >&6; } +if test ${ax_cv_check_cflags___msse2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse2=yes +else $as_nop + ax_cv_check_cflags___msse2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } +if test "x$ax_cv_check_cflags___msse2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 +printf %s "checking whether C compiler accepts -msse3... " >&6; } +if test ${ax_cv_check_cflags___msse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse3=yes +else $as_nop + ax_cv_check_cflags___msse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } +if test "x$ax_cv_check_cflags___msse3" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags___msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_2=yes +else $as_nop + ax_cv_check_cflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } +if test "x$ax_cv_check_cflags___msse4_2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags___msse4_2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___msse4_2=yes +else $as_nop + ax_cv_check_cflags___msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } +if test "x$ax_cv_check_cflags___msse4_2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +printf %s "checking whether C compiler accepts -mavx... " >&6; } +if test ${ax_cv_check_cflags___mavx+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx=yes +else $as_nop + ax_cv_check_cflags___mavx=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } +if test "x$ax_cv_check_cflags___mavx" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags___mavx2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx2=yes +else $as_nop + ax_cv_check_cflags___mavx2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } +if test "x$ax_cv_check_cflags___mavx2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + +fi + LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' case "$ac_sys_system" in Linux*) diff --git a/configure.ac b/configure.ac index 1864e94ace9243..dae39e0e5d1edb 100644 --- a/configure.ac +++ b/configure.ac @@ -7789,6 +7789,36 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) +dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, DEFINE_CONSTANT_SUFFIX) +AC_DEFUN([PY_SIMD_DETECT], [ + AS_VAR_PUSHDEF([py_var], [[ac_cv_simd_]m4_tolower($1)]) + AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], + [[CAN_COMPILE_SIMD_]m4_toupper($1)[_INSTRUCTIONS]], [$3])]) + AC_MSG_CHECKING([checking SIMD instruction set]) + AX_CHECK_COMPILE_FLAG([$2], + [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], + [], []) + AS_VAR_POPDEF([py_var]) + AS_VAR_POPDEF([py_define]) +]) + +# Detection of suported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# Detection for more instruction sets can be added. By default, we detect +# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. 
+if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + PY_SIMD_DETECT([SSE], [-msse]) + PY_SIMD_DETECT([SSE2], [-msse2]) + PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSE4.1], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) + PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX2], [-mavx2]) + PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) +fi + LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' case "$ac_sys_system" in Linux*) diff --git a/pyconfig.h.in b/pyconfig.h.in index 7f02603e26f5d0..123a4cc40936ae 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -32,6 +32,30 @@ /* The Android API level. */ #undef ANDROID_API_LEVEL +/* Define if '-mavx2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 From f4e4f99720166179bc1627830e26c1c3664887d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Oct 2024 21:22:01 +0200 Subject: [PATCH 02/78] add _Py prefix --- Include/internal/pycore_cpuinfo.h | 2 +- Python/cpuinfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index d4e9428dfb49dd..1c8a040d664ddf 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -17,7 +17,7 @@ typedef struct { } cpu_simd_flags; extern void -detect_cpu_simd_features(cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2eaafe1380b418..aa2361373688be 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -34,7 +34,7 @@ #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 void -detect_cpu_simd_features(cpu_simd_flags *flags) +_Py_detect_cpu_simd_features(cpu_simd_flags *flags) { if (flags->done) { return; From 5006686633e9dc61ad607f4adf523605c0dcdcd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 00:25:48 +0200 Subject: [PATCH 03/78] Use `_py` prefix --- Include/internal/pycore_cpuinfo.h | 4 ++-- Python/cpuinfo.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 1c8a040d664ddf..27b4bc0fad8638 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -14,10 +14,10 @@ extern "C" { typedef struct { bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; bool 
done; -} cpu_simd_flags; +} _py_cpu_simd_flags; extern void -_Py_detect_cpu_simd_features(cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index aa2361373688be..28ad48ab52bd73 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -10,9 +10,9 @@ #include #if defined(__x86_64__) && defined(__GNUC__) -#include +# include #elif defined(_M_X64) -#include +# include #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -34,7 +34,7 @@ #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 void -_Py_detect_cpu_simd_features(cpu_simd_flags *flags) +_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags) { if (flags->done) { return; From 3c0b4f1c8182416594c68ea70867bd8ad6cdb3d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:16:39 +0200 Subject: [PATCH 04/78] make the interface friendlier for future adjustments --- Include/internal/pycore_cpuinfo.h | 22 +++-- Python/cpuinfo.c | 154 +++++++++++++++++++----------- 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 27b4bc0fad8638..c6ac446c2fc135 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -9,15 +9,25 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include - typedef struct { - bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi; - bool done; -} _py_cpu_simd_flags; + /* Streaming SIMD Extensions */ + uint8_t sse: 1; + uint8_t sse2: 1; + uint8_t sse3: 1; + uint8_t sse41: 1; // SSE4.1 + uint8_t sse42: 1; // SSE4.2 + + /* Advanced Vector Extensions */ + uint8_t avx: 1; + uint8_t avx2: 1; + uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions + + uint8_t done; // indicate whether the structure was filled or 
not +} py_cpu_simd_flags; +/* Detect the available SIMD features on this machine. */ extern void -_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags); +_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 28ad48ab52bd73..d1799264642b71 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,18 +1,25 @@ /* - * Naive CPU SIMD features detection. + * Python CPU SIMD features detection. * - * See Modules/black2module.c. + * See https://en.wikipedia.org/wiki/CPUID for details. */ #include "Python.h" #include "pycore_cpuinfo.h" -#include +#define CPUID_REG(ARG) ARG +/* + * For simplicity, we only enable SIMD instructions for Intel CPUs, + * even though we could support ARM NEON and POWER. + */ #if defined(__x86_64__) && defined(__GNUC__) # include #elif defined(_M_X64) # include +#else +# undef CPUID_REG +# define CPUID_REG(ARG) Py_UNUSED(ARG) #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -24,6 +31,15 @@ # undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS #endif +/* + * The macros below describe masks to apply on CPUID output registers. + * + * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where + * + * - REGISTER is either EBX, ECX or EDX, + * - PAGE is either 1 or 7 depending, and + * - FEATURE is an SIMD instruction set. + */ #define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 #define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 #define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 @@ -33,78 +49,106 @@ #define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 #define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 -void -_Py_detect_cpu_simd_features(_py_cpu_simd_flags *flags) -{ - if (flags->done) { - return; - } +#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 
0 : 1 - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; +/* + * Indicate whether the CPUID input EAX=1 may be needed to + * detect SIMD basic features (e.g., SSE). + */ +#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) +# define MAY_DETECT_CPUID_SIMD_FEATURES +#endif + +/* + * Indicate whether the CPUID input EAX=7 may be needed to + * detect SIMD extended features (e.g., AVX2 or AVX-512). + */ +#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) +# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES +#endif + +static inline void +get_cpuid_info(int32_t level /* input eax */, + int32_t count /* input ecx */, + int32_t *CPUID_REG(eax), + int32_t *CPUID_REG(ebx), + int32_t *CPUID_REG(ecx), + int32_t *CPUID_REG(edx)) +{ #if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0]; - ebx1 = info1[1]; - ecx1 = info1[2]; - edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0]; - ebx7 = info7[1]; - ecx7 = info7[2]; - edx7 = info7[3]; -#else - // use (void) expressions to avoid warnings - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; + int32_t info[4] = {0}; + __cpuidex(info, level, count); + *eax = info[0]; + *ebx = info[1]; + *ecx = info[2]; + *edx = info[3]; #endif +} +/* Processor Info and Feature Bits (EAX=1, ECX=0). 
*/ +static inline void +detect_cpu_simd_features(py_cpu_simd_flags *flags) +{ + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = (edx1 & EDX1_SSE) != 0; -#else - flags->sse = false; + flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = (edx1 & EDX1_SSE2) != 0; -#else - flags->sse2 = false; + flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = (ecx1 & ECX1_SSE3) != 0; - #else + flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3); #endif - flags->sse3 = false; #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; -#else - flags->sse41 = false; + flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0; -#else - flags->sse42 = false; + flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2); #endif #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = (ecx1 & ECX1_AVX) != 0; -#else - flags->avx = false; + flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX); #endif +} + +/* Extended feature bits (EAX=7, ECX=0). 
*/ +static inline void +detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) +{ + int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = (ebx7 & EBX7_AVX2) != 0; -#else - flags->avx2 = false; + flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2); #endif #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; -#else - flags->avx512vbmi = false; + flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI); #endif +} - flags->done = true; +void +_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) +{ + if (flags->done) { + return; + } +#ifdef MAY_DETECT_CPUID_SIMD_FEATURES + detect_cpu_simd_features(flags); +#else + flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0; + flags->avx = 0; +#endif +#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES + detect_cpu_simd_extended_features(flags); +#else + flags->avx2 = flags->avx512vbmi = 0; +#endif + flags->done = 1; } From 01ed21af7c750dfd4d94549cf90b957bf822a471 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:36:50 +0200 Subject: [PATCH 05/78] Allow `cpu_simd_flags` to be merged. --- Include/internal/pycore_cpuinfo.h | 8 ++++++++ Python/cpuinfo.c | 20 +++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index c6ac446c2fc135..418c3e7d3fb107 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -29,6 +29,14 @@ typedef struct { extern void _Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); +/* + * Apply a bitwise-OR on all flags in 'out' using those in 'src', + * unconditionally updating 'out' (i.e. out->done is ignored) and + * setting 'out->done' to 1. 
+ */ +extern void +_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, const py_cpu_simd_flags *src); + #ifdef __cplusplus } #endif diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d1799264642b71..121ba59380e667 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -119,7 +119,7 @@ detect_cpu_simd_features(py_cpu_simd_flags *flags) #endif } -/* Extended feature bits (EAX=7, ECX=0). */ +/* Extended Feature Bits (EAX=7, ECX=0). */ static inline void detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) { @@ -152,3 +152,21 @@ _Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) #endif flags->done = 1; } + +void +_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, + const py_cpu_simd_flags *src) +{ +#define UPDATE(FLAG) out->FLAG |= src->FLAG + UPDATE(sse); + UPDATE(sse2); + UPDATE(sse3); + UPDATE(sse41); + UPDATE(sse42); + + UPDATE(avx); + UPDATE(avx2); + UPDATE(avx512vbmi); +#undef UPDATE + out->done = 1; +} From 969a619c82d56741168e31ccfeb8c659a60f074f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 11:39:31 +0200 Subject: [PATCH 06/78] update comments --- Python/cpuinfo.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 121ba59380e667..2cd98b3a17fbf4 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -7,7 +7,10 @@ #include "Python.h" #include "pycore_cpuinfo.h" -#define CPUID_REG(ARG) ARG +/* Macro to mark a CPUID register function parameter as being used. */ +#define CPUID_REG(PARAM) PARAM +/* Macro to check a CPUID register bit. */ +#define CPUID_CHECK_REG(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 
0 : 1 /* * For simplicity, we only enable SIMD instructions for Intel CPUs, @@ -19,7 +22,7 @@ # include #else # undef CPUID_REG -# define CPUID_REG(ARG) Py_UNUSED(ARG) +# define CPUID_REG(PARAM) Py_UNUSED(PARAM) #endif // AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). @@ -38,18 +41,16 @@ * * - REGISTER is either EBX, ECX or EDX, * - PAGE is either 1 or 7 depending, and - * - FEATURE is an SIMD instruction set. + * - FEATURE is a SIMD feature (with one or more specialized instructions). */ -#define EDX1_SSE (1 << 25) // sse, EDX, page 1, bit 25 -#define EDX1_SSE2 (1 << 26) // sse2, EDX, page 1, bit 26 -#define ECX1_SSE3 (1 << 9) // sse3, ECX, page 1, bit 0 -#define ECX1_SSE4_1 (1 << 19) // sse4.1, ECX, page 1, bit 19 -#define ECX1_SSE4_2 (1 << 20) // sse4.2, ECX, page 1, bit 20 -#define ECX1_AVX (1 << 28) // avx, ECX, page 1, bit 28 -#define EBX7_AVX2 (1 << 5) // avx2, EBX, page 7, bit 5 -#define ECX7_AVX512_VBMI (1 << 1) // avx512-vbmi, ECX, page 7, bit 1 - -#define CHECK_CPUID_REGISTER(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 
0 : 1 +#define EDX1_SSE (1 << 25) +#define EDX1_SSE2 (1 << 26) +#define ECX1_SSE3 (1 << 9) +#define ECX1_SSE4_1 (1 << 19) +#define ECX1_SSE4_2 (1 << 20) +#define ECX1_AVX (1 << 28) +#define EBX7_AVX2 (1 << 5) +#define ECX7_AVX512_VBMI (1 << 1) /* * Indicate whether the CPUID input EAX=1 may be needed to @@ -100,22 +101,22 @@ detect_cpu_simd_features(py_cpu_simd_flags *flags) int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = CHECK_CPUID_REGISTER(edx, EDX1_SSE); + flags->sse = CPUID_CHECK_REG(edx, EDX1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = CHECK_CPUID_REGISTER(edx, EDX1_SSE2); + flags->sse2 = CPUID_CHECK_REG(edx, EDX1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE3); + flags->sse3 = CPUID_CHECK_REG(ecx, ECX1_SSE3); #endif #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_1); + flags->sse41 = CPUID_CHECK_REG(ecx, ECX1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = CHECK_CPUID_REGISTER(ecx, ECX1_SSE4_2); + flags->sse42 = CPUID_CHECK_REG(ecx, ECX1_SSE4_2); #endif #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = CHECK_CPUID_REGISTER(ecx, ECX1_AVX); + flags->avx = CPUID_CHECK_REG(ecx, ECX1_AVX); #endif } @@ -126,10 +127,10 @@ detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = CHECK_CPUID_REGISTER(ebx, EBX7_AVX2); + flags->avx2 = CPUID_CHECK_REG(ebx, EBX7_AVX2); #endif #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = CHECK_CPUID_REGISTER(ecx, ECX7_AVX512_VBMI); + flags->avx512vbmi = CPUID_CHECK_REG(ecx, ECX7_AVX512_VBMI); #endif } From 5a5acc202b830470ceda896e7de59cc8d2050766 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 13:58:19 +0200 Subject: [PATCH 07/78] fix typo --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index dae39e0e5d1edb..3867e30ae90414 100644 --- a/configure.ac +++ b/configure.ac @@ -7812,7 +7812,7 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) PY_SIMD_DETECT([AVX], [-mavx]) PY_SIMD_DETECT([AVX2], [-mavx2]) From ac1b1657939edf880e927a3858267d278a83879d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 15:03:34 +0200 Subject: [PATCH 08/78] fix configure script --- configure | 18 +++++++++--------- configure.ac | 13 ++++++++----- pyconfig.h.in | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/configure b/configure index 8899ad2eb4f5de..e749010ccfe815 100755 --- a/configure +++ b/configure @@ -30775,15 +30775,15 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse4.2... " >&6; } -if test ${ax_cv_check_cflags___msse4_2+y} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 +printf %s "checking whether C compiler accepts -msse4.1... 
" >&6; } +if test ${ax_cv_check_cflags___msse4_1+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.2" + CFLAGS="$CFLAGS -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ @@ -30797,16 +30797,16 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_2=yes + ax_cv_check_cflags___msse4_1=yes else $as_nop - ax_cv_check_cflags___msse4_2=no + ax_cv_check_cflags___msse4_1=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } -if test "x$ax_cv_check_cflags___msse4_2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } +if test "x$ax_cv_check_cflags___msse4_1" = xyes then : printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h diff --git a/configure.ac b/configure.ac index 3867e30ae90414..707cda71c9c903 100644 --- a/configure.ac +++ b/configure.ac @@ -7789,11 +7789,14 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) -dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, DEFINE_CONSTANT_SUFFIX) +dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, NORMALIZED_NAME) AC_DEFUN([PY_SIMD_DETECT], [ - AS_VAR_PUSHDEF([py_var], [[ac_cv_simd_]m4_tolower($1)]) + AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], + [[ac_cv_can_compile_simd_]m4_tolower([$1])], + [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[CAN_COMPILE_SIMD_]m4_toupper($1)[_INSTRUCTIONS]], [$3])]) + [[CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) 
AC_MSG_CHECKING([checking SIMD instruction set]) AX_CHECK_COMPILE_FLAG([$2], [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], @@ -7812,8 +7815,8 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.1], [CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS]) - PY_SIMD_DETECT([SSE4.2], [-msse4.2], [CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) PY_SIMD_DETECT([AVX], [-mavx]) PY_SIMD_DETECT([AVX2], [-mavx2]) PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) diff --git a/pyconfig.h.in b/pyconfig.h.in index 123a4cc40936ae..b5ad1b310f3e5d 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -47,7 +47,7 @@ /* Define if '-msse3' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS -/* Define if '-msse4.2' is a valid compiler flag. */ +/* Define if '-msse4.1' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS /* Define if '-msse4.2' is a valid compiler flag. */ From 6f304f2bea99c1a1102bf84dcd582148167acc1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 6 Oct 2024 15:47:41 +0200 Subject: [PATCH 09/78] fix bit detection --- Python/cpuinfo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2cd98b3a17fbf4..40423d577b4221 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,8 +9,8 @@ /* Macro to mark a CPUID register function parameter as being used. */ #define CPUID_REG(PARAM) PARAM -/* Macro to check a CPUID register bit. */ -#define CPUID_CHECK_REG(REGISTER, MASK) ((REGISTER) & (MASK)) == 0 ? 0 : 1 +/* Macro to check one or more CPUID register bits. */ +#define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 
1 : 0) /* * For simplicity, we only enable SIMD instructions for Intel CPUs, From f3bd0275f8d5c80a68f196aa4fcbeaa9a5eae721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:47:24 +0200 Subject: [PATCH 10/78] Harden detection of AVX instructions. --- Include/internal/pycore_cpuinfo.h | 117 +++- Python/cpuinfo.c | 568 +++++++++++++--- configure | 1046 ++++++++++++++++++++++++++++- configure.ac | 39 +- pyconfig.h.in | 63 ++ 5 files changed, 1722 insertions(+), 111 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 418c3e7d3fb107..145da8c9d2d2ae 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -9,33 +9,114 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -typedef struct { +#include <stdint.h> // uint8_t + +/* Macro indicating that the member is a CPUID bit. */ +#define _Py_SIMD_FEAT uint8_t +/* Macro indicating that the member is a XCR0 bit. */ +#define _Py_SIMD_XCR0_BIT uint8_t + +typedef struct py_simd_features { /* Streaming SIMD Extensions */ - uint8_t sse: 1; - uint8_t sse2: 1; - uint8_t sse3: 1; - uint8_t sse41: 1; // SSE4.1 - uint8_t sse42: 1; // SSE4.2 + _Py_SIMD_FEAT sse: 1; + _Py_SIMD_FEAT sse2: 1; + _Py_SIMD_FEAT sse3: 1; + _Py_SIMD_FEAT ssse3: 1; // Supplemental SSE3 instructions + _Py_SIMD_FEAT sse41: 1; // SSE4.1 + _Py_SIMD_FEAT sse42: 1; // SSE4.2 /* Advanced Vector Extensions */ - uint8_t avx: 1; - uint8_t avx2: 1; - uint8_t avx512vbmi: 1; // AVX-512 Vector Byte Manipulation Instructions + _Py_SIMD_FEAT avx: 1; + _Py_SIMD_FEAT avx_ifma: 1; + _Py_SIMD_FEAT avx_ne_convert: 1; - uint8_t done; // indicate whether the structure was filled or not -} py_cpu_simd_flags; + _Py_SIMD_FEAT avx_vnni: 1; + _Py_SIMD_FEAT avx_vnni_int8: 1; + _Py_SIMD_FEAT avx_vnni_int16: 1; -/* Detect the available SIMD features on this machine. 
*/ -extern void -_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags); + /* Advanced Vector Extensions 2. */ + _Py_SIMD_FEAT avx2: 1; + + /* + * AVX-512 instruction set are grouped by the processor generation + * that implements them (see https://en.wikipedia.org/wiki/AVX-512). + * + * We do not include GFNI, VPCLMULQDQ and VAES instructions since + * they are not exactly AVX-512 per se, nor do we include BF16 or + * FP16 since they operate on bfloat16 and binary16 (half-float). + */ + _Py_SIMD_FEAT avx512_f: 1; + _Py_SIMD_FEAT avx512_cd: 1; + + _Py_SIMD_FEAT avx512_er: 1; + _Py_SIMD_FEAT avx512_pf: 1; + + _Py_SIMD_FEAT avx512_4fmaps: 1; + _Py_SIMD_FEAT avx512_4vnniw: 1; + + _Py_SIMD_FEAT avx512_vpopcntdq: 1; + + _Py_SIMD_FEAT avx512_vl: 1; + _Py_SIMD_FEAT avx512_dq: 1; + _Py_SIMD_FEAT avx512_bw: 1; + + _Py_SIMD_FEAT avx512_ifma: 1; + + _Py_SIMD_FEAT avx512_vbmi: 1; + + _Py_SIMD_FEAT avx512_vnni: 1; + + _Py_SIMD_FEAT avx512_vbmi2: 1; + _Py_SIMD_FEAT avx512_bitalg: 1; + + _Py_SIMD_FEAT avx512_vp2intersect: 1; + + _Py_SIMD_FEAT os_xsave: 1; // XSAVE is supported + + /* XCR0 register bits */ + _Py_SIMD_XCR0_BIT xcr0_sse: 1; + + /* + * On some Intel CPUs, it is possible for the CPU to support AVX2 + * instructions even though the underlying OS does not know about + * AVX. In particular, only (SSE) XMM registers will be saved and + * restored on context-switch, but not (AVX) YMM registers. + */ + _Py_SIMD_XCR0_BIT xcr0_avx: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; + _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; + + /* + * We want to align the bit-fields correctly so the bitsize of + * 'done' must be chosen so that the sum of all bit fields is + * a multiple of 8. + * + * Whenever a field is added or removed above, update the + * following number (35) and adjust the bitsize of 'done'. 
+ */ + uint8_t done: 5; // set if the structure was filled +} py_simd_features; /* - * Apply a bitwise-OR on all flags in 'out' using those in 'src', - * unconditionally updating 'out' (i.e. out->done is ignored) and - * setting 'out->done' to 1. + * Explicitly initialize all members to zero to guarantee that + * we never have an un-initialized attribute at runtime which + * could lead to an illegal instruction error. */ extern void -_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, const py_cpu_simd_flags *src); +_Py_disable_simd_features(py_simd_features *flags); + +/* +* Apply a bitwise-OR on all flags in 'out' using those in 'src', +* unconditionally updating 'out' (i.e. out->done is ignored) and +* setting 'out->done' to 1. +*/ +extern void +_Py_update_simd_features(py_simd_features *out, const py_simd_features *src); + +/* Detect the available SIMD features on this machine. */ +extern void +_Py_detect_simd_features(py_simd_features *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 40423d577b4221..5ab068fa4af0e9 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,9 +4,18 @@ * See https://en.wikipedia.org/wiki/CPUID for details. */ +/* + * In order to properly maintain this file, the following rules should + * be observed and enforced if possible: + * + * - Defining the SIMD_*_INSTRUCTIONS_DETECTION_GUARD macros should + */ + #include "Python.h" #include "pycore_cpuinfo.h" +#include // UINT32_C() + /* Macro to mark a CPUID register function parameter as being used. */ #define CPUID_REG(PARAM) PARAM /* Macro to check one or more CPUID register bits. */ @@ -17,70 +26,164 @@ * even though we could support ARM NEON and POWER. */ #if defined(__x86_64__) && defined(__GNUC__) -# include +# include // __cpuid_count() #elif defined(_M_X64) -# include +# include // __cpuidex() #else # undef CPUID_REG # define CPUID_REG(PARAM) Py_UNUSED(PARAM) #endif -// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64). 
-// However, since autoconf incorrectly assumes so when compiling a universal2 -// binary, we disable all AVX-related instructions. -#if defined(__APPLE__) && defined(__arm64__) -# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS -# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS -# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any SSE instructions detection code. */ +# define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX instructions detection code. */ +# define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX-2 instructions detection code. 
*/ +# define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ + // macros above should be sorted in an alphabetical order +/* Used to guard any AVX-512 instructions detection code. */ +# define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif + +// On macOS, checking the XCR0 register is NOT a guaranteed way +// to ensure the usability of AVX-512. As such, we disable the +// entire set of AVX-512 instructions. +// +// See https://stackoverflow.com/a/72523150/9579194. +// +// Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be +// compiled on x86_64). However, since autoconf incorrectly assumes so +// when compiling a universal2 binary, we disable AVX for such builds. 
+#if defined(__APPLE__) +# undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +# if defined(__arm64__) +# undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +# undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +# endif +#endif + +#if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# define SHOULD_DETECT_SIMD_FEATURES_L1 +#endif + +#if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# define SHOULD_DETECT_SIMD_FEATURES_L7 +# define SHOULD_DETECT_SIMD_FEATURES_L7S0 +#endif + +#if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=1. */ +# define SHOULD_DETECT_SIMD_FEATURES_L7 +# define SHOULD_DETECT_SIMD_FEATURES_L7S1 #endif /* * The macros below describe masks to apply on CPUID output registers. * - * Each macro is of the form [REGISTER][PAGE]_[FEATURE] where + * Each macro is of the form _L[S]_, + * where <> (resp. []) denotes a required (resp. optional) group and: * - * - REGISTER is either EBX, ECX or EDX, - * - PAGE is either 1 or 7 depending, and + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. 
*/ -#define EDX1_SSE (1 << 25) -#define EDX1_SSE2 (1 << 26) -#define ECX1_SSE3 (1 << 9) -#define ECX1_SSE4_1 (1 << 19) -#define ECX1_SSE4_2 (1 << 20) -#define ECX1_AVX (1 << 28) -#define EBX7_AVX2 (1 << 5) -#define ECX7_AVX512_VBMI (1 << 1) -/* - * Indicate whether the CPUID input EAX=1 may be needed to - * detect SIMD basic features (e.g., SSE). - */ -#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) -# define MAY_DETECT_CPUID_SIMD_FEATURES -#endif +/* CPUID (LEAF=1, SUBLEAF=0) */ +#define ECX_L1_SSE3 (UINT32_C(1) << 0) +#define ECX_L1_SSSE3 (UINT32_C(1) << 9) +#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) +#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) +#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) +#define ECX_L1_AVX (UINT32_C(1) << 28) -/* - * Indicate whether the CPUID input EAX=7 may be needed to - * detect SIMD extended features (e.g., AVX2 or AVX-512). 
- */ -#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) -# define MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES -#endif +#define EDX_L1_SSE (UINT32_C(1) << 25) +#define EDX_L1_SSE2 (UINT32_C(1) << 26) + +/* CPUID (LEAF=7, SUBLEAF=0) */ +#define EBX_L7_AVX2 (UINT32_C(1) << 5) +#define EBX_L7_AVX512_F (UINT32_C(1) << 16) +#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) +#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) +#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) +#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) +#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) +#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) +#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) + +#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) +#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) +#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) +#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) +#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) + +#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) +#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) +#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) + +/* CPUID (LEAF=7, SUBLEAF=1) */ +#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) +#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) + +#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) +#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) +#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) static inline void -get_cpuid_info(int32_t level /* input eax */, - int32_t count /* input ecx */, - int32_t *CPUID_REG(eax), - int32_t *CPUID_REG(ebx), - int32_t *CPUID_REG(ecx), - int32_t *CPUID_REG(edx)) +get_cpuid_info(uint32_t level /* input eax */, + uint32_t count /* input ecx */, + uint32_t *CPUID_REG(eax), + uint32_t *CPUID_REG(ebx), + uint32_t *CPUID_REG(ecx), + uint32_t *CPUID_REG(edx)) { #if defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); @@ -94,80 +197,387 @@ get_cpuid_info(int32_t level /* input eax */, #endif } -/* Processor Info and Feature Bits 
(EAX=1, ECX=0). */ +/* XSAVE State Components. */ +#define XCR0_SSE (UINT32_C(1) << 1) +#define XCR0_AVX (UINT32_C(1) << 2) +#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) +#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) +#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) + +static inline uint64_t +get_xgetbv(uint32_t index) +{ +#if defined(__x86_64__) && defined(__GNUC__) + uint32_t eax = 0, edx = 0; + __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + return ((uint64_t)edx << 32) | eax; +#elif defined (_MSC_VER) + return (uint64_t)_xgetbv(index); +#else + (void) index; + return 0; +#endif +} + +/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ +static inline uint32_t +detect_cpuid_maxleaf(void) +{ + uint32_t maxlevel = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); + return maxlevel; +} + +/* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_cpu_simd_features(py_cpu_simd_flags *flags) +detect_simd_features(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - flags->sse = CPUID_CHECK_REG(edx, EDX1_SSE); + flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif #ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - flags->sse2 = CPUID_CHECK_REG(edx, EDX1_SSE2); + flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif #ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - flags->sse3 = CPUID_CHECK_REG(ecx, ECX1_SSE3); + flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); +#endif +#ifdef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif #ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - flags->sse41 = CPUID_CHECK_REG(ecx, ECX1_SSE4_1); + flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - flags->sse42 = CPUID_CHECK_REG(ecx, ECX1_SSE4_2); + flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif +#endif + +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - flags->avx = CPUID_CHECK_REG(ecx, ECX1_AVX); + flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); + flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif #endif } -/* Extended Feature Bits (EAX=7, ECX=0). */ +/* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_cpu_simd_extended_features(py_cpu_simd_flags *flags) +detect_simd_extended_features_ecx_0(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - int32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - flags->avx2 = CPUID_CHECK_REG(ebx, EBX7_AVX2); + flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); +#endif +#endif + +#ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif + +#ifdef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); +#endif + #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - flags->avx512vbmi = CPUID_CHECK_REG(ecx, ECX7_AVX512_VBMI); + flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + flags->avx512_vbmi2 = 
CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); +#endif +#ifdef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); +#endif #endif } -void -_Py_detect_cpu_simd_features(py_cpu_simd_flags *flags) +/* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ +static inline void +detect_simd_extended_features_ecx_1(py_simd_features *flags, + uint32_t eax, uint32_t ebx, + uint32_t ecx, uint32_t edx) { - if (flags->done) { - return; - } -#ifdef MAY_DETECT_CPUID_SIMD_FEATURES - detect_cpu_simd_features(flags); -#else - flags->sse = flags->sse2 = flags->sse3 = flags->sse41 = flags->sse42 = 0; - flags->avx = 0; + // Keep the ordering and newlines as they are declared in the structure. +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef MAY_DETECT_CPUID_SIMD_EXTENDED_FEATURES - detect_cpu_simd_extended_features(flags); -#else - flags->avx2 = flags->avx512vbmi = 0; + +#ifdef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); +#endif + +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); +#endif +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif +#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); +#endif +#endif +} + +static inline void +detect_simd_xsave_state(py_simd_features *flags) +{ + uint64_t xcr0 = flags->os_xsave ? 
get_xgetbv(0) : 0; + flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); + + flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); + + flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); + flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); + flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +} + +static inline void +finalize_simd_features(py_simd_features *flags) +{ + assert(flags->done == 0); + // Here, any flag that may depend on others should be correctly set + // at runtime to avoid illegal instruction errors. flags->done = 1; } +/* + * Return 0 if flags are compatible and correctly set and -1 otherwise. + * + * If this function returns -1, 'flags' should disable all SIMD features + * to avoid encountering a possible illegal instruction error at runtime. + */ +static inline int +validate_simd_features(const py_simd_features *flags) +{ + if (flags->done != 1) { + return -1; + } + + // AVX-512/F is required to support any other AVX-512 instruction set + uint8_t avx512_require_f = ( + flags->avx512_cd || flags->avx512_er || flags->avx512_pf || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || + flags->avx512_vbmi || + flags->avx512_4fmaps || flags->avx512_4vnniw || + flags->avx512_vpopcntdq || + flags->avx512_vnni || flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vp2intersect + ); + if (!flags->avx512_f && avx512_require_f) { + return -1; + } + + return 0; +} + +void +_Py_disable_simd_features(py_simd_features *flags) +{ + // Keep the ordering and newlines as they are declared in the structure. 
+#define ZERO(FLAG) flags->FLAG = 0 + ZERO(sse); + ZERO(sse2); + ZERO(sse3); + ZERO(ssse3); + ZERO(sse41); + ZERO(sse42); + + ZERO(avx); + ZERO(avx_ifma); + ZERO(avx_ne_convert); + + ZERO(avx_vnni); + ZERO(avx_vnni_int8); + ZERO(avx_vnni_int16); + + ZERO(avx2); + + ZERO(avx512_f); + ZERO(avx512_cd); + + ZERO(avx512_er); + ZERO(avx512_pf); + + ZERO(avx512_4fmaps); + ZERO(avx512_4vnniw); + + ZERO(avx512_vpopcntdq); + + ZERO(avx512_vl); + ZERO(avx512_dq); + ZERO(avx512_bw); + + ZERO(avx512_ifma); + + ZERO(avx512_vbmi); + + ZERO(avx512_vnni); + + ZERO(avx512_vbmi2); + ZERO(avx512_bitalg); + + ZERO(avx512_vp2intersect); + + ZERO(os_xsave); + + ZERO(xcr0_sse); + ZERO(xcr0_avx); + ZERO(xcr0_avx512_opmask); + ZERO(xcr0_avx512_zmm_hi256); + ZERO(xcr0_avx512_hi16_zmm); +#undef ZERO +} + void -_Py_extend_cpu_simd_features(py_cpu_simd_flags *out, - const py_cpu_simd_flags *src) +_Py_update_simd_features(py_simd_features *out, + const py_simd_features *src) { + // Keep the ordering and newlines as they are declared in the structure. 
#define UPDATE(FLAG) out->FLAG |= src->FLAG UPDATE(sse); UPDATE(sse2); UPDATE(sse3); + UPDATE(ssse3); UPDATE(sse41); UPDATE(sse42); UPDATE(avx); + UPDATE(avx_ifma); + UPDATE(avx_ne_convert); + + UPDATE(avx_vnni); + UPDATE(avx_vnni_int8); + UPDATE(avx_vnni_int16); + UPDATE(avx2); - UPDATE(avx512vbmi); + + UPDATE(avx512_f); + UPDATE(avx512_cd); + + UPDATE(avx512_er); + UPDATE(avx512_pf); + + UPDATE(avx512_4fmaps); + UPDATE(avx512_4vnniw); + + UPDATE(avx512_vpopcntdq); + + UPDATE(avx512_vl); + UPDATE(avx512_dq); + UPDATE(avx512_bw); + + UPDATE(avx512_ifma); + + UPDATE(avx512_vbmi); + + UPDATE(avx512_vnni); + + UPDATE(avx512_vbmi2); + UPDATE(avx512_bitalg); + + UPDATE(avx512_vp2intersect); + + UPDATE(os_xsave); + + UPDATE(xcr0_sse); + UPDATE(xcr0_avx); + UPDATE(xcr0_avx512_opmask); + UPDATE(xcr0_avx512_zmm_hi256); + UPDATE(xcr0_avx512_hi16_zmm); #undef UPDATE out->done = 1; } + +void +_Py_detect_simd_features(py_simd_features *flags) +{ + if (flags->done) { + return; + } + _Py_disable_simd_features(flags); + uint32_t maxleaf = detect_cpuid_maxleaf(); + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; +#ifdef SHOULD_DETECT_SIMD_FEATURES_L1 + if (maxleaf >= 1) { + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + detect_simd_features(flags, eax, ebx, ecx, edx); + if (flags->os_xsave) { + detect_simd_xsave_state(flags); + } + } +#else + (void) maxleaf; + (void) eax; (void) ebx; (void) ecx; (void) edx; +#endif +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7 + if (maxleaf >= 7) { +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + detect_simd_extended_features_ecx_0(flags, eax, ebx, ecx, edx); +#endif +#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S1 + eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + detect_simd_extended_features_ecx_1(flags, eax, ebx, ecx, edx); +#endif + } +#else + (void) maxleaf; + (void) eax; (void) ebx; (void) ecx; (void) edx; 
+#endif + finalize_simd_features(flags); + if (validate_simd_features(flags) < 0) { + _Py_disable_simd_features(flags); + } +} diff --git a/configure b/configure index e749010ccfe815..2bcec7f82ce042 100755 --- a/configure +++ b/configure @@ -30623,9 +30623,11 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# Detection for more instruction sets can be added. By default, we detect -# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +# See py_simd_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations +# for AVX-512 instructions. if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + # SSE @@ -30773,6 +30775,54 @@ fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 +printf %s "checking whether C compiler accepts -mssse3... " >&6; } +if test ${ax_cv_check_cflags___mssse3+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mssse3=yes +else $as_nop + ax_cv_check_cflags___mssse3=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } +if test "x$ax_cv_check_cflags___mssse3" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 @@ -30866,6 +30916,7 @@ fi + # AVX @@ -30917,6 +30968,248 @@ fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 +printf %s "checking whether C compiler accepts -mavxifma... " >&6; } +if test ${ax_cv_check_cflags___mavxifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxifma=yes +else $as_nop + ax_cv_check_cflags___mavxifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } +if test "x$ax_cv_check_cflags___mavxifma" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 +printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } +if test ${ax_cv_check_cflags___mavxneconvert+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxneconvert" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxneconvert=yes +else $as_nop + ax_cv_check_cflags___mavxneconvert=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags___mavxneconvert" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 +printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } +if test ${ax_cv_check_cflags___mavxvnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnni=yes +else $as_nop + ax_cv_check_cflags___mavxvnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } +if test "x$ax_cv_check_cflags___mavxvnni" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint8+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint8" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint8=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint8=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } +if test ${ax_cv_check_cflags___mavxvnniint16+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavxvnniint16" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavxvnniint16=yes +else $as_nop + ax_cv_check_cflags___mavxvnniint16=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # AVX 2 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 @@ -30962,20 +31255,21 @@ fi + # { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 -printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi+y} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 +printf %s "checking whether C compiler accepts -mavx512f... " >&6; } +if test ${ax_cv_check_cflags___mavx512f+y} then : printf %s "(cached) " >&6 else $as_nop ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi" + CFLAGS="$CFLAGS -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -30989,19 +31283,747 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi=yes + ax_cv_check_cflags___mavx512f=yes else $as_nop - ax_cv_check_cflags___mavx512vbmi=no + ax_cv_check_cflags___mavx512f=no fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } +if test "x$ax_cv_check_cflags___mavx512f" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 +printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } +if test ${ax_cv_check_cflags___mavx512cd+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512cd" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512cd=yes +else $as_nop + ax_cv_check_cflags___mavx512cd=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } +if test "x$ax_cv_check_cflags___mavx512cd" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 +printf %s "checking whether C compiler accepts -mavx512er... " >&6; } +if test ${ax_cv_check_cflags___mavx512er+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512er" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512er=yes +else $as_nop + ax_cv_check_cflags___mavx512er=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } +if test "x$ax_cv_check_cflags___mavx512er" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 +printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } +if test ${ax_cv_check_cflags___mavx512pf+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512pf" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512pf=yes +else $as_nop + ax_cv_check_cflags___mavx512pf=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } +if test "x$ax_cv_check_cflags___mavx512pf" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 +printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } +if test ${ax_cv_check_cflags___mavx5124fmaps+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124fmaps" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124fmaps=yes +else $as_nop + ax_cv_check_cflags___mavx5124fmaps=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 +printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } +if test ${ax_cv_check_cflags___mavx5124vnniw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx5124vnniw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx5124vnniw=yes +else $as_nop + ax_cv_check_cflags___mavx5124vnniw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 +printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } +if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vpopcntdq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vpopcntdq=yes +else $as_nop + ax_cv_check_cflags___mavx512vpopcntdq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 +printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } +if test ${ax_cv_check_cflags___mavx512vl+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vl=yes +else $as_nop + ax_cv_check_cflags___mavx512vl=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } +if test "x$ax_cv_check_cflags___mavx512vl" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 +printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } +if test ${ax_cv_check_cflags___mavx512dq+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512dq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512dq=yes +else $as_nop + ax_cv_check_cflags___mavx512dq=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } +if test "x$ax_cv_check_cflags___mavx512dq" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 +printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } +if test ${ax_cv_check_cflags___mavx512bw+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bw=yes +else $as_nop + ax_cv_check_cflags___mavx512bw=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } +if test "x$ax_cv_check_cflags___mavx512bw" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 +printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } +if test ${ax_cv_check_cflags___mavx512ifma+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512ifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512ifma=yes +else $as_nop + ax_cv_check_cflags___mavx512ifma=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags___mavx512ifma" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 +printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } +if test ${ax_cv_check_cflags___mavx512vnni+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vnni=yes +else $as_nop + ax_cv_check_cflags___mavx512vnni=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags___mavx512vnni" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } +if test ${ax_cv_check_cflags___mavx512vbmi2+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vbmi2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vbmi2=yes +else $as_nop + ax_cv_check_cflags___mavx512vbmi2=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... 
" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 +printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } +if test ${ax_cv_check_cflags___mavx512bitalg+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512bitalg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512bitalg=yes +else $as_nop + ax_cv_check_cflags___mavx512bitalg=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + +else $as_nop + : +fi + + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 +printf %s "checking checking SIMD instruction set... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 +printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } +if test ${ax_cv_check_cflags___mavx512vp2intersect+y} +then : + printf %s "(cached) " >&6 +else $as_nop + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -mavx512vp2intersect" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags___mavx512vp2intersect=yes +else $as_nop + ax_cv_check_cflags___mavx512vp2intersect=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes +then : + +printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : diff --git a/configure.ac b/configure.ac index 707cda71c9c903..74a8e785c229bf 100644 --- a/configure.ac +++ b/configure.ac @@ -7809,17 +7809,52 @@ AC_DEFUN([PY_SIMD_DETECT], [ # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# Detection for more instruction sets can be added. By default, we detect -# SSE-based instruction sets, AVX/AVX2 and AVX512 VBMI. +# See py_simd_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations +# for AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then + # SSE PY_SIMD_DETECT([SSE], [-msse]) PY_SIMD_DETECT([SSE2], [-msse2]) PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSSE3], [-mssse3]) PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) + # AVX PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) + PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) + # + PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) + PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) + PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) + # AVX 2 PY_SIMD_DETECT([AVX2], [-mavx2]) + # + PY_SIMD_DETECT([AVX512_F], [-mavx512f]) + PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) + PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) + # + PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) + PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) + # + PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) + # + PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) + PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) + PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) + # + PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) + # PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) + # + PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) + # + PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) + PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) + # + PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' diff --git a/pyconfig.h.in b/pyconfig.h.in index b5ad1b310f3e5d..625c9798d6272b 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -35,12 +35,72 @@ /* Define if '-mavx2' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +/* Define if '-mavx5124fmaps' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + /* Define if '-mavx512vbmi' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + /* Define if '-mavx' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +/* Define if '-mavxneconvert' is a valid compiler flag. 
*/ +#undef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + /* Define if '-msse2' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS @@ -56,6 +116,9 @@ /* Define if '-msse' is a valid compiler flag. */ #undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +/* Define if '-mssse3' is a valid compiler flag. */ +#undef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 From 16b2aed47d05e5de32e90e9ad42cc789b54a2ecf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:54:23 +0200 Subject: [PATCH 11/78] do not guard the parsing of `os_xsave` --- Python/cpuinfo.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 5ab068fa4af0e9..8d97edc71b45ca 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -259,9 +259,10 @@ detect_simd_features(py_simd_features *flags, #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); - flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif #endif + + flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ From 5018fa930f576f8ec8cb7ec10a8fc65b4e1712cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:14:59 +0200 Subject: [PATCH 12/78] Remove old comment. 
--- Python/cpuinfo.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8d97edc71b45ca..92a0c0e3c64b02 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,13 +4,6 @@ * See https://en.wikipedia.org/wiki/CPUID for details. */ -/* - * In order to properly maintain this file, the following rules should - * be observed and enforced if possible: - * - * - Defining the SIMD_*_INSTRUCTIONS_DETECTION_GUARD macros should - */ - #include "Python.h" #include "pycore_cpuinfo.h" From e75806594785f673e216fa23863c9e9fac243457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:27:28 +0200 Subject: [PATCH 13/78] Update cpuinfo.c comments --- Python/cpuinfo.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 92a0c0e3c64b02..853404d00e56e3 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -10,14 +10,12 @@ #include // UINT32_C() /* Macro to mark a CPUID register function parameter as being used. */ -#define CPUID_REG(PARAM) PARAM +#define CPUID_REG(PARAM) PARAM /* Macro to check one or more CPUID register bits. */ #define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) -/* - * For simplicity, we only enable SIMD instructions for Intel CPUs, - * even though we could support ARM NEON and POWER. - */ +// For simplicity, we only enable SIMD instructions for Intel CPUs, +// even though we could support ARM NEON and POWER. 
#if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() #elif defined(_M_X64) @@ -33,7 +31,7 @@ || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any SSE instructions detection code. */ # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif @@ -44,13 +42,13 @@ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX instructions detection code. */ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif #if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX-2 instructions detection code. */ # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif @@ -71,7 +69,7 @@ || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ - // macros above should be sorted in an alphabetical order + // macros above should be sorted in alphabetical order /* Used to guard any AVX-512 instructions detection code. */ # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -84,7 +82,7 @@ // // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be // compiled on x86_64). However, since autoconf incorrectly assumes so -// when compiling a universal2 binary, we disable AVX for such builds. +// when compiling a universal2 binary, we disable AVX on such builds. 
#if defined(__APPLE__) # undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD # if defined(__arm64__) @@ -181,7 +179,7 @@ get_cpuid_info(uint32_t level /* input eax */, #if defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(_M_X64) - int32_t info[4] = {0}; + uint32_t info[4] = {0}; __cpuidex(info, level, count); *eax = info[0]; *ebx = info[1]; @@ -247,13 +245,13 @@ detect_simd_features(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif -#endif +#endif // !SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif -#endif +#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } @@ -329,7 +327,7 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif -#endif +#endif // !SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). 
*/ @@ -357,7 +355,7 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif -#endif +#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void @@ -552,7 +550,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif +#endif // !SHOULD_DETECT_SIMD_FEATURES_L1 #ifdef SHOULD_DETECT_SIMD_FEATURES_L7 if (maxleaf >= 7) { #ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 @@ -569,7 +567,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif +#endif // !SHOULD_DETECT_SIMD_FEATURES_L7 finalize_simd_features(flags); if (validate_simd_features(flags) < 0) { _Py_disable_simd_features(flags); From 731be816b460b017b38b90937066c6d19e9e5422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:33:32 +0200 Subject: [PATCH 14/78] Update pycore_cpuinfo.h comments --- Include/internal/pycore_cpuinfo.h | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 145da8c9d2d2ae..ad4966e8f8637a 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -76,25 +76,20 @@ typedef struct py_simd_features { /* XCR0 register bits */ _Py_SIMD_XCR0_BIT xcr0_sse: 1; - /* - * On some Intel CPUs, it is possible for the CPU to support AVX2 - * instructions even though the underlying OS does not know about - * AVX. In particular, only (SSE) XMM registers will be saved and - * restored on context-switch, but not (AVX) YMM registers. - */ + // On some Intel CPUs, it is possible for the CPU to support AVX2 + // instructions even though the underlying OS does not know about + // AVX. 
In particular, only (SSE) XMM registers will be saved and + // restored on context-switch, but not (AVX) YMM registers. _Py_SIMD_XCR0_BIT xcr0_avx: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; - /* - * We want to align the bit-fields correctly so the bitsize of - * 'done' must be chosen so that the sum of all bit fields is - * a multiple of 8. - * - * Whenever a field is added or removed above, update the - * following number (35) and adjust the bitsize of 'done'. - */ + // We want the structure to be aligned correctly, namely + // its size in bits must be a multiple of 8. + // + // Whenever a field is added or removed above, update the + // number of fields (35) and adjust the bitsize of 'done'. uint8_t done: 5; // set if the structure was filled } py_simd_features; @@ -107,10 +102,15 @@ extern void _Py_disable_simd_features(py_simd_features *flags); /* -* Apply a bitwise-OR on all flags in 'out' using those in 'src', -* unconditionally updating 'out' (i.e. out->done is ignored) and -* setting 'out->done' to 1. -*/ + * Apply a bitwise-OR on all flags in 'out' using those in 'src', + * unconditionally updating 'out' (i.e. 'out->done' is ignored). + * + * This also sets 'out->done' to 1 at the end. + * + * Note that the caller is responsible to ensure that the flags set to 1 + * must not lead to illegal instruction errors if the corresponding SIMD + * instruction(s) are used. 
+ */ extern void _Py_update_simd_features(py_simd_features *out, const py_simd_features *src); From 7947715b3d38a7d4e46065de3087323e1f6917db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:38:29 +0200 Subject: [PATCH 15/78] fix lint --- Include/internal/pycore_cpuinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index ad4966e8f8637a..fbe37fb6a3b936 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -105,7 +105,7 @@ _Py_disable_simd_features(py_simd_features *flags); * Apply a bitwise-OR on all flags in 'out' using those in 'src', * unconditionally updating 'out' (i.e. 'out->done' is ignored). * - * This also sets 'out->done' to 1 at the end. + * This also sets 'out->done' to 1 at the end. * * Note that the caller is responsible to ensure that the flags set to 1 * must not lead to illegal instruction errors if the corresponding SIMD From 7a17cbbe2f03e931115e3f1904b3b497a0b48e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:43:17 +0200 Subject: [PATCH 16/78] I really shouldn't use a Web UI --- Include/internal/pycore_cpuinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index fbe37fb6a3b936..92cdff2c3f55f1 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -87,7 +87,7 @@ typedef struct py_simd_features { // We want the structure to be aligned correctly, namely // its size in bits must be a multiple of 8. - // + // // Whenever a field is added or removed above, update the // number of fields (35) and adjust the bitsize of 'done'. 
uint8_t done: 5; // set if the structure was filled From 76f67b1c527b2b188af24b7ccac4fd3f2f63adca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:58:55 +0200 Subject: [PATCH 17/78] Fix _xgetbv() on Windows builds. --- Python/cpuinfo.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 853404d00e56e3..72101a15272bcc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -17,9 +17,10 @@ // For simplicity, we only enable SIMD instructions for Intel CPUs, // even though we could support ARM NEON and POWER. #if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() #elif defined(_M_X64) -# include // __cpuidex() +# include // _xgetbv() +# include // __cpuidex() #else # undef CPUID_REG # define CPUID_REG(PARAM) Py_UNUSED(PARAM) @@ -202,7 +203,7 @@ get_xgetbv(uint32_t index) uint32_t eax = 0, edx = 0; __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); return ((uint64_t)edx << 32) | eax; -#elif defined (_MSC_VER) +#elif defined(_M_X64) return (uint64_t)_xgetbv(index); #else (void) index; From 0b49a505205a57f966fc6d40112d89dc8f5f963c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:40:28 +0200 Subject: [PATCH 18/78] fix comment --- Python/cpuinfo.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 72101a15272bcc..2c309149fc8102 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -246,13 +246,13 @@ detect_simd_features(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif -#endif // !SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef 
CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif -#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); } @@ -328,7 +328,7 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif -#endif // !SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ @@ -356,7 +356,7 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif -#endif // !SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void @@ -551,7 +551,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // !SHOULD_DETECT_SIMD_FEATURES_L1 +#endif // SHOULD_DETECT_SIMD_FEATURES_L1 #ifdef SHOULD_DETECT_SIMD_FEATURES_L7 if (maxleaf >= 7) { #ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 @@ -568,7 +568,7 @@ _Py_detect_simd_features(py_simd_features *flags) #else (void) maxleaf; (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // !SHOULD_DETECT_SIMD_FEATURES_L7 +#endif // SHOULD_DETECT_SIMD_FEATURES_L7 finalize_simd_features(flags); if (validate_simd_features(flags) < 0) { _Py_disable_simd_features(flags); From 9fd6152c0cf1b54ad737d2ea1460413e96278da3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:53:01 +0200 Subject: [PATCH 19/78] harden detection of CPU features --- Include/internal/pycore_cpuinfo.h | 162 ++++++---- Python/cpuinfo.c | 505 +++++++++++++++--------------- configure.ac | 1 - 3 files 
changed, 354 insertions(+), 314 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 92cdff2c3f55f1..fe934fa13a70b1 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -11,112 +11,138 @@ extern "C" { #include // uint8_t -/* Macro indicating that the member is a CPUID bit. */ -#define _Py_SIMD_FEAT uint8_t -/* Macro indicating that the member is a XCR0 bit. */ -#define _Py_SIMD_XCR0_BIT uint8_t - -typedef struct py_simd_features { - /* Streaming SIMD Extensions */ - _Py_SIMD_FEAT sse: 1; - _Py_SIMD_FEAT sse2: 1; - _Py_SIMD_FEAT sse3: 1; - _Py_SIMD_FEAT ssse3: 1; // Supplemental SSE3 instructions - _Py_SIMD_FEAT sse41: 1; // SSE4.1 - _Py_SIMD_FEAT sse42: 1; // SSE4.2 - - /* Advanced Vector Extensions */ - _Py_SIMD_FEAT avx: 1; - _Py_SIMD_FEAT avx_ifma: 1; - _Py_SIMD_FEAT avx_ne_convert: 1; - - _Py_SIMD_FEAT avx_vnni: 1; - _Py_SIMD_FEAT avx_vnni_int8: 1; - _Py_SIMD_FEAT avx_vnni_int16: 1; - - /* Advanced Vector Extensions 2. */ - _Py_SIMD_FEAT avx2: 1; - +/* Declare a member of 'py_cpuid_features' storing a CPUID bit. */ +#define _Py_CPUID_DECL_FEAT(X) uint8_t X:1 +/* Declare a member of 'py_cpuid_features' storing a XCR0 bit. 
*/ +#define _Py_CPUID_DECL_XCR0(X) uint8_t X:1 + +typedef struct py_cpuid_features { + // --- Streaming SIMD Extensions ------------------------------------------ + _Py_CPUID_DECL_FEAT(sse); + _Py_CPUID_DECL_FEAT(sse2); + _Py_CPUID_DECL_FEAT(sse3); + _Py_CPUID_DECL_FEAT(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FEAT(sse41); // SSE4.1 + _Py_CPUID_DECL_FEAT(sse42); // SSE4.2 + + // --- Advanced Vector Extensions ----------------------------------------- + _Py_CPUID_DECL_FEAT(avx); + _Py_CPUID_DECL_FEAT(avx_ifma); + _Py_CPUID_DECL_FEAT(avx_ne_convert); + + _Py_CPUID_DECL_FEAT(avx_vnni); + _Py_CPUID_DECL_FEAT(avx_vnni_int8); + _Py_CPUID_DECL_FEAT(avx_vnni_int16); + + // --- Advanced Vector Extensions 2 --------------------------------------- + _Py_CPUID_DECL_FEAT(avx2); + + // --- Advanced Vector Extensions (512-bit) ------------------------------- /* + * * AVX-512 instruction set are grouped by the processor generation * that implements them (see https://en.wikipedia.org/wiki/AVX-512). * * We do not include GFNI, VPCLMULQDQ and VAES instructions since * they are not exactly AVX-512 per se, nor do we include BF16 or * FP16 since they operate on bfloat16 and binary16 (half-float). + * + * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for + * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). 
*/ - _Py_SIMD_FEAT avx512_f: 1; - _Py_SIMD_FEAT avx512_cd: 1; - - _Py_SIMD_FEAT avx512_er: 1; - _Py_SIMD_FEAT avx512_pf: 1; + _Py_CPUID_DECL_FEAT(avx512_f); + _Py_CPUID_DECL_FEAT(avx512_cd); - _Py_SIMD_FEAT avx512_4fmaps: 1; - _Py_SIMD_FEAT avx512_4vnniw: 1; + _Py_CPUID_DECL_FEAT(avx512_er); + _Py_CPUID_DECL_FEAT(avx512_pf); - _Py_SIMD_FEAT avx512_vpopcntdq: 1; + _Py_CPUID_DECL_FEAT(avx512_4fmaps); + _Py_CPUID_DECL_FEAT(avx512_4vnniw); - _Py_SIMD_FEAT avx512_vl: 1; - _Py_SIMD_FEAT avx512_dq: 1; - _Py_SIMD_FEAT avx512_bw: 1; + _Py_CPUID_DECL_FEAT(avx512_vpopcntdq); - _Py_SIMD_FEAT avx512_ifma: 1; + _Py_CPUID_DECL_FEAT(avx512_vl); + _Py_CPUID_DECL_FEAT(avx512_dq); + _Py_CPUID_DECL_FEAT(avx512_bw); - _Py_SIMD_FEAT avx512_vbmi: 1; + _Py_CPUID_DECL_FEAT(avx512_ifma); + _Py_CPUID_DECL_FEAT(avx512_vbmi); - _Py_SIMD_FEAT avx512_vnni: 1; + _Py_CPUID_DECL_FEAT(avx512_vnni); - _Py_SIMD_FEAT avx512_vbmi2: 1; - _Py_SIMD_FEAT avx512_bitalg: 1; + _Py_CPUID_DECL_FEAT(avx512_vbmi2); + _Py_CPUID_DECL_FEAT(avx512_bitalg); - _Py_SIMD_FEAT avx512_vp2intersect: 1; + _Py_CPUID_DECL_FEAT(avx512_vp2intersect); - _Py_SIMD_FEAT os_xsave: 1; // XSAVE is supported + // --- Instructions ------------------------------------------------------- + _Py_CPUID_DECL_FEAT(cmov); + _Py_CPUID_DECL_FEAT(fma); + _Py_CPUID_DECL_FEAT(popcnt); + _Py_CPUID_DECL_FEAT(pclmulqdq); - /* XCR0 register bits */ - _Py_SIMD_XCR0_BIT xcr0_sse: 1; + _Py_CPUID_DECL_FEAT(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FEAT(os_xsave); // XSAVE is enabled by the OS + // --- XCR0 register bits ------------------------------------------------- + _Py_CPUID_DECL_XCR0(xcr0_sse); // On some Intel CPUs, it is possible for the CPU to support AVX2 // instructions even though the underlying OS does not know about // AVX. In particular, only (SSE) XMM registers will be saved and // restored on context-switch, but not (AVX) YMM registers. 
- _Py_SIMD_XCR0_BIT xcr0_avx: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_opmask: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_zmm_hi256: 1; - _Py_SIMD_XCR0_BIT xcr0_avx512_hi16_zmm: 1; - - // We want the structure to be aligned correctly, namely - // its size in bits must be a multiple of 8. - // + _Py_CPUID_DECL_XCR0(xcr0_avx); + _Py_CPUID_DECL_XCR0(xcr0_avx512_opmask); + _Py_CPUID_DECL_XCR0(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_XCR0(xcr0_avx512_hi16_zmm); + // Whenever a field is added or removed above, update the - // number of fields (35) and adjust the bitsize of 'done'. - uint8_t done: 5; // set if the structure was filled -} py_simd_features; + // number of fields (40) and keep it a multiple of 8 so that the bit + // fields fill whole bytes ('ready' is a plain byte, not a bit-field). + uint8_t ready; // set if the structure is ready for usage +} py_cpuid_features; /* * Explicitly initialize all members to zero to guarantee that * we never have an un-initialized attribute at runtime which * could lead to an illegal instruction error. + * + * This does not mark 'flags' as being ready yet. */ extern void -_Py_disable_simd_features(py_simd_features *flags); +_Py_cpuid_disable_features(py_cpuid_features *flags); /* - * Apply a bitwise-OR on all flags in 'out' using those in 'src', - * unconditionally updating 'out' (i.e. 'out->done' is ignored). + * Check whether the structure is ready and flags are inter-compatible, + * returning 1 on success and 0 otherwise. * - * This also sets 'out->done' to 1 at the end. + * The caller should disable all CPUID detected features if the check + * fails to avoid encountering runtime illegal instruction errors. + */ +extern int +_Py_cpuid_check_features(const py_cpuid_features *flags); + +/* + * Return 1 if all expected flags are set in 'actual', 0 otherwise. * - * Note that the caller is responsible to ensure that the flags set to 1 - * must not lead to illegal instruction errors if the corresponding SIMD - * instruction(s) are used.
+ * If 'actual' or 'expect' are not ready yet, this also returns 0. */ -extern void -_Py_update_simd_features(py_simd_features *out, const py_simd_features *src); +extern int +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); + + +/* + * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + */ +extern int +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect); -/* Detect the available SIMD features on this machine. */ +/* Detect the available features on this machine. */ extern void -_Py_detect_simd_features(py_simd_features *flags); +_Py_cpuid_detect_features(py_cpuid_features *flags); #ifdef __cplusplus } diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 2c309149fc8102..dddacc3d0286ef 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,23 +9,33 @@ #include // UINT32_C() -/* Macro to mark a CPUID register function parameter as being used. */ -#define CPUID_REG(PARAM) PARAM -/* Macro to check one or more CPUID register bits. */ +/* CPUID input and output registers are 32-bit unsigned integers */ +#define CPUID_REG uint32_t +/* Check one or more CPUID register bits. */ #define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) -// For simplicity, we only enable SIMD instructions for Intel CPUs, -// even though we could support ARM NEON and POWER. +// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. +// In the future, we should carefully enable support for ARM NEON and POWER +// as well as AMD. 
#if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() +# define HAS_CPUID_SUPPORT +# define HAS_XGETBV_SUPPORT #elif defined(_M_X64) -# include // _xgetbv() -# include // __cpuidex() +# include // _xgetbv() +# define HAS_XGETBV_SUPPORT +# include // __cpuidex() +# define HAS_CPUID_SUPPORT #else -# undef CPUID_REG -# define CPUID_REG(PARAM) Py_UNUSED(PARAM) +# undef HAS_CPUID_SUPPORT +# undef HAS_XGETBV_SUPPORT #endif +// Below, we declare macros for guarding the detection of SSE, AVX/AVX2 +// and AVX-512 instructions. If the compiler does not even recognize the +// corresponding flags or if we are not on a 64-bit platform we do not +// even try to inspect the output of CPUID for those specific features. +#ifdef HAS_CPUID_SUPPORT #if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -33,7 +43,6 @@ || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any SSE instructions detection code. */ # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif @@ -44,13 +53,11 @@ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX instructions detection code. */ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif #if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX-2 instructions detection code. */ # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif @@ -71,44 +78,46 @@ || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order -/* Used to guard any AVX-512 instructions detection code.
*/ # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif +#endif // HAS_CPUID_SUPPORT // On macOS, checking the XCR0 register is NOT a guaranteed way // to ensure the usability of AVX-512. As such, we disable the // entire set of AVX-512 instructions. // // See https://stackoverflow.com/a/72523150/9579194. -// -// Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be -// compiled on x86_64). However, since autoconf incorrectly assumes so -// when compiling a universal2 binary, we disable AVX on such builds. #if defined(__APPLE__) # undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -# if defined(__arm64__) + // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be + // compiled on x86_64). However, since autoconf incorrectly assumes so + // when compiling a universal2 binary, we disable SIMD on such builds. +# if defined(__aarch64__) || defined(__arm64__) # undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD # undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD # endif #endif +// Below, we declare macros indicating how CPUID can be called at runtime, +// so that we only call CPUID with specific inputs when needed. + #if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ -# define SHOULD_DETECT_SIMD_FEATURES_L1 +# define SHOULD_PARSE_CPUID_L1 #endif #if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ -# define SHOULD_DETECT_SIMD_FEATURES_L7 -# define SHOULD_DETECT_SIMD_FEATURES_L7S0 +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S0 #endif #if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=1. 
*/ -# define SHOULD_DETECT_SIMD_FEATURES_L7 -# define SHOULD_DETECT_SIMD_FEATURES_L7S1 +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S1 #endif /* @@ -129,84 +138,89 @@ * Note 2: The SUBLEAF is also referred to as the 'count'. */ -/* CPUID (LEAF=1, SUBLEAF=0) */ -#define ECX_L1_SSE3 (UINT32_C(1) << 0) -#define ECX_L1_SSSE3 (UINT32_C(1) << 9) -#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) -#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) -#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) -#define ECX_L1_AVX (UINT32_C(1) << 28) - -#define EDX_L1_SSE (UINT32_C(1) << 25) -#define EDX_L1_SSE2 (UINT32_C(1) << 26) - -/* CPUID (LEAF=7, SUBLEAF=0) */ -#define EBX_L7_AVX2 (UINT32_C(1) << 5) -#define EBX_L7_AVX512_F (UINT32_C(1) << 16) -#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) -#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) -#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) -#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) -#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) -#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) -#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) - -#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) -#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) -#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) -#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) -#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) - -#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) -#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) -#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) - -/* CPUID (LEAF=7, SUBLEAF=1) */ -#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) -#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) - -#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) -#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) -#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define ECX_L1_SSE3 (UINT32_C(1) << 0) // 0x00000001 +#define ECX_L1_PCLMULQDQ (UINT32_C(1) << 1) // 0x00000002 +#define ECX_L1_SSSE3 (UINT32_C(1) << 9) // 0x00000200 +#define ECX_L1_FMA (UINT32_C(1) << 12) // 
0x00001000 +#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) // 0x00080000 +#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) // 0x00100000 +#define ECX_L1_POPCNT (UINT32_C(1) << 23) // 0x00800000 +#define ECX_L1_XSAVE (UINT32_C(1) << 26) // 0x04000000 +#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) // 0x08000000 +#define ECX_L1_AVX (UINT32_C(1) << 28) // 0x10000000 +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define EDX_L1_CMOV (UINT32_C(1) << 15) // 0x00008000 +#define EDX_L1_SSE (UINT32_C(1) << 25) // 0x02000000 +#define EDX_L1_SSE2 (UINT32_C(1) << 26) // 0x04000000 +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define EBX_L7_AVX2 (UINT32_C(1) << 5) // 0x00000020 +#define EBX_L7_AVX512_F (UINT32_C(1) << 16) // 0x00010000 +#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) // 0x00020000 +#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) // 0x00200000 +#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) // 0x04000000 +#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) // 0x08000000 +#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) // 0x10000000 +#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) // 0x40000000 +#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) // 0x80000000 +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) // 0x00000002 +#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) // 0x00000040 +#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) // 0x00000800 +#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) // 0x00001000 +#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) // 0x00004000 +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) // 0x00000004 +#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) // 0x00000008 +#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) // 0x00000100 +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) // 0x00000010 +#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) // 0x00800000 +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) // 0x00000010 +#define 
EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) // 0x00000020 +#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) // 0x00000400 +/* + * Call __cpuid_count() or equivalent and get + * its EAX, EBX, ECX and EDX output registers. + * + * If CPUID is not supported, registers are set to 0. + */ static inline void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, - uint32_t *CPUID_REG(eax), - uint32_t *CPUID_REG(ebx), - uint32_t *CPUID_REG(ecx), - uint32_t *CPUID_REG(edx)) + CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) { -#if defined(__x86_64__) && defined(__GNUC__) + *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); -#elif defined(_M_X64) +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); - *eax = info[0]; - *ebx = info[1]; - *ecx = info[2]; - *edx = info[3]; + *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; #endif } -/* XSAVE State Components. 
*/ -#define XCR0_SSE (UINT32_C(1) << 1) -#define XCR0_AVX (UINT32_C(1) << 2) -#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) -#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) -#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) +/* XSAVE state components (XCR0 control register) */ +#define XCR0_SSE (UINT32_C(1) << 1) // 0x00000002 +#define XCR0_AVX (UINT32_C(1) << 2) // 0x00000004 +#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) // 0x00000020 +#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) // 0x00000040 +#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) // 0x00000080 static inline uint64_t get_xgetbv(uint32_t index) { -#if defined(__x86_64__) && defined(__GNUC__) + assert(index == 0); // only XCR0 is supported for now +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); return ((uint64_t)edx << 32) | eax; -#elif defined(_M_X64) +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); #else - (void) index; + (void)index; return 0; #endif } @@ -215,16 +229,14 @@ get_xgetbv(uint32_t index) static inline uint32_t detect_cpuid_maxleaf(void) { - uint32_t maxlevel = 0, ebx = 0, ecx = 0, edx = 0; + CPUID_REG maxlevel = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); return maxlevel; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_simd_features(py_simd_features *flags, - uint32_t eax, uint32_t ebx, - uint32_t ecx, uint32_t edx) +detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD @@ -254,21 +266,29 @@ detect_simd_features(py_simd_features *flags, #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef HAS_CPUID_SUPPORT + flags->cmov = CPUID_CHECK_REG(edx, EDX_L1_CMOV); + flags->fma = CPUID_CHECK_REG(ecx, ECX_L1_FMA); + flags->popcnt = CPUID_CHECK_REG(ecx, ECX_L1_POPCNT); + flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); + + flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_simd_extended_features_ecx_0(py_simd_features *flags, - uint8_t eax, uint8_t ebx, - uint8_t ecx, uint8_t edx) +detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, + CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { + (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif -#endif +#endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS @@ -309,7 +329,6 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, #ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif - #ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif @@ -333,10 +352,13 @@ detect_simd_extended_features_ecx_0(py_simd_features *flags, /* Extended Feature Bits (LEAF=7, SUBLEAF=1). 
*/ static inline void -detect_simd_extended_features_ecx_1(py_simd_features *flags, - uint8_t eax, uint8_t ebx, - uint8_t ecx, uint8_t edx) +detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, + CPUID_REG eax, + CPUID_REG ebx, + CPUID_REG ecx, + CPUID_REG edx) { + (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -360,51 +382,51 @@ detect_simd_extended_features_ecx_1(py_simd_features *flags, } static inline void -detect_simd_xsave_state(py_simd_features *flags) +detect_cpuid_xsave_state(py_cpuid_features *flags) { + // Keep the ordering and newlines as they are declared in the structure. +#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->os_xsave ? get_xgetbv(0) : 0; flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +#endif } static inline void -finalize_simd_features(py_simd_features *flags) +cpuid_features_finalize(py_cpuid_features *flags) { - assert(flags->done == 0); + assert(flags->ready == 0); + // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. - flags->done = 1; + + flags->ready = 1; } -/* - * Return 0 if flags are compatible and correctly set and -1 otherwise. - * - * If this function returns -1, 'flags' should disable all SIMD features - * to avoid encountering a possible illegal instruction error at runtime. 
- */ static inline int -validate_simd_features(const py_simd_features *flags) +cpuid_features_validate(const py_cpuid_features *flags) { - if (flags->done != 1) { + if (flags->ready != 1) { return -1; } // AVX-512/F is required to support any other AVX-512 instruction set uint8_t avx512_require_f = ( - flags->avx512_cd || flags->avx512_er || flags->avx512_pf || - flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || - flags->avx512_ifma || - flags->avx512_vbmi || + // newlines are placed according to processor generations + flags->avx512_cd || + flags->avx512_er || flags->avx512_pf || flags->avx512_4fmaps || flags->avx512_4vnniw || flags->avx512_vpopcntdq || - flags->avx512_vnni || flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || flags->avx512_vbmi || + flags->avx512_vnni || + flags->avx512_vbmi2 || flags->avx512_bitalg || flags->avx512_vp2intersect ); + if (!flags->avx512_f && !avx512_require_f) { return -1; } @@ -412,165 +434,158 @@ validate_simd_features(const py_simd_features *flags) return 0; } -void -_Py_disable_simd_features(py_simd_features *flags) +int +_Py_cpuid_check_features(const py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. 
-#define ZERO(FLAG) flags->FLAG = 0 - ZERO(sse); - ZERO(sse2); - ZERO(sse3); - ZERO(ssse3); - ZERO(sse41); - ZERO(sse42); - - ZERO(avx); - ZERO(avx_ifma); - ZERO(avx_ne_convert); - - ZERO(avx_vnni); - ZERO(avx_vnni_int8); - ZERO(avx_vnni_int16); - - ZERO(avx2); - - ZERO(avx512_f); - ZERO(avx512_cd); - - ZERO(avx512_er); - ZERO(avx512_pf); - - ZERO(avx512_4fmaps); - ZERO(avx512_4vnniw); - - ZERO(avx512_vpopcntdq); - - ZERO(avx512_vl); - ZERO(avx512_dq); - ZERO(avx512_bw); - - ZERO(avx512_ifma); - - ZERO(avx512_vbmi); - - ZERO(avx512_vnni); - - ZERO(avx512_vbmi2); - ZERO(avx512_bitalg); - - ZERO(avx512_vp2intersect); - - ZERO(os_xsave); - - ZERO(xcr0_sse); - ZERO(xcr0_avx); - ZERO(xcr0_avx512_opmask); - ZERO(xcr0_avx512_zmm_hi256); - ZERO(xcr0_avx512_hi16_zmm); -#undef ZERO + return cpuid_features_validate(flags) < 0 ? 0 : 1; } +/* + * Apply a 1-parameter macro MACRO(FLAG) on all members + * of a 'py_cpuid_features' object ('ready' is omitted). + */ +#define CPUID_APPLY_MACRO(MACRO) \ + do { \ + MACRO(sse); \ + MACRO(sse2); \ + MACRO(sse3); \ + MACRO(ssse3); \ + MACRO(sse41); \ + MACRO(sse42); \ + \ + MACRO(avx); \ + MACRO(avx_ifma); \ + MACRO(avx_ne_convert); \ + \ + MACRO(avx_vnni); \ + MACRO(avx_vnni_int8); \ + MACRO(avx_vnni_int16); \ + \ + MACRO(avx2); \ + \ + MACRO(avx512_f); \ + MACRO(avx512_cd); \ + \ + MACRO(avx512_er); \ + MACRO(avx512_pf); \ + \ + MACRO(avx512_4fmaps); \ + MACRO(avx512_4vnniw); \ + \ + MACRO(avx512_vpopcntdq); \ + \ + MACRO(avx512_vl); \ + MACRO(avx512_dq); \ + MACRO(avx512_bw); \ + \ + MACRO(avx512_ifma); \ + MACRO(avx512_vbmi); \ + \ + MACRO(avx512_vnni); \ + \ + MACRO(avx512_vbmi2); \ + MACRO(avx512_bitalg); \ + \ + MACRO(avx512_vp2intersect); \ + \ + MACRO(cmov); \ + MACRO(fma); \ + MACRO(popcnt); \ + MACRO(pclmulqdq); \ + \ + MACRO(xsave); \ + MACRO(os_xsave); \ + \ + MACRO(xcr0_sse); \ + MACRO(xcr0_avx); \ + MACRO(xcr0_avx512_opmask); \ + MACRO(xcr0_avx512_zmm_hi256); \ + MACRO(xcr0_avx512_hi16_zmm); \ + } while (0) + void 
-_Py_update_simd_features(py_simd_features *out, - const py_simd_features *src) +_Py_cpuid_disable_features(py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. -#define UPDATE(FLAG) out->FLAG |= src->FLAG - UPDATE(sse); - UPDATE(sse2); - UPDATE(sse3); - UPDATE(ssse3); - UPDATE(sse41); - UPDATE(sse42); - - UPDATE(avx); - UPDATE(avx_ifma); - UPDATE(avx_ne_convert); - - UPDATE(avx_vnni); - UPDATE(avx_vnni_int8); - UPDATE(avx_vnni_int16); - - UPDATE(avx2); - - UPDATE(avx512_f); - UPDATE(avx512_cd); - - UPDATE(avx512_er); - UPDATE(avx512_pf); - - UPDATE(avx512_4fmaps); - UPDATE(avx512_4vnniw); - - UPDATE(avx512_vpopcntdq); - - UPDATE(avx512_vl); - UPDATE(avx512_dq); - UPDATE(avx512_bw); - - UPDATE(avx512_ifma); - - UPDATE(avx512_vbmi); - - UPDATE(avx512_vnni); - - UPDATE(avx512_vbmi2); - UPDATE(avx512_bitalg); - - UPDATE(avx512_vp2intersect); +#define CPUID_DISABLE(FLAG) flags->FLAG = 0 + CPUID_APPLY_MACRO(CPUID_DISABLE); +#undef CPUID_DISABLE +} - UPDATE(os_xsave); +int +_Py_cpuid_has_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ +#define CPUID_CHECK_FEATURE(FLAG) \ + do { \ + if (expect->FLAG && !actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_CHECK_FEATURE); +#undef CPUID_CHECK_FEATURE + return 1; +} - UPDATE(xcr0_sse); - UPDATE(xcr0_avx); - UPDATE(xcr0_avx512_opmask); - UPDATE(xcr0_avx512_zmm_hi256); - UPDATE(xcr0_avx512_hi16_zmm); -#undef UPDATE - out->done = 1; +int +_Py_cpuid_match_features(const py_cpuid_features *actual, + const py_cpuid_features *expect) +{ +#define CPUID_MATCH_FEATURE(FLAG) \ + do { \ + if (expect->FLAG != actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_MATCH_FEATURE); +#undef CPUID_MATCH_FEATURE + return 1; } +#undef CPUID_APPLY_MACRO + void -_Py_detect_simd_features(py_simd_features *flags) +_Py_cpuid_detect_features(py_cpuid_features *flags) { - if (flags->done) { + if (flags->ready) { return; } - 
_Py_disable_simd_features(flags); + _Py_cpuid_disable_features(flags); +#ifdef HAS_CPUID_SUPPORT uint32_t maxleaf = detect_cpuid_maxleaf(); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; -#ifdef SHOULD_DETECT_SIMD_FEATURES_L1 + (void)maxleaf; // to suppress unused warnings + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + +#ifdef SHOULD_PARSE_CPUID_L1 if (maxleaf >= 1) { eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); - detect_simd_features(flags, eax, ebx, ecx, edx); + detect_cpuid_features(flags, ecx, edx); if (flags->os_xsave) { - detect_simd_xsave_state(flags); + detect_cpuid_xsave_state(flags); } } -#else - (void) maxleaf; - (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // SHOULD_DETECT_SIMD_FEATURES_L1 -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7 +#endif // SHOULD_PARSE_CPUID_L1 + +#ifdef SHOULD_PARSE_CPUID_L7 if (maxleaf >= 7) { -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S0 +#ifdef SHOULD_PARSE_CPUID_L7S0 eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); - detect_simd_extended_features_ecx_0(flags, eax, ebx, ecx, edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); #endif -#ifdef SHOULD_DETECT_SIMD_FEATURES_L7S1 +#ifdef SHOULD_PARSE_CPUID_L7S1 eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - detect_simd_extended_features_ecx_1(flags, eax, ebx, ecx, edx); + detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); #endif } -#else - (void) maxleaf; - (void) eax; (void) ebx; (void) ecx; (void) edx; -#endif // SHOULD_DETECT_SIMD_FEATURES_L7 - finalize_simd_features(flags); - if (validate_simd_features(flags) < 0) { - _Py_disable_simd_features(flags); +#endif // SHOULD_PARSE_CPUID_L7 + cpuid_features_finalize(flags); + if (cpuid_features_validate(flags) < 0) { + _Py_cpuid_disable_features(flags); } +#else + flags->ready = 1; +#endif // HAS_CPUID_SUPPORT } diff --git 
a/configure.ac b/configure.ac index 74a8e785c229bf..84a39e0d402804 100644 --- a/configure.ac +++ b/configure.ac @@ -7846,7 +7846,6 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) # PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) - # PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) # PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) From 97a0fc542cbedd813f84c6f65bcddad14be0a8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 11 Oct 2024 12:10:25 +0200 Subject: [PATCH 20/78] update configure --- configure | 1 - 1 file changed, 1 deletion(-) diff --git a/configure b/configure index 2bcec7f82ce042..08940431c680f1 100755 --- a/configure +++ b/configure @@ -31788,7 +31788,6 @@ fi - # From 5f2884d38e43e04a7c044df6bd185c73d6d5af98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Oct 2024 10:29:57 +0200 Subject: [PATCH 21/78] update comments --- Include/internal/pycore_cpuinfo.h | 90 +++++++++++++++---------------- Python/cpuinfo.c | 6 +++ 2 files changed, 49 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index fe934fa13a70b1..779601f947111f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -11,35 +11,31 @@ extern "C" { #include // uint8_t -/* Declare a member of 'py_cpuid_features' storing a CPUID bit. */ -#define _Py_CPUID_DECL_FEAT(X) uint8_t X:1 -/* Declare a member of 'py_cpuid_features' storing a XCR0 bit. */ -#define _Py_CPUID_DECL_XCR0(X) uint8_t X:1 - typedef struct py_cpuid_features { + /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. 
*/ +#define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ - _Py_CPUID_DECL_FEAT(sse); - _Py_CPUID_DECL_FEAT(sse2); - _Py_CPUID_DECL_FEAT(sse3); - _Py_CPUID_DECL_FEAT(ssse3); // Supplemental SSE3 instructions - _Py_CPUID_DECL_FEAT(sse41); // SSE4.1 - _Py_CPUID_DECL_FEAT(sse42); // SSE4.2 + _Py_CPUID_DECL_FLAG(sse); + _Py_CPUID_DECL_FLAG(sse2); + _Py_CPUID_DECL_FLAG(sse3); + _Py_CPUID_DECL_FLAG(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FLAG(sse41); // SSE4.1 + _Py_CPUID_DECL_FLAG(sse42); // SSE4.2 // --- Advanced Vector Extensions ----------------------------------------- - _Py_CPUID_DECL_FEAT(avx); - _Py_CPUID_DECL_FEAT(avx_ifma); - _Py_CPUID_DECL_FEAT(avx_ne_convert); + _Py_CPUID_DECL_FLAG(avx); + _Py_CPUID_DECL_FLAG(avx_ifma); + _Py_CPUID_DECL_FLAG(avx_ne_convert); - _Py_CPUID_DECL_FEAT(avx_vnni); - _Py_CPUID_DECL_FEAT(avx_vnni_int8); - _Py_CPUID_DECL_FEAT(avx_vnni_int16); + _Py_CPUID_DECL_FLAG(avx_vnni); + _Py_CPUID_DECL_FLAG(avx_vnni_int8); + _Py_CPUID_DECL_FLAG(avx_vnni_int16); // --- Advanced Vector Extensions 2 --------------------------------------- - _Py_CPUID_DECL_FEAT(avx2); + _Py_CPUID_DECL_FLAG(avx2); // --- Advanced Vector Extensions (512-bit) ------------------------------- /* - * * AVX-512 instruction set are grouped by the processor generation * that implements them (see https://en.wikipedia.org/wiki/AVX-512). * @@ -50,51 +46,51 @@ typedef struct py_cpuid_features { * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). 
*/ - _Py_CPUID_DECL_FEAT(avx512_f); - _Py_CPUID_DECL_FEAT(avx512_cd); + _Py_CPUID_DECL_FLAG(avx512_f); + _Py_CPUID_DECL_FLAG(avx512_cd); - _Py_CPUID_DECL_FEAT(avx512_er); - _Py_CPUID_DECL_FEAT(avx512_pf); + _Py_CPUID_DECL_FLAG(avx512_er); + _Py_CPUID_DECL_FLAG(avx512_pf); - _Py_CPUID_DECL_FEAT(avx512_4fmaps); - _Py_CPUID_DECL_FEAT(avx512_4vnniw); + _Py_CPUID_DECL_FLAG(avx512_4fmaps); + _Py_CPUID_DECL_FLAG(avx512_4vnniw); - _Py_CPUID_DECL_FEAT(avx512_vpopcntdq); + _Py_CPUID_DECL_FLAG(avx512_vpopcntdq); - _Py_CPUID_DECL_FEAT(avx512_vl); - _Py_CPUID_DECL_FEAT(avx512_dq); - _Py_CPUID_DECL_FEAT(avx512_bw); + _Py_CPUID_DECL_FLAG(avx512_vl); + _Py_CPUID_DECL_FLAG(avx512_dq); + _Py_CPUID_DECL_FLAG(avx512_bw); - _Py_CPUID_DECL_FEAT(avx512_ifma); - _Py_CPUID_DECL_FEAT(avx512_vbmi); + _Py_CPUID_DECL_FLAG(avx512_ifma); + _Py_CPUID_DECL_FLAG(avx512_vbmi); - _Py_CPUID_DECL_FEAT(avx512_vnni); + _Py_CPUID_DECL_FLAG(avx512_vnni); - _Py_CPUID_DECL_FEAT(avx512_vbmi2); - _Py_CPUID_DECL_FEAT(avx512_bitalg); + _Py_CPUID_DECL_FLAG(avx512_vbmi2); + _Py_CPUID_DECL_FLAG(avx512_bitalg); - _Py_CPUID_DECL_FEAT(avx512_vp2intersect); + _Py_CPUID_DECL_FLAG(avx512_vp2intersect); // --- Instructions ------------------------------------------------------- - _Py_CPUID_DECL_FEAT(cmov); - _Py_CPUID_DECL_FEAT(fma); - _Py_CPUID_DECL_FEAT(popcnt); - _Py_CPUID_DECL_FEAT(pclmulqdq); + _Py_CPUID_DECL_FLAG(cmov); + _Py_CPUID_DECL_FLAG(fma); + _Py_CPUID_DECL_FLAG(popcnt); + _Py_CPUID_DECL_FLAG(pclmulqdq); - _Py_CPUID_DECL_FEAT(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FEAT(os_xsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(os_xsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- - _Py_CPUID_DECL_XCR0(xcr0_sse); + _Py_CPUID_DECL_FLAG(xcr0_sse); // On some Intel CPUs, it is possible for the CPU to support AVX2 // instructions even though the underlying OS does not know 
about // AVX. In particular, only (SSE) XMM registers will be saved and // restored on context-switch, but not (AVX) YMM registers. - _Py_CPUID_DECL_XCR0(xcr0_avx); - _Py_CPUID_DECL_XCR0(xcr0_avx512_opmask); - _Py_CPUID_DECL_XCR0(xcr0_avx512_zmm_hi256); - _Py_CPUID_DECL_XCR0(xcr0_avx512_hi16_zmm); - + _Py_CPUID_DECL_FLAG(xcr0_avx); + _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); + _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); + _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); +#undef _Py_CPUID_DECL_FLAG // Whenever a field is added or removed above, update the // number of fields (40) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index dddacc3d0286ef..edfc4e8b5be7ae 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -515,6 +515,9 @@ int _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { + if (!actual->ready || !expect->ready) { + return 0; + } #define CPUID_CHECK_FEATURE(FLAG) \ do { \ if (expect->FLAG && !actual->FLAG) { \ @@ -530,6 +533,9 @@ int _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { + if (!actual->ready || !expect->ready) { + return 0; + } #define CPUID_MATCH_FEATURE(FLAG) \ do { \ if (expect->FLAG != actual->FLAG) { \ From 7c3b74ede4f8ebf018d1b0e466ad9dda56d2ca4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Oct 2024 12:46:21 +0200 Subject: [PATCH 22/78] update Makefile --- Makefile.pre.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index f3640921a501b6..019389c4ba9d07 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -432,9 +432,9 @@ PYTHON_OBJS= \ Python/codegen.o \ Python/compile.o \ Python/context.o \ + Python/cpuinfo.o \ Python/critical_section.o \ Python/crossinterp.o \ - Python/cpuinfo.o \ Python/dynamic_annotations.o \ Python/errors.o \ 
Python/flowgraph.o \ From 130d0991558bd802de5af7b1b56ad2871ce9ce9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:49:48 +0200 Subject: [PATCH 23/78] address Erlend's review --- Python/cpuinfo.c | 118 ++++++++++++++++---------------- configure | 58 ++++++++-------- configure.ac | 4 +- pyconfig.h.in | 174 +++++++++++++++++++++++------------------------ 4 files changed, 177 insertions(+), 177 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index edfc4e8b5be7ae..7139c0e632bdee 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -36,47 +36,47 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || 
defined(Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ - || defined(CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ +#if defined(Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || 
defined(Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -240,28 +240,28 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif -#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif -#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); #endif -#ifdef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif -#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif -#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif #endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, 
ECX_L1_AVX); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -285,66 +285,66 @@ detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif #endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); #endif -#ifdef 
CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS flags->avx512_vbmi2 = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); #endif -#ifdef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD @@ -361,21 +361,21 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif -#ifdef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS +#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -530,7 +530,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, +_Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { diff --git a/configure b/configure index 08940431c680f1..3795da56390fee 100755 --- a/configure +++ b/configure @@ -30667,7 +30667,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } if test "x$ax_cv_check_cflags___msse" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30715,7 +30715,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } if test "x$ax_cv_check_cflags___msse2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30763,7 +30763,7 @@ printf "%s\n" 
"$ax_cv_check_cflags___msse3" >&6; } if test "x$ax_cv_check_cflags___msse3" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30811,7 +30811,7 @@ printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } if test "x$ax_cv_check_cflags___mssse3" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30859,7 +30859,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } if test "x$ax_cv_check_cflags___msse4_1" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30907,7 +30907,7 @@ printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } if test "x$ax_cv_check_cflags___msse4_2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -30956,7 +30956,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } if test "x$ax_cv_check_cflags___mavx" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31004,7 +31004,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } if test "x$ax_cv_check_cflags___mavxifma" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31052,7 +31052,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } if test "x$ax_cv_check_cflags___mavxneconvert" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h 
+printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31101,7 +31101,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } if test "x$ax_cv_check_cflags___mavxvnni" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31149,7 +31149,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31197,7 +31197,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31246,7 +31246,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } if test "x$ax_cv_check_cflags___mavx2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31295,7 +31295,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } if test "x$ax_cv_check_cflags___mavx512f" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31343,7 +31343,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } if test "x$ax_cv_check_cflags___mavx512cd" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ 
-31391,7 +31391,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } if test "x$ax_cv_check_cflags___mavx512er" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31439,7 +31439,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } if test "x$ax_cv_check_cflags___mavx512pf" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31488,7 +31488,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31536,7 +31536,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31585,7 +31585,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31634,7 +31634,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } if test "x$ax_cv_check_cflags___mavx512vl" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31682,7 +31682,7 @@ printf "%s\n" 
"$ax_cv_check_cflags___mavx512dq" >&6; } if test "x$ax_cv_check_cflags___mavx512dq" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31730,7 +31730,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } if test "x$ax_cv_check_cflags___mavx512bw" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31779,7 +31779,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } if test "x$ax_cv_check_cflags___mavx512ifma" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31827,7 +31827,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31876,7 +31876,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } if test "x$ax_cv_check_cflags___mavx512vnni" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31925,7 +31925,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -31973,7 +31973,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } if test 
"x$ax_cv_check_cflags___mavx512bitalg" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h else $as_nop : @@ -32022,7 +32022,7 @@ printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes then : -printf "%s\n" "#define CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h else $as_nop : diff --git a/configure.ac b/configure.ac index 84a39e0d402804..e371958e9848bf 100644 --- a/configure.ac +++ b/configure.ac @@ -7795,8 +7795,8 @@ AC_DEFUN([PY_SIMD_DETECT], [ [[ac_cv_can_compile_simd_]m4_tolower([$1])], [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], - [[CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) AC_MSG_CHECKING([checking SIMD instruction set]) AX_CHECK_COMPILE_FLAG([$2], [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], diff --git a/pyconfig.h.in b/pyconfig.h.in index 625c9798d6272b..9d503115e8ffe7 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -32,93 +32,6 @@ /* The Android API level. */ #undef ANDROID_API_LEVEL -/* Define if '-mavx2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - -/* Define if '-mavx5124fmaps' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS - -/* Define if '-mavx5124vnniw' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS - -/* Define if '-mavx512bitalg' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS - -/* Define if '-mavx512bw' is a valid compiler flag. 
*/ -#undef CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS - -/* Define if '-mavx512cd' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS - -/* Define if '-mavx512dq' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS - -/* Define if '-mavx512er' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS - -/* Define if '-mavx512f' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS - -/* Define if '-mavx512ifma' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS - -/* Define if '-mavx512pf' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS - -/* Define if '-mavx512vbmi2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS - -/* Define if '-mavx512vbmi' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - -/* Define if '-mavx512vl' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS - -/* Define if '-mavx512vnni' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS - -/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS - -/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS - -/* Define if '-mavxifma' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS - -/* Define if '-mavx' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - -/* Define if '-mavxneconvert' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS - -/* Define if '-mavxvnni' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS - -/* Define if '-mavxvnniint16' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS - -/* Define if '-mavxvnniint8' is a valid compiler flag. 
*/ -#undef CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS - -/* Define if '-msse2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - -/* Define if '-msse3' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - -/* Define if '-msse4.1' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - -/* Define if '-msse4.2' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - -/* Define if '-msse' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - -/* Define if '-mssse3' is a valid compiler flag. */ -#undef CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS - /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 @@ -1763,6 +1676,93 @@ /* PEP 11 Support tier (1, 2, 3 or 0 for unsupported) */ #undef PY_SUPPORT_TIER +/* Define if '-mavx2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. 
*/ +#undef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. 
*/ +#undef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG From cd575f0f744b17d6c5c35765c68d729692670fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:54:19 +0200 Subject: [PATCH 24/78] lint & comment fixups --- Python/cpuinfo.c | 2 +- configure | 13 +++++++------ configure.ac | 13 +++++++------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 7139c0e632bdee..07e37bbc97fcfc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -530,7 +530,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, +_Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { diff --git a/configure b/configure index 3795da56390fee..12035e2197876d 100755 --- a/configure +++ b/configure @@ -30619,13 +30619,13 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } -# Detection of suported SIMD instruction sets for CPython. Since +# Detection of supported SIMD instruction sets for CPython. Since # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_simd_features in pycore_cpuinfo.h for how to order fields -# and where to put blank lines to separate processor generations -# for AVX-512 instructions. +# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then # SSE @@ -31206,7 +31206,7 @@ fi - # AVX 2 + # AVX-2 @@ -31255,7 +31255,7 @@ fi - # + # AVX-512 @@ -31352,6 +31352,7 @@ fi + # diff --git a/configure.ac b/configure.ac index e371958e9848bf..3218a771811a66 100644 --- a/configure.ac +++ b/configure.ac @@ -7805,13 +7805,13 @@ AC_DEFUN([PY_SIMD_DETECT], [ AS_VAR_POPDEF([py_define]) ]) -# Detection of suported SIMD instruction sets for CPython. Since +# Detection of supported SIMD instruction sets for CPython. Since # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_simd_features in pycore_cpuinfo.h for how to order fields -# and where to put blank lines to separate processor generations -# for AVX-512 instructions. +# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. 
if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then # SSE PY_SIMD_DETECT([SSE], [-msse]) @@ -7828,11 +7828,12 @@ if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) - # AVX 2 + # AVX-2 PY_SIMD_DETECT([AVX2], [-mavx2]) - # + # AVX-512 PY_SIMD_DETECT([AVX512_F], [-mavx512f]) PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + # PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) # From 2b597a43437c288d7c7782ad186723c7919863d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 17:55:54 +0100 Subject: [PATCH 25/78] Update docs --- Include/internal/pycore_cpuinfo.h | 29 ++++++++++++++++++++++++++--- Python/cpuinfo.c | 6 ------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 779601f947111f..e6047778399227 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -1,3 +1,15 @@ +/* + * Interface for detecting the different CPUID flags in an opaque manner. + * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. + * + * If a module requires to support SIMD instructions, it should determine + * the compiler flags and the instruction sets required for the instrinsics + * to work. + * + * For the headers and expected CPUID bits needed by Intel intrinics, see + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. + */ + #ifndef Py_INTERNAL_CPUINFO_H #define Py_INTERNAL_CPUINFO_H @@ -44,7 +56,7 @@ typedef struct py_cpuid_features { * FP16 since they operate on bfloat16 and binary16 (half-float). 
* * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for - * the meaning of each suffix (e.g., 'f' stands for 'Foundation'). + * the suffix meanings (for instance 'f' stands for 'Foundation'). */ _Py_CPUID_DECL_FLAG(avx512_f); _Py_CPUID_DECL_FLAG(avx512_cd); @@ -103,6 +115,8 @@ typedef struct py_cpuid_features { * could lead to an illegal instruction error. * * This does not mark 'flags' as being ready yet. + * + * Note: This function does not set any exception and thus never fails. */ extern void _Py_cpuid_disable_features(py_cpuid_features *flags); @@ -113,6 +127,8 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * * The caller should disable all CPUID detected features if the check * fails to avoid encountering runtime illegal instruction errors. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_check_features(const py_cpuid_features *flags); @@ -121,22 +137,29 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * Return 1 if all expected flags are set in 'actual', 0 otherwise. * * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect); - /* * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. * * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. */ extern int _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect); -/* Detect the available features on this machine. */ +/* + * Detect the available features on this machine, storing the result in 'flags'. + * + * Note: This function does not set any exception and thus never fails. 
+ */ extern void _Py_cpuid_detect_features(py_cpuid_features *flags); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 07e37bbc97fcfc..c7e4248b182f3e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,9 +1,3 @@ -/* - * Python CPU SIMD features detection. - * - * See https://en.wikipedia.org/wiki/CPUID for details. - */ - #include "Python.h" #include "pycore_cpuinfo.h" From 78be5307e87005946f9cdae48d06c404ffdbd308 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 18:00:39 +0100 Subject: [PATCH 26/78] Fix typo --- Include/internal/pycore_cpuinfo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index e6047778399227..f64edac7d9232a 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -3,10 +3,10 @@ * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. * * If a module requires to support SIMD instructions, it should determine - * the compiler flags and the instruction sets required for the instrinsics + * the compiler flags and the instruction sets required for the intrinsics * to work. * - * For the headers and expected CPUID bits needed by Intel intrinics, see + * For the headers and expected CPUID bits needed by Intel intrinsics, see * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. 
*/ From cbb7b533da80aa3928906ac168b08b7cf0e58ea6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:45:33 +0100 Subject: [PATCH 27/78] re-export functions for extension modules --- Include/internal/pycore_cpuinfo.h | 13 +++++++------ Python/cpuinfo.c | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index f64edac7d9232a..7a06a9c5c67001 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -21,7 +21,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif -#include <stdint.h> // uint8_t +#include "Python.h" + typedef struct py_cpuid_features { /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. */ @@ -118,7 +119,7 @@ typedef struct py_cpuid_features { * * Note: This function does not set any exception and thus never fails. */ -extern void +PyAPI_FUNC(void) _Py_cpuid_disable_features(py_cpuid_features *flags); /* @@ -130,7 +131,7 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_check_features(const py_cpuid_features *flags); /* @@ -140,7 +141,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_has_features(const py_cpuid_features *actual, const py_cpuid_features *expect); @@ -151,7 +152,7 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, * * Note: This function does not set any exception and thus never fails. */ -extern int +PyAPI_FUNC(int) _Py_cpuid_match_features(const py_cpuid_features *actual, const py_cpuid_features *expect); @@ -160,7 +161,7 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, * * Note: This function does not set any exception and thus never fails. 
*/ -extern void +PyAPI_FUNC(void) _Py_cpuid_detect_features(py_cpuid_features *flags); #ifdef __cplusplus diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index c7e4248b182f3e..3219ece67d414a 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,4 +1,3 @@ -#include "Python.h" #include "pycore_cpuinfo.h" #include <stdint.h> // UINT32_C() From 21d8ca8fb77ae7cbe3fd8638199752daf5cdfdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:22:59 +0100 Subject: [PATCH 28/78] rename os_xsave to osxsave for future automatism --- Include/internal/pycore_cpuinfo.h | 2 +- Python/cpuinfo.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 7a06a9c5c67001..d7baaeced60f9f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -91,7 +91,7 @@ typedef struct py_cpuid_features { _Py_CPUID_DECL_FLAG(pclmulqdq); _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FLAG(os_xsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- _Py_CPUID_DECL_FLAG(xcr0_sse); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3219ece67d414a..de19ebe053f74b 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -266,7 +266,7 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); - flags->os_xsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); + flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif } @@ -379,7 +379,7 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT - uint64_t xcr0 = flags->os_xsave ? 
get_xgetbv(0) : 0; + uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); @@ -487,7 +487,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) MACRO(pclmulqdq); \ \ MACRO(xsave); \ - MACRO(os_xsave); \ + MACRO(osxsave); \ \ MACRO(xcr0_sse); \ MACRO(xcr0_avx); \ @@ -560,7 +560,7 @@ _Py_cpuid_detect_features(py_cpuid_features *flags) eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - if (flags->os_xsave) { + if (flags->osxsave) { detect_cpuid_xsave_state(flags); } } From 1f9dbb4b9de0dfa024261fb7cc65889634cefd26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:12:00 +0100 Subject: [PATCH 29/78] remember `maxleaf` and make detection more readable --- Include/internal/pycore_cpuinfo.h | 1 + Python/cpuinfo.c | 97 +++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index d7baaeced60f9f..8d4a260c18a187 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -25,6 +25,7 @@ extern "C" { typedef struct py_cpuid_features { + uint32_t maxleaf; /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. 
*/ #define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index de19ebe053f74b..d093d2a75d131e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -222,9 +222,9 @@ get_xgetbv(uint32_t index) static inline uint32_t detect_cpuid_maxleaf(void) { - CPUID_REG maxlevel = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(0, 0, &maxlevel, &ebx, &ecx, &edx); - return maxlevel; + CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); + return maxleaf; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ @@ -392,6 +392,7 @@ static inline void cpuid_features_finalize(py_cpuid_features *flags) { assert(flags->ready == 0); + assert(flags->maxleaf >= 0); // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. @@ -499,6 +500,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) void _Py_cpuid_disable_features(py_cpuid_features *flags) { + flags->maxleaf = 0; #define CPUID_DISABLE(FLAG) flags->FLAG = 0 CPUID_APPLY_MACRO(CPUID_DISABLE); #undef CPUID_DISABLE @@ -511,6 +513,9 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, if (!actual->ready || !expect->ready) { return 0; } + if (actual->maxleaf < expect->maxleaf) { + return 0; + } #define CPUID_CHECK_FEATURE(FLAG) \ do { \ if (expect->FLAG && !actual->FLAG) { \ @@ -529,6 +534,9 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, if (!actual->ready || !expect->ready) { return 0; } + if (actual->maxleaf != expect->maxleaf) { + return 0; + } #define CPUID_MATCH_FEATURE(FLAG) \ do { \ if (expect->FLAG != actual->FLAG) { \ @@ -542,49 +550,76 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, #undef CPUID_APPLY_MACRO -void -_Py_cpuid_detect_features(py_cpuid_features *flags) -{ - if (flags->ready) { - return; - } - _Py_cpuid_disable_features(flags); 
-#ifdef HAS_CPUID_SUPPORT - uint32_t maxleaf = detect_cpuid_maxleaf(); - (void)maxleaf; // to suppress unused warnings - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; - (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings - #ifdef SHOULD_PARSE_CPUID_L1 - if (maxleaf >= 1) { - eax = 0, ebx = 0, ecx = 0, edx = 0; +static inline void +cpuid_detect_l1_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 1) { + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); if (flags->osxsave) { detect_cpuid_xsave_state(flags); } } -#endif // SHOULD_PARSE_CPUID_L1 +} +#else +#define cpuid_detect_l1_features(FLAGS) +#endif -#ifdef SHOULD_PARSE_CPUID_L7 - if (maxleaf >= 7) { #ifdef SHOULD_PARSE_CPUID_L7S0 - eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); - detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +static inline void +cpuid_detect_l7s0_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s0_features(FLAGS) #endif + #ifdef SHOULD_PARSE_CPUID_L7S1 - eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); +static inline void +cpuid_detect_l7s1_features(py_cpuid_features *flags) +{ + CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7 +static inline void +cpuid_detect_l7_features(py_cpuid_features *flags) +{ + if (flags->maxleaf >= 7) { + cpuid_detect_l7s0_features(flags); + cpuid_detect_l7s1_features(flags); + } +} +#else +#define cpuid_detect_l7_features(FLAGS) 
#endif + +void +_Py_cpuid_detect_features(py_cpuid_features *flags) +{ + if (flags->ready) { + return; } -#endif // SHOULD_PARSE_CPUID_L7 + _Py_cpuid_disable_features(flags); +#ifndef HAS_CPUID_SUPPORT + flags->ready = 1; +#else + flags->maxleaf = detect_cpuid_maxleaf(); + cpuid_detect_l1_features(flags); + cpuid_detect_l7_features(flags); cpuid_features_finalize(flags); if (cpuid_features_validate(flags) < 0) { _Py_cpuid_disable_features(flags); } -#else - flags->ready = 1; -#endif // HAS_CPUID_SUPPORT +#endif // !HAS_CPUID_SUPPORT } From 553aa7c0460b9bce6e271c034122c765fcdce1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:13:32 +0100 Subject: [PATCH 30/78] use enumeration for flags --- Include/internal/pycore_cpuinfo.h | 178 ++++++++++++++++++++++++++++++ Python/cpuinfo.c | 84 ++------------ 2 files changed, 186 insertions(+), 76 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 8d4a260c18a187..eecc73736c5f44 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -23,6 +23,184 @@ extern "C" { #include "Python.h" +/* + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. 
+ * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture. + */ +// fmt: off +typedef enum py_cpuid_feature_mask { +/*[python input] +# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}} +data = { + (1, 0, 'ECX'): { + 'SSE3': 0, + 'PCLMULQDQ': 1, + 'SSSE3': 9, + 'FMA': 12, + 'SSE4_1': 19, + 'SSE4_2': 20, + 'POPCNT': 23, + 'XSAVE': 26, + 'OSXSAVE': 27, + 'AVX': 28, + }, + (1, 0, 'EDX'): { + 'CMOV': 15, + 'SSE': 25, + 'SSE2': 26, + }, + (7, 0, 'EBX'): { + 'AVX2': 5, + 'AVX512_F': 16, + 'AVX512_DQ': 17, + 'AVX512_IFMA': 21, + 'AVX512_PF': 26, + 'AVX512_ER': 27, + 'AVX512_CD': 28, + 'AVX512_BW': 30, + 'AVX512_VL': 31, + }, + (7, 0, 'ECX'): { + 'AVX512_VBMI': 1, + 'AVX512_VBMI2': 6, + 'AVX512_VNNI': 11, + 'AVX512_BITALG': 12, + 'AVX512_VPOPCNTDQ': 14, + }, + (7, 0, 'EDX'): { + 'AVX512_4VNNIW': 2, + 'AVX512_4FMAPS': 3, + 'AVX512_VP2INTERSECT': 8, + }, + (7, 1, 'EAX'): { + 'AVX_VNNI': 4, + 'AVX_IFMA': 23, + }, + (7, 1, 'EDX'): { + 'AVX_VNNI_INT8': 4, + 'AVX_NE_CONVERT': 5, + 'AVX_VNNI_INT16': 10, + }, +} + +def get_member_name(leaf, subleaf, registry, name): + node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}' + return f'Py_CPUID_MASK_{registry}_{node}_{name}' + +def get_member_mask(bit): + val = format(1 << bit, '008x') + return f'= 0x{val},' + +# BUG(picnixz): Clinic does not like when commented lines have empty lines. +# so we use '::' for now to indicate an empty line. +# :: +# The enumeration is rendered as follows: +# :: +# <NAME> = 0x<MASK>, // bit = BIT +# ^ ^ ^ ^ ^ ^ ^ +# :: +# where ^ indicates a column that is a multiple of 4, <MASK> has +# exactly 8 characters and <BIT> has at most 2 characters. + +INDENT = ' ' * 4 +# BUG(picnixz): Clinic does not like when '/' and '*' are put together. 
+COMMENT = '/' + '* ' + +def next_block(w): + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + +NAMESIZE = next_block(max( + len(get_member_name(*group, name)) + for group, values in data.items() + for name in values +)) +MASKSIZE = 8 + next_block(len('= 0x,')) + +for group, values in data.items(): + title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group) + print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='') + for name, bit in values.items(): + assert name, f"invalid entry in {group}" + key = get_member_name(*group, name) + assert 0 <= bit < 32, f"invalid bit value for {name!r}" + val = get_member_mask(bit) + + member_name = key.ljust(NAMESIZE) + member_mask = val.ljust(MASKSIZE) + + print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='') +[python start generated code]*/ + /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ + Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + 
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 + Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ + Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/ +} py_cpuid_feature_mask; +// fmt: on + +/* XSAVE state components (XCR0 control register) */ +typedef enum py_xsave_feature_mask { + Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +} py_xsave_feature_mask; typedef struct py_cpuid_features { uint32_t maxleaf; diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d093d2a75d131e..02ddc0dfafc0b5 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,11 +1,11 @@ #include "pycore_cpuinfo.h" -#include <stdint.h> // UINT32_C() - 
/* CPUID input and output registers are 32-bit unsigned integers */ #define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ -#define CPUID_CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) +#define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) +#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT)) +#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT)) // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER @@ -113,67 +113,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -/* - * The macros below describe masks to apply on CPUID output registers. - * - * Each macro is of the form <REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, - * where <> (resp. []) denotes a required (resp. optional) group and: - * - * - REGISTER is EAX, EBX, ECX or EDX, - * - LEAF is the initial value of the EAX register (1 or 7), - * - SUBLEAF is the initial value of the ECX register (omitted if 0), and - * - FEATURE is a SIMD feature (with one or more specialized instructions). - * - * For maintainability, the flags are ordered by registers, leafs, subleafs, - * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. - * - * Note 1: The LEAF is also called the 'page' or the 'level'. - * Note 2: The SUBLEAF is also referred to as the 'count'. 
- */ - -/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ -#define ECX_L1_SSE3 (UINT32_C(1) << 0) // 0x00000001 -#define ECX_L1_PCLMULQDQ (UINT32_C(1) << 1) // 0x00000002 -#define ECX_L1_SSSE3 (UINT32_C(1) << 9) // 0x00000200 -#define ECX_L1_FMA (UINT32_C(1) << 12) // 0x00001000 -#define ECX_L1_SSE4_1 (UINT32_C(1) << 19) // 0x00080000 -#define ECX_L1_SSE4_2 (UINT32_C(1) << 20) // 0x00100000 -#define ECX_L1_POPCNT (UINT32_C(1) << 23) // 0x00800000 -#define ECX_L1_XSAVE (UINT32_C(1) << 26) // 0x04000000 -#define ECX_L1_OSXSAVE (UINT32_C(1) << 27) // 0x08000000 -#define ECX_L1_AVX (UINT32_C(1) << 28) // 0x10000000 -/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ -#define EDX_L1_CMOV (UINT32_C(1) << 15) // 0x00008000 -#define EDX_L1_SSE (UINT32_C(1) << 25) // 0x02000000 -#define EDX_L1_SSE2 (UINT32_C(1) << 26) // 0x04000000 -/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ -#define EBX_L7_AVX2 (UINT32_C(1) << 5) // 0x00000020 -#define EBX_L7_AVX512_F (UINT32_C(1) << 16) // 0x00010000 -#define EBX_L7_AVX512_DQ (UINT32_C(1) << 17) // 0x00020000 -#define EBX_L7_AVX512_IFMA (UINT32_C(1) << 21) // 0x00200000 -#define EBX_L7_AVX512_PF (UINT32_C(1) << 26) // 0x04000000 -#define EBX_L7_AVX512_ER (UINT32_C(1) << 27) // 0x08000000 -#define EBX_L7_AVX512_CD (UINT32_C(1) << 28) // 0x10000000 -#define EBX_L7_AVX512_BW (UINT32_C(1) << 30) // 0x40000000 -#define EBX_L7_AVX512_VL (UINT32_C(1) << 31) // 0x80000000 -/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ -#define ECX_L7_AVX512_VBMI (UINT32_C(1) << 1) // 0x00000002 -#define ECX_L7_AVX512_VBMI2 (UINT32_C(1) << 6) // 0x00000040 -#define ECX_L7_AVX512_VNNI (UINT32_C(1) << 11) // 0x00000800 -#define ECX_L7_AVX512_BITALG (UINT32_C(1) << 12) // 0x00001000 -#define ECX_L7_AVX512_VPOPCNTDQ (UINT32_C(1) << 14) // 0x00004000 -/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ -#define EDX_L7_AVX512_4VNNIW (UINT32_C(1) << 2) // 0x00000004 -#define EDX_L7_AVX512_4FMAPS (UINT32_C(1) << 3) // 0x00000008 -#define EDX_L7_AVX512_VP2INTERSECT (UINT32_C(1) << 8) // 0x00000100 -/* CPUID (LEAF=7, 
SUBLEAF=1) [EAX] */ -#define EAX_L7S1_AVX_VNNI (UINT32_C(1) << 4) // 0x00000010 -#define EAX_L7S1_AVX_IFMA (UINT32_C(1) << 23) // 0x00800000 -/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ -#define EDX_L7S1_AVX_VNNI_INT8 (UINT32_C(1) << 4) // 0x00000010 -#define EDX_L7S1_AVX_NE_CONVERT (UINT32_C(1) << 5) // 0x00000020 -#define EDX_L7S1_AVX_VNNI_INT16 (UINT32_C(1) << 10) // 0x00000400 - /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. @@ -195,13 +134,6 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -/* XSAVE state components (XCR0 control register) */ -#define XCR0_SSE (UINT32_C(1) << 1) // 0x00000002 -#define XCR0_AVX (UINT32_C(1) << 2) // 0x00000004 -#define XCR0_AVX512_OPMASK (UINT32_C(1) << 5) // 0x00000020 -#define XCR0_AVX512_ZMM_HI256 (UINT32_C(1) << 6) // 0x00000040 -#define XCR0_AVX512_HI16_ZMM (UINT32_C(1) << 7) // 0x00000080 - static inline uint64_t get_xgetbv(uint32_t index) { @@ -380,11 +312,11 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? 
get_xgetbv(0) : 0; - flags->xcr0_sse = CPUID_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = CPUID_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = CPUID_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); - flags->xcr0_avx512_zmm_hi256 = CPUID_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); - flags->xcr0_avx512_hi16_zmm = CPUID_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); + flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); + flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); + flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); + flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); + flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); #endif } From 39d2ba4de59cf2bf42398592bcfe14c3b1894edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 21 Dec 2024 11:42:41 +0100 Subject: [PATCH 31/78] fix warnings --- Python/cpuinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 02ddc0dfafc0b5..595d4e075c848c 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -324,7 +324,6 @@ static inline void cpuid_features_finalize(py_cpuid_features *flags) { assert(flags->ready == 0); - assert(flags->maxleaf >= 0); // Here, any flag that may depend on others should be correctly set // at runtime to avoid illegal instruction errors. 
From d6a3523b2ef80de1a096939666b644a0a4b9b334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Dec 2024 16:08:58 +0100 Subject: [PATCH 32/78] remove un-necessary comment and newline continuation --- Python/cpuinfo.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 595d4e075c848c..7181cc019d4a1c 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -49,8 +49,7 @@ # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) \ - // macros above should be sorted in alphabetical order +#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif From 3cb79f6b94ceeac57b1016e7592149cea35edb0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:08:13 +0100 Subject: [PATCH 33/78] regen configure --- configure | 493 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 290 insertions(+), 203 deletions(-) diff --git a/configure b/configure index 8a1a8afbc41eaa..1f9e74df81b7e9 100755 --- a/configure +++ b/configure @@ -32083,8 +32083,8 @@ printf %s "checking whether C compiler accepts -msse... 
" >&6; } if test ${ax_cv_check_cflags___msse+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32101,11 +32101,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse=yes -else $as_nop - ax_cv_check_cflags___msse=no +else case e in #( + e) ax_cv_check_cflags___msse=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } @@ -32114,8 +32116,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32131,8 +32134,8 @@ printf %s "checking whether C compiler accepts -msse2... " >&6; } if test ${ax_cv_check_cflags___msse2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32149,11 +32152,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse2=yes -else $as_nop - ax_cv_check_cflags___msse2=no +else case e in #( + e) ax_cv_check_cflags___msse2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } @@ -32162,8 +32167,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32179,8 +32185,8 @@ printf %s "checking whether C compiler accepts -msse3... 
" >&6; } if test ${ax_cv_check_cflags___msse3+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32197,11 +32203,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse3=yes -else $as_nop - ax_cv_check_cflags___msse3=no +else case e in #( + e) ax_cv_check_cflags___msse3=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } @@ -32210,8 +32218,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32227,8 +32236,8 @@ printf %s "checking whether C compiler accepts -mssse3... " >&6; } if test ${ax_cv_check_cflags___mssse3+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mssse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32245,11 +32254,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mssse3=yes -else $as_nop - ax_cv_check_cflags___mssse3=no +else case e in #( + e) ax_cv_check_cflags___mssse3=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } @@ -32258,8 +32269,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32275,8 +32287,8 @@ printf %s "checking whether C compiler accepts -msse4.1... 
" >&6; } if test ${ax_cv_check_cflags___msse4_1+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32293,11 +32305,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse4_1=yes -else $as_nop - ax_cv_check_cflags___msse4_1=no +else case e in #( + e) ax_cv_check_cflags___msse4_1=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } @@ -32306,8 +32320,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32323,8 +32338,8 @@ printf %s "checking whether C compiler accepts -msse4.2... " >&6; } if test ${ax_cv_check_cflags___msse4_2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -msse4.2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32341,11 +32356,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___msse4_2=yes -else $as_nop - ax_cv_check_cflags___msse4_2=no +else case e in #( + e) ax_cv_check_cflags___msse4_2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } @@ -32354,8 +32371,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32372,8 +32390,8 @@ printf %s "checking whether C compiler accepts -mavx... 
" >&6; } if test ${ax_cv_check_cflags___mavx+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32390,11 +32408,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx=yes -else $as_nop - ax_cv_check_cflags___mavx=no +else case e in #( + e) ax_cv_check_cflags___mavx=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } @@ -32403,8 +32423,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32420,8 +32441,8 @@ printf %s "checking whether C compiler accepts -mavxifma... " >&6; } if test ${ax_cv_check_cflags___mavxifma+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32438,11 +32459,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxifma=yes -else $as_nop - ax_cv_check_cflags___mavxifma=no +else case e in #( + e) ax_cv_check_cflags___mavxifma=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } @@ -32451,8 +32474,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32468,8 +32492,8 @@ printf %s "checking whether C compiler accepts -mavxneconvert... 
" >&6; } if test ${ax_cv_check_cflags___mavxneconvert+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxneconvert" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32486,11 +32510,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxneconvert=yes -else $as_nop - ax_cv_check_cflags___mavxneconvert=no +else case e in #( + e) ax_cv_check_cflags___mavxneconvert=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } @@ -32499,8 +32525,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32517,8 +32544,8 @@ printf %s "checking whether C compiler accepts -mavxvnni... 
" >&6; } if test ${ax_cv_check_cflags___mavxvnni+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32535,11 +32562,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnni=yes -else $as_nop - ax_cv_check_cflags___mavxvnni=no +else case e in #( + e) ax_cv_check_cflags___mavxvnni=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } @@ -32548,8 +32577,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32565,8 +32595,8 @@ printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint8+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnniint8" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32583,11 +32613,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnniint8=yes -else $as_nop - ax_cv_check_cflags___mavxvnniint8=no +else case e in #( + e) ax_cv_check_cflags___mavxvnniint8=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } @@ -32596,8 +32628,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32613,8 +32646,8 @@ printf %s "checking whether C compiler 
accepts -mavxvnniint16... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint16+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavxvnniint16" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32631,11 +32664,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavxvnniint16=yes -else $as_nop - ax_cv_check_cflags___mavxvnniint16=no +else case e in #( + e) ax_cv_check_cflags___mavxvnniint16=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } @@ -32644,8 +32679,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32662,8 +32698,8 @@ printf %s "checking whether C compiler accepts -mavx2... 
" >&6; } if test ${ax_cv_check_cflags___mavx2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32680,11 +32716,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx2=yes -else $as_nop - ax_cv_check_cflags___mavx2=no +else case e in #( + e) ax_cv_check_cflags___mavx2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } @@ -32693,8 +32731,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32711,8 +32750,8 @@ printf %s "checking whether C compiler accepts -mavx512f... " >&6; } if test ${ax_cv_check_cflags___mavx512f+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32729,11 +32768,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512f=yes -else $as_nop - ax_cv_check_cflags___mavx512f=no +else case e in #( + e) ax_cv_check_cflags___mavx512f=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } @@ -32742,8 +32783,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32759,8 +32801,8 @@ printf %s "checking whether C compiler accepts -mavx512cd... 
" >&6; } if test ${ax_cv_check_cflags___mavx512cd+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512cd" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32777,11 +32819,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512cd=yes -else $as_nop - ax_cv_check_cflags___mavx512cd=no +else case e in #( + e) ax_cv_check_cflags___mavx512cd=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } @@ -32790,8 +32834,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32808,8 +32853,8 @@ printf %s "checking whether C compiler accepts -mavx512er... " >&6; } if test ${ax_cv_check_cflags___mavx512er+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512er" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32826,11 +32871,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512er=yes -else $as_nop - ax_cv_check_cflags___mavx512er=no +else case e in #( + e) ax_cv_check_cflags___mavx512er=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } @@ -32839,8 +32886,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32856,8 +32904,8 @@ printf %s "checking whether C compiler accepts 
-mavx512pf... " >&6; } if test ${ax_cv_check_cflags___mavx512pf+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512pf" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32874,11 +32922,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512pf=yes -else $as_nop - ax_cv_check_cflags___mavx512pf=no +else case e in #( + e) ax_cv_check_cflags___mavx512pf=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } @@ -32887,8 +32937,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32905,8 +32956,8 @@ printf %s "checking whether C compiler accepts -mavx5124fmaps... 
" >&6; } if test ${ax_cv_check_cflags___mavx5124fmaps+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx5124fmaps" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32923,11 +32974,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx5124fmaps=yes -else $as_nop - ax_cv_check_cflags___mavx5124fmaps=no +else case e in #( + e) ax_cv_check_cflags___mavx5124fmaps=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } @@ -32936,8 +32989,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -32953,8 +33007,8 @@ printf %s "checking whether C compiler accepts -mavx5124vnniw... 
" >&6; } if test ${ax_cv_check_cflags___mavx5124vnniw+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx5124vnniw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -32971,11 +33025,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx5124vnniw=yes -else $as_nop - ax_cv_check_cflags___mavx5124vnniw=no +else case e in #( + e) ax_cv_check_cflags___mavx5124vnniw=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } @@ -32984,8 +33040,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33002,8 +33059,8 @@ printf %s "checking whether C compiler accepts -mavx512vpopcntdq... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vpopcntdq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33020,11 +33077,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vpopcntdq=yes -else $as_nop - ax_cv_check_cflags___mavx512vpopcntdq=no +else case e in #( + e) ax_cv_check_cflags___mavx512vpopcntdq=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } @@ -33033,8 +33092,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33051,8 +33111,8 @@ printf %s "checking whether C compiler accepts -mavx512vl... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vl+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vl" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33069,11 +33129,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vl=yes -else $as_nop - ax_cv_check_cflags___mavx512vl=no +else case e in #( + e) ax_cv_check_cflags___mavx512vl=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } @@ -33082,8 +33144,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33099,8 +33162,8 @@ printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } if test ${ax_cv_check_cflags___mavx512dq+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512dq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33117,11 +33180,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512dq=yes -else $as_nop - ax_cv_check_cflags___mavx512dq=no +else case e in #( + e) ax_cv_check_cflags___mavx512dq=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } @@ -33130,8 +33195,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33147,8 +33213,8 @@ printf %s "checking whether C compiler accepts 
-mavx512bw... " >&6; } if test ${ax_cv_check_cflags___mavx512bw+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512bw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33165,11 +33231,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512bw=yes -else $as_nop - ax_cv_check_cflags___mavx512bw=no +else case e in #( + e) ax_cv_check_cflags___mavx512bw=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } @@ -33178,8 +33246,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33196,8 +33265,8 @@ printf %s "checking whether C compiler accepts -mavx512ifma... 
" >&6; } if test ${ax_cv_check_cflags___mavx512ifma+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512ifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33214,11 +33283,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512ifma=yes -else $as_nop - ax_cv_check_cflags___mavx512ifma=no +else case e in #( + e) ax_cv_check_cflags___mavx512ifma=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } @@ -33227,8 +33298,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33244,8 +33316,8 @@ printf %s "checking whether C compiler accepts -mavx512vbmi... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vbmi" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33262,11 +33334,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vbmi=yes -else $as_nop - ax_cv_check_cflags___mavx512vbmi=no +else case e in #( + e) ax_cv_check_cflags___mavx512vbmi=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } @@ -33275,8 +33349,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33293,8 +33368,8 @@ printf %s "checking whether C compiler accepts -mavx512vnni... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vnni+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33311,11 +33386,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vnni=yes -else $as_nop - ax_cv_check_cflags___mavx512vnni=no +else case e in #( + e) ax_cv_check_cflags___mavx512vnni=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } @@ -33324,8 +33401,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33342,8 +33420,8 @@ printf %s "checking whether C compiler accepts -mavx512vbmi2... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi2+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vbmi2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33360,11 +33438,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vbmi2=yes -else $as_nop - ax_cv_check_cflags___mavx512vbmi2=no +else case e in #( + e) ax_cv_check_cflags___mavx512vbmi2=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } @@ -33373,8 +33453,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33390,8 +33471,8 @@ printf %s "checking whether C compiler accepts -mavx512bitalg... 
" >&6; } if test ${ax_cv_check_cflags___mavx512bitalg+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512bitalg" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33408,11 +33489,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512bitalg=yes -else $as_nop - ax_cv_check_cflags___mavx512bitalg=no +else case e in #( + e) ax_cv_check_cflags___mavx512bitalg=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } @@ -33421,8 +33504,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi @@ -33439,8 +33523,8 @@ printf %s "checking whether C compiler accepts -mavx512vp2intersect... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vp2intersect+y} then : printf %s "(cached) " >&6 -else $as_nop - +else case e in #( + e) ax_check_save_flags=$CFLAGS CFLAGS="$CFLAGS -mavx512vp2intersect" cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -33457,11 +33541,13 @@ _ACEOF if ac_fn_c_try_compile "$LINENO" then : ax_cv_check_cflags___mavx512vp2intersect=yes -else $as_nop - ax_cv_check_cflags___mavx512vp2intersect=no +else case e in #( + e) ax_cv_check_cflags___mavx512vp2intersect=no ;; +esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags + CFLAGS=$ax_check_save_flags ;; +esac fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } @@ -33470,8 +33556,9 @@ then : printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h -else $as_nop - : +else case e in #( + e) : ;; +esac fi From e0a578caf066b74716af8f3b5b9aa47b03cf3e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 17 Feb 2025 11:18:31 +0100 Subject: [PATCH 34/78] clinic now supports empty comment lines in Python blocks --- Include/internal/pycore_cpuinfo.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index eecc73736c5f44..3b504da9831cdd 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -107,14 +107,11 @@ def get_member_mask(bit): val = format(1 << bit, '008x') return f'= 0x{val},' -# BUG(picnixz): Clinic does not like when commented lines have empty lines. -# so we use '::' for now to indicate an empty line. 
-# :: # The enumeration is rendered as follows: -# :: +# # = 0x, // bit = BIT # ^ ^ ^ ^ ^ ^ ^ -# :: +# # where ^ indicates a column that is a multiple of 4, has # exactly 8 characters and has at most 2 characters. @@ -189,7 +186,7 @@ for group, values in data.items(): Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -/*[python end generated code: output=e53c5376296af250 input=46c9e43c1f6f5cf9]*/ +/*[python end generated code: output=e53c5376296af250 input=4102387db46d5787]*/ } py_cpuid_feature_mask; // fmt: on From c12f9c74fc6b9e728fee3c34fa308444808c308e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 29 Mar 2025 13:20:30 +0100 Subject: [PATCH 35/78] move cpuinfo enumerations to real invokable Python scripts --- Include/internal/pycore_cpuinfo.h | 180 +----------------- .../internal/pycore_cpuinfo_cpuid_features.h | 102 ++++++++++ .../internal/pycore_cpuinfo_xsave_features.h | 47 +++++ Makefile.pre.in | 2 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Tools/cpuinfo/__init__.py | 0 Tools/cpuinfo/_util.py | 18 ++ Tools/cpuinfo/cpuid_features_gen.py | 138 ++++++++++++++ Tools/cpuinfo/xsave_features_gen.py | 59 ++++++ 10 files changed, 378 insertions(+), 176 deletions(-) create mode 100644 Include/internal/pycore_cpuinfo_cpuid_features.h create mode 100644 Include/internal/pycore_cpuinfo_xsave_features.h create mode 100644 Tools/cpuinfo/__init__.py create mode 100644 Tools/cpuinfo/_util.py create mode 100644 Tools/cpuinfo/cpuid_features_gen.py create mode 100644 Tools/cpuinfo/xsave_features_gen.py diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 3b504da9831cdd..c427d8c1fd3585 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -1,4 +1,6 @@ /* + * 
@author Bénédikt Tran + * * Interface for detecting the different CPUID flags in an opaque manner. * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. * @@ -22,182 +24,8 @@ extern "C" { #endif #include "Python.h" - -/* - * The enumeration describes masks to apply on CPUID output registers. - * - * Member names are Py_CPUID_MASK__L[S]_, - * where <> (resp. []) denotes a required (resp. optional) group and: - * - * - REGISTER is EAX, EBX, ECX or EDX, - * - LEAF is the initial value of the EAX register (1 or 7), - * - SUBLEAF is the initial value of the ECX register (omitted if 0), and - * - FEATURE is a SIMD feature (with one or more specialized instructions). - * - * For maintainability, the flags are ordered by registers, leafs, subleafs, - * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. - * - * Note 1: The LEAF is also called the 'page' or the 'level'. - * Note 2: The SUBLEAF is also referred to as the 'count'. - * - * The LEAF value should only 1 or 7 as other values may have different - * meanings depending on the underlying architecture. 
- */ -// fmt: off -typedef enum py_cpuid_feature_mask { -/*[python input] -# {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}} -data = { - (1, 0, 'ECX'): { - 'SSE3': 0, - 'PCLMULQDQ': 1, - 'SSSE3': 9, - 'FMA': 12, - 'SSE4_1': 19, - 'SSE4_2': 20, - 'POPCNT': 23, - 'XSAVE': 26, - 'OSXSAVE': 27, - 'AVX': 28, - }, - (1, 0, 'EDX'): { - 'CMOV': 15, - 'SSE': 25, - 'SSE2': 26, - }, - (7, 0, 'EBX'): { - 'AVX2': 5, - 'AVX512_F': 16, - 'AVX512_DQ': 17, - 'AVX512_IFMA': 21, - 'AVX512_PF': 26, - 'AVX512_ER': 27, - 'AVX512_CD': 28, - 'AVX512_BW': 30, - 'AVX512_VL': 31, - }, - (7, 0, 'ECX'): { - 'AVX512_VBMI': 1, - 'AVX512_VBMI2': 6, - 'AVX512_VNNI': 11, - 'AVX512_BITALG': 12, - 'AVX512_VPOPCNTDQ': 14, - }, - (7, 0, 'EDX'): { - 'AVX512_4VNNIW': 2, - 'AVX512_4FMAPS': 3, - 'AVX512_VP2INTERSECT': 8, - }, - (7, 1, 'EAX'): { - 'AVX_VNNI': 4, - 'AVX_IFMA': 23, - }, - (7, 1, 'EDX'): { - 'AVX_VNNI_INT8': 4, - 'AVX_NE_CONVERT': 5, - 'AVX_VNNI_INT16': 10, - }, -} - -def get_member_name(leaf, subleaf, registry, name): - node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}' - return f'Py_CPUID_MASK_{registry}_{node}_{name}' - -def get_member_mask(bit): - val = format(1 << bit, '008x') - return f'= 0x{val},' - -# The enumeration is rendered as follows: -# -# = 0x, // bit = BIT -# ^ ^ ^ ^ ^ ^ ^ -# -# where ^ indicates a column that is a multiple of 4, has -# exactly 8 characters and has at most 2 characters. - -INDENT = ' ' * 4 -# BUG(picnixz): Clinic does not like when '/' and '*' are put together. 
-COMMENT = '/' + '* ' - -def next_block(w): - """Compute the smallest multiple of 4 strictly larger than *w*.""" - return ((w + 3) & ~0x03) if (w % 4) else (w + 4) - -NAMESIZE = next_block(max( - len(get_member_name(*group, name)) - for group, values in data.items() - for name in values -)) -MASKSIZE = 8 + next_block(len('= 0x,')) - -for group, values in data.items(): - title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group) - print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='') - for name, bit in values.items(): - assert name, f"invalid entry in {group}" - key = get_member_name(*group, name) - assert 0 <= bit < 32, f"invalid bit value for {name!r}" - val = get_member_mask(bit) - - member_name = key.ljust(NAMESIZE) - member_mask = val.ljust(MASKSIZE) - - print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='') -[python start generated code]*/ - /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 - /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 - /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - 
Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 - /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 - /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 - /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 - /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -/*[python end generated code: output=e53c5376296af250 input=4102387db46d5787]*/ -} py_cpuid_feature_mask; -// fmt: on - -/* XSAVE state components (XCR0 control register) */ -typedef enum py_xsave_feature_mask { - Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -} py_xsave_feature_mask; +#include "pycore_cpuinfo_cpuid_features.h" +#include "pycore_cpuinfo_xsave_features.h" typedef struct py_cpuid_features { uint32_t maxleaf; diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h new file 
mode 100644 index 00000000000000..a67a1472bfb85f --- /dev/null +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -0,0 +1,102 @@ +/* + * @author Bénédikt Tran + * @seealso Tools/cpuinfo/cpuid_features_gen.py + * + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. + * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture.
+ */ + +#ifndef Py_INTERNAL_CPUINFO_CPUID_FEATURES_H +#define Py_INTERNAL_CPUINFO_CPUID_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +// fmt: off +/*[python input] +import importlib +import os +import sys + +ROOT = os.getcwd() +TOOL = os.path.join(ROOT, 'Tools/cpuinfo/cpuid_features_gen.py') +TOOL = os.path.realpath(TOOL) + +if not os.path.exists(TOOL): + raise FileNotFoundError(TOOL) + +sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) +module = importlib.import_module('cpuinfo.cpuid_features_gen') +print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) +[python start generated code]*/ +typedef enum py_cpuid_feature_mask { + /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ + Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + Py_CPUID_MASK_EBX_L7_AVX512_CD = 
0x10000000, // bit = 28 + Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ + Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ + Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ + Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +} py_cpuid_feature_mask; +/*[python end generated code: output=c4460242e465fa91 input=a07f431329efd11e]*/ +// fmt: on + +#endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h new file mode 100644 index 00000000000000..42097d43529deb --- /dev/null +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -0,0 +1,47 @@ +/* + * @author Bénédikt Tran + * @seealso Tools/cpuinfo/xsave_features_gen.py + * + * XSAVE state components (XCR0 control register) + */ +#ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H +#define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +// fmt: off +/*[python input] +import importlib +import os +import 
sys + +ROOT = os.getcwd() +TOOL = os.path.join(ROOT, 'Tools/cpuinfo/xsave_features_gen.py') +TOOL = os.path.realpath(TOOL) + +if not os.path.exists(TOOL): + raise FileNotFoundError(TOOL) + +sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) +module = importlib.import_module('cpuinfo.xsave_features_gen') +print(module.generate_xsave_features_enum("py_xsave_feature_mask")) +[python start generated code]*/ +typedef enum py_xsave_feature_mask { + Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +} py_xsave_feature_mask; +/*[python end generated code: output=9a476ed0abbc617b input=78e3d4ff6b796edb]*/ +// fmt: on + +#endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 0211ae1804afcf..f23f34c7774018 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1223,6 +1223,8 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_cpuinfo.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_xsave_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 24c8996e9ebc72..7a0ff28ad0fd59 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -228,6 +228,8 @@ + + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 88845d289c2235..3a8b043f8b9f50 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -603,6 +603,12 @@ Include\cpython + + Include\cpython + + + 
Include\cpython + Include\internal diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py new file mode 100644 index 00000000000000..9aef599bd8f0e5 --- /dev/null +++ b/Tools/cpuinfo/_util.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +__all__ = ["next_block", "make_enum_member"] + + +def next_block(w: int) -> int: + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + + +_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) + + +def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: + member_name = key.ljust(name_maxsize) + member_mask = format(1 << bit, "008x") + member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) + return f"{member_name}{member_mask} // bit = {bit}" diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py new file mode 100644 index 00000000000000..da5dc005bd2bf7 --- /dev/null +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -0,0 +1,138 @@ +""" +Generate an enumeration describing masks to apply on CPUID output registers. + +Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +where <> (resp. []) denotes a required (resp. optional) group and: + +- REGISTER is EAX, EBX, ECX or EDX, +- LEAF is the initial value of the EAX register (1 or 7), +- SUBLEAF is the initial value of the ECX register (omitted if 0), and +- FEATURE is a SIMD feature (with one or more specialized instructions). + +For maintainability, the flags are ordered by registers, leafs, subleafs, +and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + +Note 1: The LEAF is also called the 'page' or the 'level'. +Note 2: The SUBLEAF is also referred to as the 'count'. + +The LEAF value should only be 1 or 7 as other values may have different +meanings depending on the underlying architecture.
+""" + +from __future__ import annotations + +__all__ = ["generate_cpuid_features_enum"] + +from functools import partial +from io import StringIO +from typing import TYPE_CHECKING +from . import _util as util + +if TYPE_CHECKING: + from typing import Final, IO + + type Leaf = int + type SubLeaf = int + type Registry = str + type FeatureFamily = tuple[Leaf, SubLeaf, Registry] + + type Feature = str + type Bit = int + +CPUID_FEATURES: Final[dict[CPUIDFeatureFamily, dict[Feature, Bit]]] = { + (1, 0, "ECX"): { + "SSE3": 0, + "PCLMULQDQ": 1, + "SSSE3": 9, + "FMA": 12, + "SSE4_1": 19, + "SSE4_2": 20, + "POPCNT": 23, + "XSAVE": 26, + "OSXSAVE": 27, + "AVX": 28, + }, + (1, 0, "EDX"): { + "CMOV": 15, + "SSE": 25, + "SSE2": 26, + }, + (7, 0, "EBX"): { + "AVX2": 5, + "AVX512_F": 16, + "AVX512_DQ": 17, + "AVX512_IFMA": 21, + "AVX512_PF": 26, + "AVX512_ER": 27, + "AVX512_CD": 28, + "AVX512_BW": 30, + "AVX512_VL": 31, + }, + (7, 0, "ECX"): { + "AVX512_VBMI": 1, + "AVX512_VBMI2": 6, + "AVX512_VNNI": 11, + "AVX512_BITALG": 12, + "AVX512_VPOPCNTDQ": 14, + }, + (7, 0, "EDX"): { + "AVX512_4VNNIW": 2, + "AVX512_4FMAPS": 3, + "AVX512_VP2INTERSECT": 8, + }, + (7, 1, "EAX"): { + "AVX_VNNI": 4, + "AVX_IFMA": 23, + }, + (7, 1, "EDX"): { + "AVX_VNNI_INT8": 4, + "AVX_NE_CONVERT": 5, + "AVX_VNNI_INT16": 10, + }, +} + + +def get_member_name( + leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature +) -> str: + node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" + return f"Py_CPUID_MASK_{registry}_{node}_{name}" + + +NAMESIZE: Final[int] = util.next_block( + max( + len(get_member_name(*family, name)) + for family, values in CPUID_FEATURES.items() + for name in values + ) +) + + +def generate_cpuid_features_enum(enum_name: str) -> str: + # The enumeration is rendered as follows: + # + # <NAME> = 0x<MASK>, // bit = BIT + # ^ ^ ^ ^ ^ ^ ^ + # + # where ^ indicates a column that is a multiple of 4, <MASK> has + # exactly 8 characters and <BIT> has at most 2 characters.
+ + output = StringIO() + write = partial(print, file=output) + indent = " " * 4 + + write(f"typedef enum {enum_name} {{") + for family, values in CPUID_FEATURES.items(): + leaf, subleaf, registry = family + title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" + write(indent, "/* ", title, " */", sep="") + for feature_name, bit in values.items(): + if not feature_name: + raise ValueError(f"invalid entry for {family}") + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_member_name(leaf, subleaf, registry, feature_name) + member_def = util.make_enum_member(key, bit, NAMESIZE) + write(indent, member_def, sep="") + write(f"}} {enum_name};") + return output.getvalue().rstrip("\n") diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py new file mode 100644 index 00000000000000..fdcbcd2b51af27 --- /dev/null +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -0,0 +1,59 @@ +""" +Generate enumeration for XSAVE state components (XCR0 control register). +""" + +from __future__ import annotations + +__all__ = ["generate_xsave_features_enum"] + +from functools import partial +from io import StringIO +from typing import TYPE_CHECKING +from . import _util as util + +if TYPE_CHECKING: + from typing import Final + + type Feature = str + type Bit = int + +XSAVE_FEATURES: Final[dict[Feature, Bit]] = { + "SSE": 1, + "AVX": 2, + "AVX512_OPMASK": 5, + "AVX512_ZMM_HI256": 6, + "AVX512_HI16_ZMM": 7, +} + + +def get_member_name(feature: Feature) -> str: + return f"Py_XSAVE_MASK_XCR0_{feature}" + + +NAMESIZE: Final[int] = util.next_block( + max(map(len, map(get_member_name, XSAVE_FEATURES))) +) + + +def generate_xsave_features_enum(enum_name: str) -> str: + # The enumeration is rendered as follows: + # + # <NAME> = 0x<MASK>, // bit = BIT + # ^ ^ ^ ^ ^ ^ ^ + # + # where ^ indicates a column that is a multiple of 4, <MASK> has + # exactly 8 characters and <BIT> has at most 2 characters.
+ + output = StringIO() + write = partial(print, file=output) + indent = " " * 4 + + write(f"typedef enum {enum_name} {{") + for feature_name, bit in XSAVE_FEATURES.items(): + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_member_name(feature_name) + member_def = util.make_enum_member(key, bit, NAMESIZE) + write(indent, member_def, sep="") + write(f"}} {enum_name};") + return output.getvalue().rstrip("\n") From bd3589feb1adb6f0e8bd0387c784e8e3bcc99cf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Apr 2025 13:07:28 +0200 Subject: [PATCH 36/78] add comments --- Include/internal/pycore_cpuinfo_cpuid_features.h | 6 +++--- Include/internal/pycore_cpuinfo_xsave_features.h | 10 ++++++---- Tools/cpuinfo/cpuid_features_gen.py | 12 +++++++++++- Tools/cpuinfo/xsave_features_gen.py | 9 +++++++++ 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index a67a1472bfb85f..b8c3eb38f0d0e4 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -42,14 +42,14 @@ import os import sys ROOT = os.getcwd() -TOOL = os.path.join(ROOT, 'Tools/cpuinfo/cpuid_features_gen.py') +TOOL = os.path.join(ROOT, "Tools/cpuinfo/cpuid_features_gen.py") TOOL = os.path.realpath(TOOL) if not os.path.exists(TOOL): raise FileNotFoundError(TOOL) sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module('cpuinfo.cpuid_features_gen') +module = importlib.import_module("cpuinfo.cpuid_features_gen") print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) [python start generated code]*/ typedef enum py_cpuid_feature_mask { @@ -96,7 +96,7 @@ typedef enum py_cpuid_feature_mask { Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 
Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 } py_cpuid_feature_mask; -/*[python end generated code: output=c4460242e465fa91 input=a07f431329efd11e]*/ +/*[python end generated code: output=c4460242e465fa91 input=61d2b5f1bc368b94]*/ // fmt: on #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index 42097d43529deb..e81e1ab76557df 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -2,7 +2,9 @@ * @author Bénédikt Tran * @seealso Tools/cpuinfo/xsave_features_gen.py * - * XSAVE state components (XCR0 control register) + * XSAVE state components (XCR0 control register). + * + * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. */ #ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H #define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H @@ -24,14 +26,14 @@ import os import sys ROOT = os.getcwd() -TOOL = os.path.join(ROOT, 'Tools/cpuinfo/xsave_features_gen.py') +TOOL = os.path.join(ROOT, "Tools/cpuinfo/xsave_features_gen.py") TOOL = os.path.realpath(TOOL) if not os.path.exists(TOOL): raise FileNotFoundError(TOOL) sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module('cpuinfo.xsave_features_gen') +module = importlib.import_module("cpuinfo.xsave_features_gen") print(module.generate_xsave_features_enum("py_xsave_feature_mask")) [python start generated code]*/ typedef enum py_xsave_feature_mask { @@ -41,7 +43,7 @@ typedef enum py_xsave_feature_mask { Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 } py_xsave_feature_mask; -/*[python end generated code: output=9a476ed0abbc617b input=78e3d4ff6b796edb]*/ +/*[python end generated code: output=9a476ed0abbc617b input=41f35058299c0118]*/ // fmt: on #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git 
a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index da5dc005bd2bf7..f23a68c141b696 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -17,6 +17,8 @@ The LEAF value should only 1 or 7 as other values may have different meanings depending on the underlying architecture. + +.. seealso:: Include/internal/pycore_cpuinfo_cpuid_features.h """ from __future__ import annotations @@ -39,7 +41,8 @@ type Feature = str type Bit = int -CPUID_FEATURES: Final[dict[CPUIDFeatureFamily, dict[Feature, Bit]]] = { +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, Bit]]] = { + # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits. (1, 0, "ECX"): { "SSE3": 0, "PCLMULQDQ": 1, @@ -57,6 +60,7 @@ "SSE": 25, "SSE2": 26, }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features. (7, 0, "EBX"): { "AVX2": 5, "AVX512_F": 16, @@ -80,6 +84,7 @@ "AVX512_4FMAPS": 3, "AVX512_VP2INTERSECT": 8, }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=1:_Extended_Features. (7, 1, "EAX"): { "AVX_VNNI": 4, "AVX_IFMA": 23, @@ -109,6 +114,11 @@ def get_member_name( def generate_cpuid_features_enum(enum_name: str) -> str: + """Used by Include/internal/pycore_cpuinfo_cpuid_features.h. + + The C enumeration is generated by this function and Argument Clinic. + """ + # The enumeration is rendered as follows: # # = 0x, // bit = BIT diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index fdcbcd2b51af27..bacb4e8b4344a8 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -1,5 +1,9 @@ """ Generate enumeration for XSAVE state components (XCR0 control register). + +See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. + +.. 
seealso:: Include/internal/pycore_cpuinfo_xsave_features.h """ from __future__ import annotations @@ -36,6 +40,11 @@ def get_member_name(feature: Feature) -> str: def generate_xsave_features_enum(enum_name: str) -> str: + """Used by Include/internal/pycore_cpuinfo_xsave_features.h. + + The C enumeration is generated by this function and Argument Clinic. + """ + # The enumeration is rendered as follows: # # = 0x, // bit = BIT From d213b67c423743b084901339bcd22d970599da50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:29:28 +0200 Subject: [PATCH 37/78] update C comments --- Include/internal/pycore_cpuinfo.h | 10 +++++++--- Python/cpuinfo.c | 6 ++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index c427d8c1fd3585..57ad48efb038c0 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -29,7 +29,11 @@ extern "C" { typedef struct py_cpuid_features { uint32_t maxleaf; - /* Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. */ + /* + * Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. + * Whenever this macro is used, do not forget to update the number of + * fields and the bitsize of the 'ready' member (see structure end). 
+ */ #define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 // --- Streaming SIMD Extensions ------------------------------------------ _Py_CPUID_DECL_FLAG(sse); @@ -94,8 +98,8 @@ typedef struct py_cpuid_features { _Py_CPUID_DECL_FLAG(popcnt); _Py_CPUID_DECL_FLAG(pclmulqdq); - _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV - _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS // --- XCR0 register bits ------------------------------------------------- _Py_CPUID_DECL_FLAG(xcr0_sse); diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 7181cc019d4a1c..0f934d04d76446 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -162,6 +162,7 @@ detect_cpuid_maxleaf(void) static inline void detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS @@ -206,6 +207,7 @@ static inline void detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD @@ -282,6 +284,7 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { + assert(flags->maxleaf >= 7); (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -309,6 +312,7 @@ static inline void detect_cpuid_xsave_state(py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. 
+ assert(flags->maxleaf >= 1); #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); @@ -501,6 +505,7 @@ cpuid_detect_l1_features(py_cpuid_features *flags) static inline void cpuid_detect_l7s0_features(py_cpuid_features *flags) { + assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); @@ -513,6 +518,7 @@ cpuid_detect_l7s0_features(py_cpuid_features *flags) static inline void cpuid_detect_l7s1_features(py_cpuid_features *flags) { + assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); From 19b7d86e374fa94529b110cb57089511c37c1971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:43:13 +0200 Subject: [PATCH 38/78] TMP: usage proof-of-concept --- Modules/blake2module.c | 132 +++++++++-------------------------------- Modules/hmacmodule.c | 67 +++------------------ 2 files changed, 35 insertions(+), 164 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 0b0642c1e04e5a..4a9f16c3007b23 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,29 +16,11 @@ #include "pyconfig.h" #include "Python.h" #include "hashlib.h" -#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" -// QUICK CPU AUTODETECTION -// -// See https://github.com/python/cpython/pull/119316 -- we only enable -// vectorized versions for Intel CPUs, even though HACL*'s "vec128" modules also -// run on ARM NEON. 
(We could enable them on POWER -- but I don't have access to -// a test machine to see if that speeds anything up.) -// -// Note that configure.ac and the rest of the build are written in such a way -// that if the configure script finds suitable flags to compile HACL's SIMD128 -// (resp. SIMD256) files, then Hacl_Hash_Blake2b_Simd128.c (resp. ...) will be -// pulled into the build automatically, and then only the CPU autodetection will -// need to be updated here. - -#if defined(__x86_64__) && defined(__GNUC__) -#include -#elif defined(_M_X64) -#include -#endif - #include // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't @@ -51,83 +33,6 @@ # undef HACL_CAN_COMPILE_SIMD256 #endif -// ECX -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) - -// EBX -#define EBX_AVX2 (1 << 5) - -// EDX -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - -// zero-initialized by default -typedef struct { - bool sse, sse2, sse3, sse41, sse42, cmov, avx, avx2; - bool done; -} cpu_flags; - -void detect_cpu_features(cpu_flags *flags) { - if (!flags->done) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = { 0 }; - int info7[4] = { 0 }; - __cpuidex(info1, 1, 0); - __cpuidex(info7, 7, 0); - eax1 = info1[0]; - ebx1 = info1[1]; - ecx1 = info1[2]; - edx1 = info1[3]; - eax7 = info7[0]; - ebx7 = info7[1]; - ecx7 = info7[2]; - edx7 = info7[3]; -#endif - (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; - (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; - - - flags->avx = (ecx1 & ECX_AVX) != 0; - - flags->avx2 = (ebx7 & EBX_AVX2) != 0; - - flags->sse = (edx1 & EDX_SSE) != 0; - flags->sse2 = (edx1 & EDX_SSE2) != 0; - 
flags->cmov = (edx1 & EDX_CMOV) != 0; - - flags->sse3 = (ecx1 & ECX_SSE3) != 0; - /* ssse3 = (ecx1 & ECX_SSSE3) != 0; */ - flags->sse41 = (ecx1 & ECX_SSE4_1) != 0; - flags->sse42 = (ecx1 & ECX_SSE4_2) != 0; - - flags->done = true; - } -} - -#ifdef HACL_CAN_COMPILE_SIMD128 -static inline bool has_simd128(cpu_flags *flags) { - // For now this is Intel-only, could conceivably be #ifdef'd to something - // else. - return flags->sse && flags->sse2 && flags->sse3 && flags->sse41 && flags->sse42 && flags->cmov; -} -#endif - -#ifdef HACL_CAN_COMPILE_SIMD256 -static inline bool has_simd256(cpu_flags *flags) { - return flags->avx && flags->avx2; -} -#endif - // Small mismatch between the variable names Python defines as part of configure // at the ones HACL* expects to be set in order to enable those headers. #define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 @@ -154,9 +59,31 @@ PyDoc_STRVAR(blake2mod__doc__, typedef struct { PyTypeObject* blake2b_type; PyTypeObject* blake2s_type; - cpu_flags flags; + + bool can_run_simd128; + bool can_run_simd256; } Blake2State; +static void +blake2_init_cpu_features(Blake2State *state) +{ + py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); +#if HACL_CAN_COMPILE_SIMD128 + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; +#else + state->can_run_simd128 = false; +#endif + +#if HACL_CAN_COMPILE_SIMD256 + state->can_run_simd256 = flags.avx && flags.avx2; +#else + state->can_run_simd256 = false; +#endif +} + static inline Blake2State* blake2_get_state(PyObject *module) { @@ -224,10 +151,7 @@ static int blake2_exec(PyObject *m) { Blake2State* st = blake2_get_state(m); - - // This is called at module initialization-time, and so appears to be as - // good a place as any to probe the CPU flags. 
- detect_cpu_features(&st->flags); + blake2_init_cpu_features(st); st->blake2b_type = (PyTypeObject *)PyType_FromModuleAndSpec( m, &blake2b_type_spec, NULL); @@ -332,14 +256,14 @@ static inline blake2_impl type_to_impl(PyTypeObject *type) { #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { #ifdef HACL_CAN_COMPILE_SIMD256 - if (has_simd256(&st->flags)) + if (st->can_run_simd256) return Blake2b_256; else #endif return Blake2b; } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { #ifdef HACL_CAN_COMPILE_SIMD128 - if (has_simd128(&st->flags)) + if (st->can_run_simd128) return Blake2s_128; else #endif diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f75854c6ef5c91..4940f58a7c24f6 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,6 +17,7 @@ #endif #include "Python.h" +#include "pycore_cpuinfo.h" // py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1682,73 +1683,19 @@ hmacmodule_init_strings(hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = { 0 }; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = { 0 }; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & 
ECX_AVX) != 0; - bool avx2 = (ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if HACL_CAN_COMPILE_SIMD128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if HACL_CAN_COMPILE_SIMD256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } From d59d06d985387e2de7ce1f2af4a94c116c1108ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 22 Apr 2025 15:36:07 +0200 Subject: [PATCH 39/78] improve configure.ac --- configure | 552 +++++++++++++++++++++++++++++++-------------------- configure.ac | 131 ++++++------ 2 files changed, 410 insertions(+), 273 deletions(-) diff --git a/configure b/configure index 97e68a7bed7dba..9aa2528b0910ef 100755 --- a/configure +++ b/configure @@ -32122,14 +32122,14 @@ printf "%s\n" "$py_cv_module__blake2" >&6; } # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 
instructions. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then - # SSE +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 printf %s "checking whether C compiler accepts -msse... " >&6; } if test ${ax_cv_check_cflags___msse+y} then : @@ -32164,23 +32164,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } if test "x$ax_cv_check_cflags___msse" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + + +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 printf %s "checking whether C compiler accepts -msse2... 
" >&6; } if test ${ax_cv_check_cflags___msse2+y} then : @@ -32215,23 +32219,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } if test "x$ax_cv_check_cflags___msse2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 printf %s "checking whether C compiler accepts -msse3... " >&6; } if test ${ax_cv_check_cflags___msse3+y} then : @@ -32266,23 +32274,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } if test "x$ax_cv_check_cflags___msse3" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse3=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse3=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse3" = xyes +then : + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 printf %s "checking whether C compiler accepts -mssse3... 
" >&6; } if test ${ax_cv_check_cflags___mssse3+y} then : @@ -32317,23 +32329,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } if test "x$ax_cv_check_cflags___mssse3" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_ssse3=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_ssse3=no ;; esac fi + if test "x$ac_cv_can_compile_simd_ssse3" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 printf %s "checking whether C compiler accepts -msse4.1... " >&6; } if test ${ax_cv_check_cflags___msse4_1+y} then : @@ -32368,23 +32384,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } if test "x$ax_cv_check_cflags___msse4_1" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse4_1=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse4_1=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse4_1" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 printf %s "checking whether C compiler accepts -msse4.2... 
" >&6; } if test ${ax_cv_check_cflags___msse4_2+y} then : @@ -32419,24 +32439,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } if test "x$ax_cv_check_cflags___msse4_2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_sse4_2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_sse4_2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_sse4_2" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h - # AVX +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 + + # AVX + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 printf %s "checking whether C compiler accepts -mavx... " >&6; } if test ${ax_cv_check_cflags___mavx+y} then : @@ -32471,23 +32495,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } if test "x$ax_cv_check_cflags___mavx" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 printf %s "checking whether C compiler accepts -mavxifma... 
" >&6; } if test ${ax_cv_check_cflags___mavxifma+y} then : @@ -32522,23 +32550,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } if test "x$ax_cv_check_cflags___mavxifma" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_ifma=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_ifma=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_ifma" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } if test ${ax_cv_check_cflags___mavxneconvert+y} then : @@ -32573,24 +32605,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } if test "x$ax_cv_check_cflags___mavxneconvert" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_ne_convert=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_ne_convert=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_ne_convert" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + + +fi + - # + + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } if test ${ax_cv_check_cflags___mavxvnni+y} then : @@ -32625,23 +32661,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } if test "x$ax_cv_check_cflags___mavxvnni" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint8+y} then : @@ -32676,23 +32716,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni_int8=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni_int8=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni_int8" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } if test ${ax_cv_check_cflags___mavxvnniint16+y} then : @@ -32727,24 +32771,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx_vnni_int16=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx_vnni_int16=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx_vnni_int16" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h - # AVX-2 +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 + + # AVX-2 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 printf %s "checking whether C compiler accepts -mavx2... 
" >&6; } if test ${ax_cv_check_cflags___mavx2+y} then : @@ -32779,24 +32827,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } if test "x$ax_cv_check_cflags___mavx2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +fi + - # AVX-512 + # AVX-512 - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 printf %s "checking whether C compiler accepts -mavx512f... " >&6; } if test ${ax_cv_check_cflags___mavx512f+y} then : @@ -32831,23 +32883,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } if test "x$ax_cv_check_cflags___mavx512f" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_f=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_f=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_f" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 printf %s "checking whether C compiler accepts -mavx512cd... 
" >&6; } if test ${ax_cv_check_cflags___mavx512cd+y} then : @@ -32882,24 +32938,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } if test "x$ax_cv_check_cflags___mavx512cd" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_cd=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_cd=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_cd" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 printf %s "checking whether C compiler accepts -mavx512er... " >&6; } if test ${ax_cv_check_cflags___mavx512er+y} then : @@ -32934,23 +32994,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } if test "x$ax_cv_check_cflags___mavx512er" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_er=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_er=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_er" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 printf %s "checking whether C compiler accepts -mavx512pf... 
" >&6; } if test ${ax_cv_check_cflags___mavx512pf+y} then : @@ -32985,24 +33049,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } if test "x$ax_cv_check_cflags___mavx512pf" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_pf=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_pf=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_pf" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } if test ${ax_cv_check_cflags___mavx5124fmaps+y} then : @@ -33037,23 +33105,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_4fmaps=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_4fmaps=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_4fmaps" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + + +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } if test ${ax_cv_check_cflags___mavx5124vnniw+y} then : @@ -33088,24 +33160,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_4vnniw=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_4vnniw=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_4vnniw" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 printf %s "checking whether C compiler accepts -mavx512vpopcntdq... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} then : @@ -33140,24 +33216,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vpopcntdq=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vpopcntdq=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vpopcntdq" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } if test ${ax_cv_check_cflags___mavx512vl+y} then : @@ -33192,23 +33272,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } if test "x$ax_cv_check_cflags___mavx512vl" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vl=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vl=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vl" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } if test ${ax_cv_check_cflags___mavx512dq+y} then : @@ -33243,23 +33327,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } if test "x$ax_cv_check_cflags___mavx512dq" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_dq=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_dq=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_dq" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } if test ${ax_cv_check_cflags___mavx512bw+y} then : @@ -33294,24 +33382,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } if test "x$ax_cv_check_cflags___mavx512bw" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_bw=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_bw=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_bw" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + - # +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } if test ${ax_cv_check_cflags___mavx512ifma+y} then : @@ -33346,23 +33438,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } if test "x$ax_cv_check_cflags___mavx512ifma" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_ifma=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_ifma=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_ifma" = xyes +then : +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +fi + - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vbmi+y} then : @@ -33397,24 +33493,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vbmi=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vbmi=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vbmi" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + +fi - # + + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } if test ${ax_cv_check_cflags___mavx512vnni+y} then : @@ -33449,24 +33549,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } if test "x$ax_cv_check_cflags___mavx512vnni" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vnni=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vnni=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vnni" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... 
" >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } if test ${ax_cv_check_cflags___mavx512vbmi2+y} then : @@ -33501,23 +33605,27 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vbmi2=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vbmi2=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vbmi2" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + + +fi - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 printf %s "checking whether C compiler accepts -mavx512bitalg... 
" >&6; } if test ${ax_cv_check_cflags___mavx512bitalg+y} then : @@ -33552,24 +33660,28 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_bitalg=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_bitalg=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_bitalg" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + +fi - # + # - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking checking SIMD instruction set" >&5 -printf %s "checking checking SIMD instruction set... " >&6; } - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 printf %s "checking whether C compiler accepts -mavx512vp2intersect... 
" >&6; } if test ${ax_cv_check_cflags___mavx512vp2intersect+y} then : @@ -33604,14 +33716,20 @@ fi printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes then : - -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h - + ac_cv_can_compile_simd_avx512_vp2intersect=yes else case e in #( - e) : ;; + e) ac_cv_can_compile_simd_avx512_vp2intersect=no ;; esac fi + if test "x$ac_cv_can_compile_simd_avx512_vp2intersect" = xyes +then : + + +printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h + + +fi diff --git a/configure.ac b/configure.ac index f64e81ccdc5fab..ce3e48ed876bef 100644 --- a/configure.ac +++ b/configure.ac @@ -7854,20 +7854,37 @@ PY_STDLIB_MOD([_sha2], [test "$with_builtin_sha2" = yes]) PY_STDLIB_MOD([_sha3], [test "$with_builtin_sha3" = yes]) PY_STDLIB_MOD([_blake2], [test "$with_builtin_blake2" = yes]) -dnl PY_SIMD_DETECT(INSTRUCTION_SET_NAME, COMPILER_FLAG, NORMALIZED_NAME) +dnl PY_SIMD_DETECT(INSTRUCTION-SET-NAME, COMPILER-FLAG, [NORMALIZED-NAME]) +dnl ---------------------------------------------------------------------- +dnl +dnl Check if the compiler supports a given COMPILER-FLAG and define: +dnl +dnl ac_cv_can_compile_simd_<name> = yes +dnl #define Py_CAN_COMPILE_SIMD_<NAME>_INSTRUCTIONS 1 +dnl +dnl or +dnl +dnl ac_cv_can_compile_simd_<name> = no +dnl #undef Py_CAN_COMPILE_SIMD_<NAME>_INSTRUCTIONS +dnl +dnl where <name> and <NAME> are the lowercased and uppercased versions +dnl of NORMALIZED-NAME; by default, the latter is INSTRUCTION-SET-NAME.
+dnl AC_DEFUN([PY_SIMD_DETECT], [ - AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], - [[ac_cv_can_compile_simd_]m4_tolower([$1])], - [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) - AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], - [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], - [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) - AC_MSG_CHECKING([checking SIMD instruction set]) - AX_CHECK_COMPILE_FLAG([$2], - [AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.])], - [], []) - AS_VAR_POPDEF([py_var]) - AS_VAR_POPDEF([py_define]) + AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], + [[ac_cv_can_compile_simd_]m4_tolower([$1])], + [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) + AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) + AX_CHECK_COMPILE_FLAG([$2], + [AS_VAR_SET([py_var], [yes])], + [AS_VAR_SET([py_var], [no])]) + AS_VAR_IF([py_var], [yes], [ + AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.]) + ]) + AS_VAR_POPDEF([py_var]) + AS_VAR_POPDEF([py_define]) ]) # Detection of supported SIMD instruction sets for CPython. Since @@ -7877,49 +7894,51 @@ AC_DEFUN([PY_SIMD_DETECT], [ # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. 
-if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then - # SSE - PY_SIMD_DETECT([SSE], [-msse]) - PY_SIMD_DETECT([SSE2], [-msse2]) - PY_SIMD_DETECT([SSE3], [-msse3]) - PY_SIMD_DETECT([SSSE3], [-mssse3]) - PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) - PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) - # AVX - PY_SIMD_DETECT([AVX], [-mavx]) - PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) - PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) - # - PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) - PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) - PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) - # AVX-2 - PY_SIMD_DETECT([AVX2], [-mavx2]) - # AVX-512 - PY_SIMD_DETECT([AVX512_F], [-mavx512f]) - PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) - # - PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) - PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) - # - PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) - PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) - # - PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) - # - PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) - PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) - PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) - # - PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) - PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) - # - PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) - # - PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) - PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) - # - PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE + PY_SIMD_DETECT([SSE], [-msse]) + PY_SIMD_DETECT([SSE2], [-msse2]) + PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSSE3], [-mssse3]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) + # AVX + PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) + 
PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) + # + PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) + PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) + PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) + # AVX-2 + PY_SIMD_DETECT([AVX2], [-mavx2]) + # AVX-512 + PY_SIMD_DETECT([AVX512_F], [-mavx512f]) + PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + # + PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) + PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) + # + PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) + PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) + # + PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) + # + PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) + PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) + PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) + # + PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) + PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) + # + PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) + # + PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) + PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) + # + PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi LIBHACL_CFLAGS='-I$(srcdir)/Modules/_hacl -I$(srcdir)/Modules/_hacl/include -D_BSD_SOURCE -D_DEFAULT_SOURCE $(PY_STDMODULE_CFLAGS) $(CCSHARED)' From d00da3e8bae6d8b3b273f64081bdba95f25b61d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:04:43 +0200 Subject: [PATCH 40/78] post-merge --- Modules/blake2module.c | 1 - configure | 582 ++++++++++++++++++----------------------- configure.ac | 4 +- pyconfig.h.in | 174 ++++++------ 4 files changed, 344 insertions(+), 417 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index d8ae379f1f6529..e222d6d2e5c298 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -58,7 +58,6 @@ PyDoc_STRVAR(blake2mod__doc__, typedef struct { PyTypeObject *blake2b_type; PyTypeObject *blake2s_type; - bool can_run_simd128; bool can_run_simd256; } Blake2State; diff --git 
a/configure b/configure index 11a23302bf0f94..b0a7ed029fb1b0 100755 --- a/configure +++ b/configure @@ -32547,6 +32547,11 @@ fi # See py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. +# +# Although AVX support is not guaranteed on Android [1], this is safe +# because we do a runtime CPUID check. +# +# [1]: https://developer.android.com/ndk/guides/abis#86-64 if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then @@ -32556,13 +32561,13 @@ then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 printf %s "checking whether C compiler accepts -msse... " >&6; } -if test ${ax_cv_check_cflags___msse+y} +if test ${ax_cv_check_cflags__Werror__msse+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse" + CFLAGS="$CFLAGS -Werror -msse" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32576,18 +32581,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse=yes + ax_cv_check_cflags__Werror__msse=yes else case e in #( - e) ax_cv_check_cflags___msse=no ;; + e) ax_cv_check_cflags__Werror__msse=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse" >&6; } -if test "x$ax_cv_check_cflags___msse" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse" = xyes then : ac_cv_can_compile_simd_sse=yes else case e in #( @@ -32599,7 +32604,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h fi @@ -32611,13 +32616,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 printf %s "checking whether C compiler accepts -msse2... " >&6; } -if test ${ax_cv_check_cflags___msse2+y} +if test ${ax_cv_check_cflags__Werror__msse2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse2" + CFLAGS="$CFLAGS -Werror -msse2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32631,18 +32636,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse2=yes + ax_cv_check_cflags__Werror__msse2=yes else case e in #( - e) ax_cv_check_cflags___msse2=no ;; + e) ax_cv_check_cflags__Werror__msse2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse2" >&6; } -if test "x$ax_cv_check_cflags___msse2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse2" = xyes then : ac_cv_can_compile_simd_sse2=yes else case e in #( @@ -32654,7 +32659,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h fi @@ -32666,13 +32671,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 printf %s "checking whether C compiler accepts -msse3... " >&6; } -if test ${ax_cv_check_cflags___msse3+y} +if test ${ax_cv_check_cflags__Werror__msse3+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse3" + CFLAGS="$CFLAGS -Werror -msse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32686,18 +32691,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse3=yes + ax_cv_check_cflags__Werror__msse3=yes else case e in #( - e) ax_cv_check_cflags___msse3=no ;; + e) ax_cv_check_cflags__Werror__msse3=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse3" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse3" >&6; } -if test "x$ax_cv_check_cflags___msse3" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse3" = xyes then : ac_cv_can_compile_simd_sse3=yes else case e in #( @@ -32709,7 +32714,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h fi @@ -32721,13 +32726,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 printf %s "checking whether C compiler accepts -mssse3... " >&6; } -if test ${ax_cv_check_cflags___mssse3+y} +if test ${ax_cv_check_cflags__Werror__mssse3+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mssse3" + CFLAGS="$CFLAGS -Werror -mssse3" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32741,18 +32746,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mssse3=yes + ax_cv_check_cflags__Werror__mssse3=yes else case e in #( - e) ax_cv_check_cflags___mssse3=no ;; + e) ax_cv_check_cflags__Werror__mssse3=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mssse3" >&5 -printf "%s\n" "$ax_cv_check_cflags___mssse3" >&6; } -if test "x$ax_cv_check_cflags___mssse3" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mssse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__mssse3" = xyes then : ac_cv_can_compile_simd_ssse3=yes else case e in #( @@ -32764,7 +32769,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h fi @@ -32776,13 +32781,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 printf %s "checking whether C compiler accepts -msse4.1... " >&6; } -if test ${ax_cv_check_cflags___msse4_1+y} +if test ${ax_cv_check_cflags__Werror__msse4_1+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.1" + CFLAGS="$CFLAGS -Werror -msse4.1" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32796,18 +32801,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_1=yes + ax_cv_check_cflags__Werror__msse4_1=yes else case e in #( - e) ax_cv_check_cflags___msse4_1=no ;; + e) ax_cv_check_cflags__Werror__msse4_1=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_1" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_1" >&6; } -if test "x$ax_cv_check_cflags___msse4_1" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_1" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_1" = xyes then : ac_cv_can_compile_simd_sse4_1=yes else case e in #( @@ -32819,7 +32824,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h fi @@ -32831,13 +32836,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 printf %s "checking whether C compiler accepts -msse4.2... " >&6; } -if test ${ax_cv_check_cflags___msse4_2+y} +if test ${ax_cv_check_cflags__Werror__msse4_2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -msse4.2" + CFLAGS="$CFLAGS -Werror -msse4.2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32851,18 +32856,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___msse4_2=yes + ax_cv_check_cflags__Werror__msse4_2=yes else case e in #( - e) ax_cv_check_cflags___msse4_2=no ;; + e) ax_cv_check_cflags__Werror__msse4_2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags___msse4_2" >&6; } -if test "x$ax_cv_check_cflags___msse4_2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_2" = xyes then : ac_cv_can_compile_simd_sse4_2=yes else case e in #( @@ -32874,7 +32879,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h fi @@ -32887,13 +32892,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 printf %s "checking whether C compiler accepts -mavx... " >&6; } -if test ${ax_cv_check_cflags___mavx+y} +if test ${ax_cv_check_cflags__Werror__mavx+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx" + CFLAGS="$CFLAGS -Werror -mavx" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32907,18 +32912,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx=yes + ax_cv_check_cflags__Werror__mavx=yes else case e in #( - e) ax_cv_check_cflags___mavx=no ;; + e) ax_cv_check_cflags__Werror__mavx=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx" >&6; } -if test "x$ax_cv_check_cflags___mavx" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx" = xyes then : ac_cv_can_compile_simd_avx=yes else case e in #( @@ -32930,7 +32935,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h fi @@ -32942,13 +32947,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 printf %s "checking whether C compiler accepts -mavxifma... " >&6; } -if test ${ax_cv_check_cflags___mavxifma+y} +if test ${ax_cv_check_cflags__Werror__mavxifma+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxifma" + CFLAGS="$CFLAGS -Werror -mavxifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -32962,18 +32967,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxifma=yes + ax_cv_check_cflags__Werror__mavxifma=yes else case e in #( - e) ax_cv_check_cflags___mavxifma=no ;; + e) ax_cv_check_cflags__Werror__mavxifma=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxifma" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxifma" >&6; } -if test "x$ax_cv_check_cflags___mavxifma" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxifma" = xyes then : ac_cv_can_compile_simd_avx_ifma=yes else case e in #( @@ -32985,7 +32990,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h fi @@ -32997,13 +33002,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } -if test ${ax_cv_check_cflags___mavxneconvert+y} +if test ${ax_cv_check_cflags__Werror__mavxneconvert+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxneconvert" + CFLAGS="$CFLAGS -Werror -mavxneconvert" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33017,18 +33022,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxneconvert=yes + ax_cv_check_cflags__Werror__mavxneconvert=yes else case e in #( - e) ax_cv_check_cflags___mavxneconvert=no ;; + e) ax_cv_check_cflags__Werror__mavxneconvert=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxneconvert" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxneconvert" >&6; } -if test "x$ax_cv_check_cflags___mavxneconvert" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxneconvert" = xyes then : ac_cv_can_compile_simd_avx_ne_convert=yes else case e in #( @@ -33040,7 +33045,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h fi @@ -33053,13 +33058,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } -if test ${ax_cv_check_cflags___mavxvnni+y} +if test ${ax_cv_check_cflags__Werror__mavxvnni+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnni" + CFLAGS="$CFLAGS -Werror -mavxvnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33073,18 +33078,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnni=yes + ax_cv_check_cflags__Werror__mavxvnni=yes else case e in #( - e) ax_cv_check_cflags___mavxvnni=no ;; + e) ax_cv_check_cflags__Werror__mavxvnni=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnni" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnni" >&6; } -if test "x$ax_cv_check_cflags___mavxvnni" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnni" = xyes then : ac_cv_can_compile_simd_avx_vnni=yes else case e in #( @@ -33096,7 +33101,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33108,13 +33113,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } -if test ${ax_cv_check_cflags___mavxvnniint8+y} +if test ${ax_cv_check_cflags__Werror__mavxvnniint8+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnniint8" + CFLAGS="$CFLAGS -Werror -mavxvnniint8" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33128,18 +33133,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnniint8=yes + ax_cv_check_cflags__Werror__mavxvnniint8=yes else case e in #( - e) ax_cv_check_cflags___mavxvnniint8=no ;; + e) ax_cv_check_cflags__Werror__mavxvnniint8=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint8" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnniint8" >&6; } -if test "x$ax_cv_check_cflags___mavxvnniint8" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint8" = xyes then : ac_cv_can_compile_simd_avx_vnni_int8=yes else case e in #( @@ -33151,7 +33156,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h fi @@ -33163,13 +33168,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } -if test ${ax_cv_check_cflags___mavxvnniint16+y} +if test ${ax_cv_check_cflags__Werror__mavxvnniint16+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavxvnniint16" + CFLAGS="$CFLAGS -Werror -mavxvnniint16" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33183,18 +33188,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavxvnniint16=yes + ax_cv_check_cflags__Werror__mavxvnniint16=yes else case e in #( - e) ax_cv_check_cflags___mavxvnniint16=no ;; + e) ax_cv_check_cflags__Werror__mavxvnniint16=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavxvnniint16" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavxvnniint16" >&6; } -if test "x$ax_cv_check_cflags___mavxvnniint16" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint16" = xyes then : ac_cv_can_compile_simd_avx_vnni_int16=yes else case e in #( @@ -33206,7 +33211,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h fi @@ -33219,13 +33224,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags___mavx2+y} +if test ${ax_cv_check_cflags__Werror__mavx2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx2" + CFLAGS="$CFLAGS -Werror -mavx2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33239,18 +33244,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx2=yes + ax_cv_check_cflags__Werror__mavx2=yes else case e in #( - e) ax_cv_check_cflags___mavx2=no ;; + e) ax_cv_check_cflags__Werror__mavx2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx2" >&6; } -if test "x$ax_cv_check_cflags___mavx2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes then : ac_cv_can_compile_simd_avx2=yes else case e in #( @@ -33262,7 +33267,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h fi @@ -33275,13 +33280,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 printf %s "checking whether C compiler accepts -mavx512f... " >&6; } -if test ${ax_cv_check_cflags___mavx512f+y} +if test ${ax_cv_check_cflags__Werror__mavx512f+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512f" + CFLAGS="$CFLAGS -Werror -mavx512f" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33295,18 +33300,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512f=yes + ax_cv_check_cflags__Werror__mavx512f=yes else case e in #( - e) ax_cv_check_cflags___mavx512f=no ;; + e) ax_cv_check_cflags__Werror__mavx512f=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512f" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512f" >&6; } -if test "x$ax_cv_check_cflags___mavx512f" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512f" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512f" = xyes then : ac_cv_can_compile_simd_avx512_f=yes else case e in #( @@ -33318,7 +33323,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h fi @@ -33330,13 +33335,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } -if test ${ax_cv_check_cflags___mavx512cd+y} +if test ${ax_cv_check_cflags__Werror__mavx512cd+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512cd" + CFLAGS="$CFLAGS -Werror -mavx512cd" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33350,18 +33355,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512cd=yes + ax_cv_check_cflags__Werror__mavx512cd=yes else case e in #( - e) ax_cv_check_cflags___mavx512cd=no ;; + e) ax_cv_check_cflags__Werror__mavx512cd=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512cd" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512cd" >&6; } -if test "x$ax_cv_check_cflags___mavx512cd" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512cd" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512cd" = xyes then : ac_cv_can_compile_simd_avx512_cd=yes else case e in #( @@ -33373,7 +33378,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h fi @@ -33386,13 +33391,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 printf %s "checking whether C compiler accepts -mavx512er... " >&6; } -if test ${ax_cv_check_cflags___mavx512er+y} +if test ${ax_cv_check_cflags__Werror__mavx512er+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512er" + CFLAGS="$CFLAGS -Werror -mavx512er" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33406,18 +33411,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512er=yes + ax_cv_check_cflags__Werror__mavx512er=yes else case e in #( - e) ax_cv_check_cflags___mavx512er=no ;; + e) ax_cv_check_cflags__Werror__mavx512er=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512er" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512er" >&6; } -if test "x$ax_cv_check_cflags___mavx512er" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512er" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512er" = xyes then : ac_cv_can_compile_simd_avx512_er=yes else case e in #( @@ -33429,7 +33434,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h fi @@ -33441,13 +33446,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } -if test ${ax_cv_check_cflags___mavx512pf+y} +if test ${ax_cv_check_cflags__Werror__mavx512pf+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512pf" + CFLAGS="$CFLAGS -Werror -mavx512pf" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33461,18 +33466,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512pf=yes + ax_cv_check_cflags__Werror__mavx512pf=yes else case e in #( - e) ax_cv_check_cflags___mavx512pf=no ;; + e) ax_cv_check_cflags__Werror__mavx512pf=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512pf" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512pf" >&6; } -if test "x$ax_cv_check_cflags___mavx512pf" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512pf" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512pf" = xyes then : ac_cv_can_compile_simd_avx512_pf=yes else case e in #( @@ -33484,7 +33489,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h fi @@ -33497,13 +33502,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } -if test ${ax_cv_check_cflags___mavx5124fmaps+y} +if test ${ax_cv_check_cflags__Werror__mavx5124fmaps+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx5124fmaps" + CFLAGS="$CFLAGS -Werror -mavx5124fmaps" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33517,18 +33522,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx5124fmaps=yes + ax_cv_check_cflags__Werror__mavx5124fmaps=yes else case e in #( - e) ax_cv_check_cflags___mavx5124fmaps=no ;; + e) ax_cv_check_cflags__Werror__mavx5124fmaps=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124fmaps" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx5124fmaps" >&6; } -if test "x$ax_cv_check_cflags___mavx5124fmaps" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124fmaps" = xyes then : ac_cv_can_compile_simd_avx512_4fmaps=yes else case e in #( @@ -33540,7 +33545,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h fi @@ -33552,13 +33557,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } -if test ${ax_cv_check_cflags___mavx5124vnniw+y} +if test ${ax_cv_check_cflags__Werror__mavx5124vnniw+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx5124vnniw" + CFLAGS="$CFLAGS -Werror -mavx5124vnniw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33572,18 +33577,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx5124vnniw=yes + ax_cv_check_cflags__Werror__mavx5124vnniw=yes else case e in #( - e) ax_cv_check_cflags___mavx5124vnniw=no ;; + e) ax_cv_check_cflags__Werror__mavx5124vnniw=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx5124vnniw" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx5124vnniw" >&6; } -if test "x$ax_cv_check_cflags___mavx5124vnniw" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124vnniw" = xyes then : ac_cv_can_compile_simd_avx512_4vnniw=yes else case e in #( @@ -33595,7 +33600,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h fi @@ -33608,13 +33613,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } -if test ${ax_cv_check_cflags___mavx512vpopcntdq+y} +if test ${ax_cv_check_cflags__Werror__mavx512vpopcntdq+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vpopcntdq" + CFLAGS="$CFLAGS -Werror -mavx512vpopcntdq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33628,18 +33633,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vpopcntdq=yes + ax_cv_check_cflags__Werror__mavx512vpopcntdq=yes else case e in #( - e) ax_cv_check_cflags___mavx512vpopcntdq=no ;; + e) ax_cv_check_cflags__Werror__mavx512vpopcntdq=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vpopcntdq" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vpopcntdq" >&6; } -if test "x$ax_cv_check_cflags___mavx512vpopcntdq" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vpopcntdq" = xyes then : ac_cv_can_compile_simd_avx512_vpopcntdq=yes else case e in #( @@ -33651,7 +33656,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h fi @@ -33664,13 +33669,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } -if test ${ax_cv_check_cflags___mavx512vl+y} +if test ${ax_cv_check_cflags__Werror__mavx512vl+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vl" + CFLAGS="$CFLAGS -Werror -mavx512vl" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33684,18 +33689,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vl=yes + ax_cv_check_cflags__Werror__mavx512vl=yes else case e in #( - e) ax_cv_check_cflags___mavx512vl=no ;; + e) ax_cv_check_cflags__Werror__mavx512vl=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vl" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vl" >&6; } -if test "x$ax_cv_check_cflags___mavx512vl" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vl" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vl" = xyes then : ac_cv_can_compile_simd_avx512_vl=yes else case e in #( @@ -33707,7 +33712,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h fi @@ -33719,13 +33724,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } -if test ${ax_cv_check_cflags___mavx512dq+y} +if test ${ax_cv_check_cflags__Werror__mavx512dq+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512dq" + CFLAGS="$CFLAGS -Werror -mavx512dq" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33739,18 +33744,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512dq=yes + ax_cv_check_cflags__Werror__mavx512dq=yes else case e in #( - e) ax_cv_check_cflags___mavx512dq=no ;; + e) ax_cv_check_cflags__Werror__mavx512dq=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512dq" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512dq" >&6; } -if test "x$ax_cv_check_cflags___mavx512dq" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512dq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512dq" = xyes then : ac_cv_can_compile_simd_avx512_dq=yes else case e in #( @@ -33762,7 +33767,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h fi @@ -33774,13 +33779,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } -if test ${ax_cv_check_cflags___mavx512bw+y} +if test ${ax_cv_check_cflags__Werror__mavx512bw+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512bw" + CFLAGS="$CFLAGS -Werror -mavx512bw" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33794,18 +33799,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512bw=yes + ax_cv_check_cflags__Werror__mavx512bw=yes else case e in #( - e) ax_cv_check_cflags___mavx512bw=no ;; + e) ax_cv_check_cflags__Werror__mavx512bw=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bw" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512bw" >&6; } -if test "x$ax_cv_check_cflags___mavx512bw" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bw" = xyes then : ac_cv_can_compile_simd_avx512_bw=yes else case e in #( @@ -33817,7 +33822,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h fi @@ -33830,13 +33835,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } -if test ${ax_cv_check_cflags___mavx512ifma+y} +if test ${ax_cv_check_cflags__Werror__mavx512ifma+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512ifma" + CFLAGS="$CFLAGS -Werror -mavx512ifma" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33850,18 +33855,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512ifma=yes + ax_cv_check_cflags__Werror__mavx512ifma=yes else case e in #( - e) ax_cv_check_cflags___mavx512ifma=no ;; + e) ax_cv_check_cflags__Werror__mavx512ifma=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512ifma" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512ifma" >&6; } -if test "x$ax_cv_check_cflags___mavx512ifma" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512ifma" = xyes then : ac_cv_can_compile_simd_avx512_ifma=yes else case e in #( @@ -33873,7 +33878,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h fi @@ -33885,13 +33890,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi+y} +if test ${ax_cv_check_cflags__Werror__mavx512vbmi+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi" + CFLAGS="$CFLAGS -Werror -mavx512vbmi" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33905,18 +33910,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi=yes + ax_cv_check_cflags__Werror__mavx512vbmi=yes else case e in #( - e) ax_cv_check_cflags___mavx512vbmi=no ;; + e) ax_cv_check_cflags__Werror__mavx512vbmi=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi" = xyes then : ac_cv_can_compile_simd_avx512_vbmi=yes else case e in #( @@ -33928,7 +33933,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33941,13 +33946,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } -if test ${ax_cv_check_cflags___mavx512vnni+y} +if test ${ax_cv_check_cflags__Werror__mavx512vnni+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vnni" + CFLAGS="$CFLAGS -Werror -mavx512vnni" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -33961,18 +33966,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vnni=yes + ax_cv_check_cflags__Werror__mavx512vnni=yes else case e in #( - e) ax_cv_check_cflags___mavx512vnni=no ;; + e) ax_cv_check_cflags__Werror__mavx512vnni=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vnni" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vnni" >&6; } -if test "x$ax_cv_check_cflags___mavx512vnni" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vnni" = xyes then : ac_cv_can_compile_simd_avx512_vnni=yes else case e in #( @@ -33984,7 +33989,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h fi @@ -33997,13 +34002,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } -if test ${ax_cv_check_cflags___mavx512vbmi2+y} +if test ${ax_cv_check_cflags__Werror__mavx512vbmi2+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vbmi2" + CFLAGS="$CFLAGS -Werror -mavx512vbmi2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34017,18 +34022,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vbmi2=yes + ax_cv_check_cflags__Werror__mavx512vbmi2=yes else case e in #( - e) ax_cv_check_cflags___mavx512vbmi2=no ;; + e) ax_cv_check_cflags__Werror__mavx512vbmi2=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vbmi2" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vbmi2" >&6; } -if test "x$ax_cv_check_cflags___mavx512vbmi2" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi2" = xyes then : ac_cv_can_compile_simd_avx512_vbmi2=yes else case e in #( @@ -34040,7 +34045,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h fi @@ -34052,13 +34057,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } -if test ${ax_cv_check_cflags___mavx512bitalg+y} +if test ${ax_cv_check_cflags__Werror__mavx512bitalg+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512bitalg" + CFLAGS="$CFLAGS -Werror -mavx512bitalg" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34072,18 +34077,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512bitalg=yes + ax_cv_check_cflags__Werror__mavx512bitalg=yes else case e in #( - e) ax_cv_check_cflags___mavx512bitalg=no ;; + e) ax_cv_check_cflags__Werror__mavx512bitalg=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512bitalg" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512bitalg" >&6; } -if test "x$ax_cv_check_cflags___mavx512bitalg" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bitalg" = xyes then : ac_cv_can_compile_simd_avx512_bitalg=yes else case e in #( @@ -34095,7 +34100,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h fi @@ -34108,13 +34113,13 @@ fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } -if test ${ax_cv_check_cflags___mavx512vp2intersect+y} +if test ${ax_cv_check_cflags__Werror__mavx512vp2intersect+y} then : printf %s "(cached) " >&6 else case e in #( e) ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -mavx512vp2intersect" + CFLAGS="$CFLAGS -Werror -mavx512vp2intersect" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ @@ -34128,18 +34133,18 @@ main (void) _ACEOF if ac_fn_c_try_compile "$LINENO" then : - ax_cv_check_cflags___mavx512vp2intersect=yes + ax_cv_check_cflags__Werror__mavx512vp2intersect=yes else case e in #( - e) ax_cv_check_cflags___mavx512vp2intersect=no ;; + e) ax_cv_check_cflags__Werror__mavx512vp2intersect=no ;; esac fi rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext CFLAGS=$ax_check_save_flags ;; esac fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags___mavx512vp2intersect" >&5 -printf "%s\n" "$ax_cv_check_cflags___mavx512vp2intersect" >&6; } -if test "x$ax_cv_check_cflags___mavx512vp2intersect" = xyes +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vp2intersect" = xyes then : ac_cv_can_compile_simd_avx512_vp2intersect=yes else case e in #( @@ -34151,7 +34156,7 @@ fi then : -printf "%s\n" "#define Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h fi @@ -34210,48 +34215,19 @@ else use_hacl_universal2_impl=no fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +# The HACL* SIMD-128 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. 
if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... " >&6; } -if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -msse -msse2 -msse3 -msse4.1 -msse4.2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&6; } -if test "x$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" = xyes -then : - + # SIMD-128 + if test "$ac_cv_can_compile_simd_sse" = "yes" \ + -a "$ac_cv_can_compile_simd_sse2" = "yes" \ + -a "$ac_cv_can_compile_simd_sse3" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" + then LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" @@ -34272,65 +34248,22 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - - -else case e in #( - e) : ;; -esac -fi - + fi fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# 
Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. -# -# Although AVX support is not guaranteed on Android -# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a -# runtime CPUID check. +# The HACL* SIMD-256 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 -printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags__Werror__mavx2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mavx2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mavx2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mavx2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } -if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes -then : - + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then LIBHACL_SIMD256_FLAGS="-mavx2" + printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h @@ -34349,12 +34282,7 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - -else case e in #( - e) : ;; -esac -fi - + fi fi diff --git a/configure.ac b/configure.ac index 0c94495da79e90..75778af3de3170 100644 --- a/configure.ac +++ b/configure.ac @@ -8119,8 +8119,8 @@ then if test "$ac_cv_can_compile_simd_sse" = "yes" \ -a "$ac_cv_can_compile_simd_sse2" = "yes" \ -a "$ac_cv_can_compile_simd_sse3" = "yes" \ - -a "$ac_cv_can_compile_simd_sse41" = "yes" \ - -a "$ac_cv_can_compile_simd_sse42" = "yes" + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" then [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] diff --git a/pyconfig.h.in b/pyconfig.h.in index db72b1a6a05fee..478855c7022c3a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1724,93 +1724,6 @@ /* PEP 11 Support tier (1, 2, 3 or 0 for unsupported) */ #undef PY_SUPPORT_TIER -/* Define if '-mavx2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS - -/* Define if '-mavx5124fmaps' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS - -/* Define if '-mavx5124vnniw' is a valid compiler flag. 
*/ -#undef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS - -/* Define if '-mavx512bitalg' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS - -/* Define if '-mavx512bw' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS - -/* Define if '-mavx512cd' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS - -/* Define if '-mavx512dq' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS - -/* Define if '-mavx512er' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS - -/* Define if '-mavx512f' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS - -/* Define if '-mavx512ifma' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS - -/* Define if '-mavx512pf' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS - -/* Define if '-mavx512vbmi2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS - -/* Define if '-mavx512vbmi' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS - -/* Define if '-mavx512vl' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS - -/* Define if '-mavx512vnni' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS - -/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS - -/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS - -/* Define if '-mavxifma' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS - -/* Define if '-mavx' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS - -/* Define if '-mavxneconvert' is a valid compiler flag. 
*/ -#undef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS - -/* Define if '-mavxvnni' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS - -/* Define if '-mavxvnniint16' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS - -/* Define if '-mavxvnniint8' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS - -/* Define if '-msse2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS - -/* Define if '-msse3' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS - -/* Define if '-msse4.1' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS - -/* Define if '-msse4.2' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS - -/* Define if '-msse' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS - -/* Define if '-mssse3' is a valid compiler flag. */ -#undef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS - /* Define if you want to build an interpreter with many run-time checks. */ #undef Py_DEBUG @@ -2104,6 +2017,93 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Define if '-mavx2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX From 8b7ecfb856508896e90fd9142968ac6cc2389dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:13:46 +0200 Subject: [PATCH 41/78] use `_Py` prefix to prevent public namespace pollution --- Include/internal/pycore_cpuinfo.h | 22 +-- .../internal/pycore_cpuinfo_cpuid_features.h | 103 ++++++------ .../internal/pycore_cpuinfo_xsave_features.h | 41 ++--- Modules/blake2module.c | 4 +- Modules/hmacmodule.c | 4 +- Python/cpuinfo.c | 156 +++++++++--------- Tools/cpuinfo/_util.py | 103 +++++++++++- Tools/cpuinfo/cpuid_features_gen.py | 55 +++--- Tools/cpuinfo/xsave_features_gen.py | 49 +++--- 9 files changed, 308 insertions(+), 229 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 57ad48efb038c0..49fe7652f3e74f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -27,10 +27,10 @@ extern "C" { #include "pycore_cpuinfo_cpuid_features.h" #include "pycore_cpuinfo_xsave_features.h" -typedef struct py_cpuid_features { +typedef struct _Py_cpuid_features_s { uint32_t maxleaf; /* - * Macro to declare a member flag of 'py_cpuid_features' as a uint8_t. + * Macro to declare a member flag of '_Py_cpuid_features' as a uint8_t. 
* Whenever this macro is used, do not forget to update the number of * fields and the bitsize of the 'ready' member (see structure end). */ @@ -116,7 +116,7 @@ typedef struct py_cpuid_features { // number of fields (40) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. uint8_t ready; // set if the structure is ready for usage -} py_cpuid_features; +} _Py_cpuid_features; /* * Explicitly initialize all members to zero to guarantee that @@ -128,7 +128,7 @@ typedef struct py_cpuid_features { * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(void) -_Py_cpuid_disable_features(py_cpuid_features *flags); +_Py_cpuid_disable_features(_Py_cpuid_features *flags); /* * Check whether the structure is ready and flags are inter-compatible, @@ -140,7 +140,7 @@ _Py_cpuid_disable_features(py_cpuid_features *flags); * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(int) -_Py_cpuid_check_features(const py_cpuid_features *flags); +_Py_cpuid_check_features(const _Py_cpuid_features *flags); /* * Return 1 if all expected flags are set in 'actual', 0 otherwise. @@ -150,8 +150,8 @@ _Py_cpuid_check_features(const py_cpuid_features *flags); * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(int) -_Py_cpuid_has_features(const py_cpuid_features *actual, - const py_cpuid_features *expect); +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); /* * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. @@ -161,16 +161,16 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, * Note: This function does not set any exception and thus never fails. 
*/ PyAPI_FUNC(int) -_Py_cpuid_match_features(const py_cpuid_features *actual, - const py_cpuid_features *expect); +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); /* - * Detect the available features on this machine, storing the result in 'flags'. + * Detect the available host features, storing the result in 'flags'. * * Note: This function does not set any exception and thus never fails. */ PyAPI_FUNC(void) -_Py_cpuid_detect_features(py_cpuid_features *flags); +_Py_cpuid_detect_features(_Py_cpuid_features *flags); #ifdef __cplusplus } diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index b8c3eb38f0d0e4..83aa6bc34c9aed 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -1,10 +1,10 @@ -/* +/** * @author Bénédikt Tran - * @seealso Tools/cpuinfo/cpuid_features_gen.py + * @seealso @file Tools/cpuinfo/cpuid_features_gen.py * * The enumeration describes masks to apply on CPUID output registers. * - * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, * where <> (resp. []) denotes a required (resp. 
optional) group and: * * - REGISTER is EAX, EBX, ECX or EDX, @@ -35,68 +35,59 @@ extern "C" { #include "Python.h" -// fmt: off /*[python input] -import importlib -import os -import sys - -ROOT = os.getcwd() -TOOL = os.path.join(ROOT, "Tools/cpuinfo/cpuid_features_gen.py") -TOOL = os.path.realpath(TOOL) - -if not os.path.exists(TOOL): - raise FileNotFoundError(TOOL) - -sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module("cpuinfo.cpuid_features_gen") -print(module.generate_cpuid_features_enum("py_cpuid_feature_mask")) +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) +from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum +print(generate_cpuid_features_enum("_Py_cpuid_feature_mask")) [python start generated code]*/ -typedef enum py_cpuid_feature_mask { +// fmt: off +/** Enumeration for CPUID features */ +enum _Py_cpuid_feature_mask_e { /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 + _Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 + _Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 + _Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 + _Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 + _Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 + _Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 + _Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 + _Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 + 
_Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 + _Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 + _Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 + _Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 + _Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 + _Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 + _Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 + _Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 + _Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 + _Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 + _Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 + _Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 + _Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 + _Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 + _Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 + 
_Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 + _Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 + _Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 + _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 + _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 + _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 + _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 + _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 + _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -} py_cpuid_feature_mask; -/*[python end generated code: output=c4460242e465fa91 input=61d2b5f1bc368b94]*/ + _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 + _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 + _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 +}; // fmt: on +/*[python end generated code: output=8e58b0997d69bbf8 input=fce00935f64021f9]*/ #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index e81e1ab76557df..c0e33e820b9ef1 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -1,11 +1,12 @@ -/* +/** * @author Bénédikt Tran - * @seealso 
Tools/cpuinfo/xsave_features_gen.py + * @seealso @file Tools/cpuinfo/xsave_features_gen.py * * XSAVE state components (XCR0 control register). * * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. */ + #ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H #define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H @@ -21,29 +22,21 @@ extern "C" { // fmt: off /*[python input] -import importlib -import os -import sys - -ROOT = os.getcwd() -TOOL = os.path.join(ROOT, "Tools/cpuinfo/xsave_features_gen.py") -TOOL = os.path.realpath(TOOL) - -if not os.path.exists(TOOL): - raise FileNotFoundError(TOOL) - -sys.path.insert(0, os.path.dirname(os.path.dirname(TOOL))) -module = importlib.import_module("cpuinfo.xsave_features_gen") -print(module.generate_xsave_features_enum("py_xsave_feature_mask")) +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) +from cpuinfo.xsave_features_gen import generate_xsave_features_enum +print(generate_xsave_features_enum("_Py_xsave_feature_mask")) [python start generated code]*/ -typedef enum py_xsave_feature_mask { - Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -} py_xsave_feature_mask; -/*[python end generated code: output=9a476ed0abbc617b input=41f35058299c0118]*/ +// fmt: off +/** Enumeration for XSAVE components */ +enum _Py_xsave_feature_mask_e { + _Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 + _Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 + _Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 + _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 + _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 +}; // fmt: on +/*[python end generated code: output=35ea9a165938f8ef input=336793a305515376]*/ #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff 
--git a/Modules/blake2module.c b/Modules/blake2module.c index e222d6d2e5c298..2f8baea62d77fc 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,7 +16,7 @@ #include "Python.h" #include "hashlib.h" -#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" @@ -111,7 +111,7 @@ _blake2_free(void *module) static void blake2module_init_cpu_features(Blake2State *state) { - py_cpuid_features flags; + _Py_cpuid_features flags; _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f2e47e0fab15aa..064e31fe830deb 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,7 +17,7 @@ #endif #include "Python.h" -#include "pycore_cpuinfo.h" // py_cpuid_features +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1553,7 +1553,7 @@ hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - py_cpuid_features flags; + _Py_cpuid_features flags; _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0f934d04d76446..6e595f438e1a7e 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -4,8 +4,8 @@ #define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ #define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 
0 : 1) -#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_CPUID_MASK_ ## FEAT)) -#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (Py_XSAVE_MASK_ ## FEAT)) +#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT)) +#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_XSAVE_MASK_ ## FEAT)) // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER @@ -29,46 +29,46 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || 
defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) +#if defined(_Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) # define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #endif -#if defined(Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ - || defined(Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ +#if defined(_Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || 
defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ // macros above should be sorted in alphabetical order # define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD #endif @@ -160,33 +160,33 @@ detect_cpuid_maxleaf(void) /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ static inline void -detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) +detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); #endif -#ifdef Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); #endif #endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD 
-#ifdef Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD @@ -204,73 +204,73 @@ detect_cpuid_features(py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static inline void -detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, +detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); #endif #endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS flags->avx512_4vnniw = 
CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS flags->avx512_vbmi2 = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD @@ -278,7 +278,7 @@ detect_cpuid_extended_features_L7S0(py_cpuid_features *flags, /* 
Extended Feature Bits (LEAF=7, SUBLEAF=1). */ static inline void -detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, +detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, CPUID_REG eax, CPUID_REG ebx, CPUID_REG ecx, @@ -288,28 +288,28 @@ detect_cpuid_extended_features_L7S1(py_cpuid_features *flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD -#ifdef Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); #endif -#ifdef Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } static inline void -detect_cpuid_xsave_state(py_cpuid_features *flags) +detect_cpuid_xsave_state(_Py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. 
assert(flags->maxleaf >= 1); @@ -324,7 +324,7 @@ detect_cpuid_xsave_state(py_cpuid_features *flags) } static inline void -cpuid_features_finalize(py_cpuid_features *flags) +cpuid_features_finalize(_Py_cpuid_features *flags) { assert(flags->ready == 0); @@ -335,7 +335,7 @@ cpuid_features_finalize(py_cpuid_features *flags) } static inline int -cpuid_features_validate(const py_cpuid_features *flags) +cpuid_features_validate(const _Py_cpuid_features *flags) { if (flags->ready != 1) { return -1; @@ -363,14 +363,14 @@ cpuid_features_validate(const py_cpuid_features *flags) } int -_Py_cpuid_check_features(const py_cpuid_features *flags) +_Py_cpuid_check_features(const _Py_cpuid_features *flags) { return cpuid_features_validate(flags) < 0 ? 0 : 1; } /* * Apply a 1-parameter macro MACRO(FLAG) on all members - * of a 'py_cpuid_features' object ('ready' is omitted). + * of a '_Py_cpuid_features' object ('ready' is omitted). */ #define CPUID_APPLY_MACRO(MACRO) \ do { \ @@ -432,7 +432,7 @@ _Py_cpuid_check_features(const py_cpuid_features *flags) } while (0) void -_Py_cpuid_disable_features(py_cpuid_features *flags) +_Py_cpuid_disable_features(_Py_cpuid_features *flags) { flags->maxleaf = 0; #define CPUID_DISABLE(FLAG) flags->FLAG = 0 @@ -441,8 +441,8 @@ _Py_cpuid_disable_features(py_cpuid_features *flags) } int -_Py_cpuid_has_features(const py_cpuid_features *actual, - const py_cpuid_features *expect) +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { return 0; @@ -462,8 +462,8 @@ _Py_cpuid_has_features(const py_cpuid_features *actual, } int -_Py_cpuid_match_features(const py_cpuid_features *actual, - const py_cpuid_features *expect) +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) { if (!actual->ready || !expect->ready) { return 0; @@ -486,7 +486,7 @@ _Py_cpuid_match_features(const py_cpuid_features *actual, #ifdef SHOULD_PARSE_CPUID_L1 static 
inline void -cpuid_detect_l1_features(py_cpuid_features *flags) +cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -503,7 +503,7 @@ cpuid_detect_l1_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7S0 static inline void -cpuid_detect_l7s0_features(py_cpuid_features *flags) +cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -516,7 +516,7 @@ cpuid_detect_l7s0_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7S1 static inline void -cpuid_detect_l7s1_features(py_cpuid_features *flags) +cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; @@ -529,7 +529,7 @@ cpuid_detect_l7s1_features(py_cpuid_features *flags) #ifdef SHOULD_PARSE_CPUID_L7 static inline void -cpuid_detect_l7_features(py_cpuid_features *flags) +cpuid_detect_l7_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 7) { cpuid_detect_l7s0_features(flags); @@ -541,7 +541,7 @@ cpuid_detect_l7_features(py_cpuid_features *flags) #endif void -_Py_cpuid_detect_features(py_cpuid_features *flags) +_Py_cpuid_detect_features(_Py_cpuid_features *flags) { if (flags->ready) { return; diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py index 9aef599bd8f0e5..e501b2761f9659 100644 --- a/Tools/cpuinfo/_util.py +++ b/Tools/cpuinfo/_util.py @@ -1,6 +1,19 @@ from __future__ import annotations -__all__ = ["next_block", "make_enum_member"] +__all__ = [ + "next_block", "make_enum_name", "make_enum_member", + "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", + "CWriter" +] # fmt: skip + +import contextlib +import enum +from io import StringIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, Final def next_block(w: int) -> int: @@ -11,8 +24,96 @@ def next_block(w: int) -> int: 
_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) +def make_enum_name(name: str) -> tuple[str, str]: + if name.endswith("_e"): + raise ValueError(f"enumeration must not end by '_e': {name!r}") + return f"{name}_e", name # (enum name, typedef name) + + def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: member_name = key.ljust(name_maxsize) member_mask = format(1 << bit, "008x") member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) return f"{member_name}{member_mask} // bit = {bit}" + + +class Style(enum.IntEnum): + C99 = enum.auto() + C11 = enum.auto() + DOXYGEN = enum.auto() + + +C99_STYLE = Style.C99 +C11_STYLE = Style.C11 +DOXYGEN_STYLE = Style.DOXYGEN + +_COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/* ", " */", ""), + DOXYGEN_STYLE: ("/** ", " */", ""), +} + +_COMMENT_BLOCK_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/*", " */", " * "), + DOXYGEN_STYLE: ("/**", " */", " * "), +} + + +class CWriter: + def __init__(self, *, indentsize: int = 4) -> None: + self._stream = StringIO() + self._indent = " " * indentsize + self._prefix = "" + self._disable_external_formatter() + + def _disable_external_formatter(self) -> None: + """Add a directive to suppress external formatters to run.""" + with self.prefixed(""): + self.write("// fmt: off") + + def _enable_external_formatter(self) -> None: + """Add a directive to allow external formatters to run.""" + with self.prefixed(""): + self.write("// fmt: on") + + def comment( + self, text: str, *, level: int = 0, style: Style = C11_STYLE + ) -> None: + """Add a C comment, possibly using doxygen style.""" + if len(text) < 72 and "\n" not in text: + prolog, epilog, _ = _COMMENT_INLINE_STYLE[style] + self.write(prolog, text, epilog, sep="", level=level) + else: + prolog, epilog, prefix = _COMMENT_BLOCK_STYLE[style] + self.write(prolog, level=level) + with self.prefixed(prefix): + for line 
in text.splitlines(): + self.write(line, level=level) + self.write(epilog, level=level) + + @contextlib.contextmanager + def prefixed(self, prefix: str) -> Iterator[None]: + old_prefix = self._prefix + self._prefix = prefix + try: + yield + finally: + self._prefix = old_prefix + + def _prefix_at(self, level: int) -> str: + return "".join((self._indent * level, self._prefix)) + + def write( + self, *args: Any, sep: str = " ", end: str = "\n", level: int = 0 + ) -> None: + if prefix := self._prefix_at(level): + self._write(prefix, sep="", end="") + self._write(*args, sep=sep, end=end) + + def _write(self, *args: Any, sep: str, end: str) -> None: + print(*args, sep=sep, end=end, file=self._stream) + + def build(self) -> str: + self._enable_external_formatter() + return self._stream.getvalue().rstrip("\n") diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index f23a68c141b696..ffbf526c01e37f 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -1,7 +1,7 @@ """ Generate an enumeration describing masks to apply on CPUID output registers. -Member names are Py_CPUID_MASK__L[S]_, +Member names are _Py_CPUID_MASK__L[S]_, where <> (resp. []) denotes a required (resp. optional) group and: - REGISTER is EAX, EBX, ECX or EDX, @@ -18,20 +18,20 @@ The LEAF value should only 1 or 7 as other values may have different meanings depending on the underlying architecture. -.. seealso:: Include/internal/pycore_cpuinfo_cpuid_features.h +.. seealso:: :file:`Include/internal/pycore_cpuinfo_cpuid_features.h` """ from __future__ import annotations __all__ = ["generate_cpuid_features_enum"] -from functools import partial -from io import StringIO from typing import TYPE_CHECKING + from . 
import _util as util +from ._util import DOXYGEN_STYLE if TYPE_CHECKING: - from typing import Final, IO + from typing import Final type Leaf = int type SubLeaf = int @@ -39,9 +39,9 @@ type FeatureFamily = tuple[Leaf, SubLeaf, Registry] type Feature = str - type Bit = int + type BitIndex = int -CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, Bit]]] = { +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, BitIndex]]] = { # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits. (1, 0, "ECX"): { "SSE3": 0, @@ -101,10 +101,10 @@ def get_member_name( leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature ) -> str: node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" - return f"Py_CPUID_MASK_{registry}_{node}_{name}" + return f"_Py_CPUID_MASK_{registry}_{node}_{name}" -NAMESIZE: Final[int] = util.next_block( +_NAME_MAXSIZE: Final[int] = util.next_block( max( len(get_member_name(*family, name)) for family, values in CPUID_FEATURES.items() @@ -114,35 +114,32 @@ def get_member_name( def generate_cpuid_features_enum(enum_name: str) -> str: - """Used by Include/internal/pycore_cpuinfo_cpuid_features.h. - - The C enumeration is generated by this function and Argument Clinic. - """ + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`. - # The enumeration is rendered as follows: - # - # = 0x, // bit = BIT - # ^ ^ ^ ^ ^ ^ ^ - # - # where ^ indicates a column that is a multiple of 4, has - # exactly 8 characters and has at most 2 characters. + The C enumeration is generated by this function and Argument Clinic, + to be eventually rendered as follows: - output = StringIO() - write = partial(print, file=output) - indent = " " * 4 + = 0x, // bit = BIT + ^ ^ ^ ^ ^ ^ ^ - write(f"typedef enum {enum_name} {{") + where ^ indicates a column that is a multiple of 4, has + exactly 8 characters and has at most 2 characters. 
+ """ + enum_name, _typedef_enum_name = util.make_enum_name(enum_name) + writer = util.CWriter() + writer.comment("Enumeration for CPUID features", style=DOXYGEN_STYLE) + writer.write(f"enum {enum_name} {{") for family, values in CPUID_FEATURES.items(): leaf, subleaf, registry = family title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" - write(indent, "/* ", title, " */", sep="") + writer.comment(title, level=1) for feature_name, bit in values.items(): if not feature_name: raise ValueError(f"invalid entry for {family}") if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") key = get_member_name(leaf, subleaf, registry, feature_name) - member_def = util.make_enum_member(key, bit, NAMESIZE) - write(indent, member_def, sep="") - write(f"}} {enum_name};") - return output.getvalue().rstrip("\n") + member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) + writer.write(member_def, level=1) + writer.write("};") + return writer.build() diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index bacb4e8b4344a8..858151c1b74956 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -3,25 +3,25 @@ See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. -.. seealso:: Include/internal/pycore_cpuinfo_xsave_features.h +.. seealso:: :file:`Include/internal/pycore_cpuinfo_xsave_features.h` """ from __future__ import annotations __all__ = ["generate_xsave_features_enum"] -from functools import partial -from io import StringIO from typing import TYPE_CHECKING + from . 
import _util as util +from ._util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final type Feature = str - type Bit = int + type BitIndex = int -XSAVE_FEATURES: Final[dict[Feature, Bit]] = { +XSAVE_FEATURES: Final[dict[Feature, BitIndex]] = { "SSE": 1, "AVX": 2, "AVX512_OPMASK": 5, @@ -31,38 +31,35 @@ def get_member_name(feature: Feature) -> str: - return f"Py_XSAVE_MASK_XCR0_{feature}" + return f"_Py_XSAVE_MASK_XCR0_{feature}" -NAMESIZE: Final[int] = util.next_block( +_NAME_MAXSIZE: Final[int] = util.next_block( max(map(len, map(get_member_name, XSAVE_FEATURES))) ) def generate_xsave_features_enum(enum_name: str) -> str: - """Used by Include/internal/pycore_cpuinfo_xsave_features.h. - - The C enumeration is generated by this function and Argument Clinic. - """ + """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`. - # The enumeration is rendered as follows: - # - # = 0x, // bit = BIT - # ^ ^ ^ ^ ^ ^ ^ - # - # where ^ indicates a column that is a multiple of 4, has - # exactly 8 characters and has at most 2 characters. + The C enumeration is generated by this function and Argument Clinic, + to be eventually rendered as follows: - output = StringIO() - write = partial(print, file=output) - indent = " " * 4 + = 0x, // bit = BIT + ^ ^ ^ ^ ^ ^ ^ - write(f"typedef enum {enum_name} {{") + where ^ indicates a column that is a multiple of 4, has + exactly 8 characters and has at most 2 characters. 
+ """ + enum_name, _typedef_enum_name = util.make_enum_name(enum_name) + writer = util.CWriter() + writer.comment("Enumeration for XSAVE components", style=DOXYGEN_STYLE) + writer.write(f"enum {enum_name} {{") for feature_name, bit in XSAVE_FEATURES.items(): if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") key = get_member_name(feature_name) - member_def = util.make_enum_member(key, bit, NAMESIZE) - write(indent, member_def, sep="") - write(f"}} {enum_name};") - return output.getvalue().rstrip("\n") + member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) + writer.write(member_def, level=1) + writer.write("};") + return writer.build() From 3c31ba34a28b4839ce0f720c075ad1651383dd4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:14:20 +0200 Subject: [PATCH 42/78] let the compiler decide on the inlineness --- Python/cpuinfo.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 6e595f438e1a7e..0a8d1cd6f7dfd0 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -118,7 +118,7 @@ * * If CPUID is not supported, registers are set to 0. */ -static inline void +static void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) @@ -133,7 +133,7 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -static inline uint64_t +static uint64_t get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now @@ -150,7 +150,7 @@ get_xgetbv(uint32_t index) } /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ -static inline uint32_t +static uint32_t detect_cpuid_maxleaf(void) { CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; @@ -159,7 +159,7 @@ detect_cpuid_maxleaf(void) } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). 
*/ -static inline void +static void detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) { assert(flags->maxleaf >= 1); @@ -203,7 +203,7 @@ detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) } /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ -static inline void +static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) { @@ -277,7 +277,7 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, } /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ -static inline void +static void detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, CPUID_REG eax, CPUID_REG ebx, @@ -308,7 +308,7 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } -static inline void +static void detect_cpuid_xsave_state(_Py_cpuid_features *flags) { // Keep the ordering and newlines as they are declared in the structure. @@ -323,7 +323,7 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) #endif } -static inline void +static void cpuid_features_finalize(_Py_cpuid_features *flags) { assert(flags->ready == 0); @@ -334,7 +334,7 @@ cpuid_features_finalize(_Py_cpuid_features *flags) flags->ready = 1; } -static inline int +static int cpuid_features_validate(const _Py_cpuid_features *flags) { if (flags->ready != 1) { @@ -485,7 +485,7 @@ _Py_cpuid_match_features(const _Py_cpuid_features *actual, #undef CPUID_APPLY_MACRO #ifdef SHOULD_PARSE_CPUID_L1 -static inline void +static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { @@ -502,7 +502,7 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7S0 -static inline void +static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); @@ -515,7 +515,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7S1 -static inline void 
+static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); @@ -528,7 +528,7 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) #endif #ifdef SHOULD_PARSE_CPUID_L7 -static inline void +static void cpuid_detect_l7_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 7) { From 143d57e2f3fc78bbd9a1b0b83530dd2bc4e0c34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:14:47 +0200 Subject: [PATCH 43/78] drop CPUID_REG alias --- Python/cpuinfo.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0a8d1cd6f7dfd0..fd59137aaab56f 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -1,7 +1,5 @@ #include "pycore_cpuinfo.h" -/* CPUID input and output registers are 32-bit unsigned integers */ -#define CPUID_REG uint32_t /* Check one or more CPUID register bits. */ #define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 0 : 1) #define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT)) @@ -121,7 +119,7 @@ static void get_cpuid_info(uint32_t level /* input eax */, uint32_t count /* input ecx */, - CPUID_REG *eax, CPUID_REG *ebx, CPUID_REG *ecx, CPUID_REG *edx) + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) @@ -153,14 +151,14 @@ get_xgetbv(uint32_t index) static uint32_t detect_cpuid_maxleaf(void) { - CPUID_REG maxleaf = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t maxleaf = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); return maxleaf; } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). 
*/ static void -detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) +detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { assert(flags->maxleaf >= 1); // Keep the ordering and newlines as they are declared in the structure. @@ -205,7 +203,7 @@ detect_cpuid_features(_Py_cpuid_features *flags, CPUID_REG ecx, CPUID_REG edx) /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, - CPUID_REG ebx, CPUID_REG ecx, CPUID_REG edx) + uint32_t ebx, uint32_t ecx, uint32_t edx) { assert(flags->maxleaf >= 7); (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings @@ -279,10 +277,10 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ static void detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, - CPUID_REG eax, - CPUID_REG ebx, - CPUID_REG ecx, - CPUID_REG edx) + uint32_t eax, + uint32_t ebx, + uint32_t ecx, + uint32_t edx) { assert(flags->maxleaf >= 7); (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings @@ -489,7 +487,7 @@ static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { if (flags->maxleaf >= 1) { - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); if (flags->osxsave) { @@ -506,7 +504,7 @@ static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } @@ -519,7 +517,7 @@ static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->maxleaf >= 7); - CPUID_REG eax = 0, ebx = 0, ecx = 0, edx = 0; + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 
1, &eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); } From ee2a83cd6559e967d6bd0d27c1a11759c8f02191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:16:31 +0200 Subject: [PATCH 44/78] simplify `_Py_cpuid_check_features` --- Python/cpuinfo.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index fd59137aaab56f..eeacf9dd8df3bf 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -332,11 +332,11 @@ cpuid_features_finalize(_Py_cpuid_features *flags) flags->ready = 1; } -static int -cpuid_features_validate(const _Py_cpuid_features *flags) +int +_Py_cpuid_check_features(const _Py_cpuid_features *flags) { if (flags->ready != 1) { - return -1; + return 0; } // AVX-512/F is required to support any other AVX-512 instruction set @@ -354,16 +354,10 @@ cpuid_features_validate(const _Py_cpuid_features *flags) ); if (!flags->avx512_f && !avx512_require_f) { - return -1; + return 0; } - return 0; -} - -int -_Py_cpuid_check_features(const _Py_cpuid_features *flags) -{ - return cpuid_features_validate(flags) < 0 ? 
0 : 1; + return 1; } /* * Apply a 1-parameter macro MACRO(FLAG) on all members @@ -552,7 +546,7 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) cpuid_detect_l1_features(flags); cpuid_detect_l7_features(flags); cpuid_features_finalize(flags); - if (cpuid_features_validate(flags) < 0) { + if (!_Py_cpuid_check_features(flags)) { _Py_cpuid_disable_features(flags); } #endif // !HAS_CPUID_SUPPORT From e6d458354166dd298b75f9fcba858ea9e5cd2d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 15:26:33 +0200 Subject: [PATCH 45/78] amend docs for `_Py_cpuid_disable_features` --- Include/internal/pycore_cpuinfo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 49fe7652f3e74f..059653c844394f 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -119,11 +119,11 @@ typedef struct _Py_cpuid_features_s { } _Py_cpuid_features; /* - * Explicitly initialize all members to zero to guarantee that - * we never have an un-initialized attribute at runtime which - * could lead to an illegal instruction error. + * Explicitly set all members to zero to guarantee that + * we never have a non-initialized attribute at runtime + * which could lead to an illegal instruction error. * - * This does not mark 'flags' as being ready yet. + * The readiness state of 'flags' is ignored and left untouched. * * Note: This function does not set any exception and thus never fails. 
*/ From 838f928beb26472023279e45187660488badb070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:41:10 +0200 Subject: [PATCH 46/78] use macros to support larger flag ranges --- .../internal/pycore_cpuinfo_cpuid_features.h | 102 +++++++++--------- .../internal/pycore_cpuinfo_xsave_features.h | 23 ++-- Tools/cpuinfo/__init__.py | 15 +++ Tools/cpuinfo/_util.py | 53 ++++----- Tools/cpuinfo/cpuid_features_gen.py | 34 ++---- Tools/cpuinfo/xsave_features_gen.py | 30 ++---- 6 files changed, 119 insertions(+), 138 deletions(-) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index 83aa6bc34c9aed..a289766534783f 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -39,55 +39,59 @@ extern "C" { import os, sys sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum -print(generate_cpuid_features_enum("_Py_cpuid_feature_mask")) +print(generate_cpuid_features_enum()) [python start generated code]*/ -// fmt: off -/** Enumeration for CPUID features */ -enum _Py_cpuid_feature_mask_e { - /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ - _Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001, // bit = 0 - _Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002, // bit = 1 - _Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200, // bit = 9 - _Py_CPUID_MASK_ECX_L1_FMA = 0x00001000, // bit = 12 - _Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000, // bit = 19 - _Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000, // bit = 20 - _Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000, // bit = 23 - _Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000, // bit = 26 - _Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000, // bit = 27 - _Py_CPUID_MASK_ECX_L1_AVX = 0x10000000, // bit = 28 - /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ - _Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000, // bit = 15 - 
_Py_CPUID_MASK_EDX_L1_SSE = 0x02000000, // bit = 25 - _Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000, // bit = 26 - /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ - _Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020, // bit = 5 - _Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000, // bit = 16 - _Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000, // bit = 17 - _Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000, // bit = 21 - _Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000, // bit = 26 - _Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000, // bit = 27 - _Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000, // bit = 28 - _Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000, // bit = 30 - _Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000, // bit = 31 - /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ - _Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002, // bit = 1 - _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040, // bit = 6 - _Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800, // bit = 11 - _Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000, // bit = 12 - _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000, // bit = 14 - /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ - _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004, // bit = 2 - _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008, // bit = 3 - _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100, // bit = 8 - /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ - _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010, // bit = 4 - _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000, // bit = 23 - /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ - _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010, // bit = 4 - _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020, // bit = 5 - _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400, // bit = 10 -}; -// fmt: on -/*[python end generated code: output=8e58b0997d69bbf8 input=fce00935f64021f9]*/ +// clang-format off +/** Constants for CPUID features */ +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L1_SSE3 0x00000001 // bit = 0 +#define _Py_CPUID_MASK_ECX_L1_PCLMULQDQ 0x00000002 // bit = 1 +#define 
_Py_CPUID_MASK_ECX_L1_SSSE3 0x00000200 // bit = 9 +#define _Py_CPUID_MASK_ECX_L1_FMA 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L1_SSE4_1 0x00080000 // bit = 19 +#define _Py_CPUID_MASK_ECX_L1_SSE4_2 0x00100000 // bit = 20 +#define _Py_CPUID_MASK_ECX_L1_POPCNT 0x00800000 // bit = 23 +#define _Py_CPUID_MASK_ECX_L1_XSAVE 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_ECX_L1_OSXSAVE 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_ECX_L1_AVX 0x10000000 // bit = 28 + +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L1_CMOV 0x00008000 // bit = 15 +#define _Py_CPUID_MASK_EDX_L1_SSE 0x02000000 // bit = 25 +#define _Py_CPUID_MASK_EDX_L1_SSE2 0x04000000 // bit = 26 + +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define _Py_CPUID_MASK_EBX_L7_AVX2 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EBX_L7_AVX512_F 0x00010000 // bit = 16 +#define _Py_CPUID_MASK_EBX_L7_AVX512_DQ 0x00020000 // bit = 17 +#define _Py_CPUID_MASK_EBX_L7_AVX512_IFMA 0x00200000 // bit = 21 +#define _Py_CPUID_MASK_EBX_L7_AVX512_PF 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_EBX_L7_AVX512_ER 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_EBX_L7_AVX512_CD 0x10000000 // bit = 28 +#define _Py_CPUID_MASK_EBX_L7_AVX512_BW 0x40000000 // bit = 30 +#define _Py_CPUID_MASK_EBX_L7_AVX512_VL 0x80000000 // bit = 31 + +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 0x00000040 // bit = 6 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VNNI 0x00000800 // bit = 11 +#define _Py_CPUID_MASK_ECX_L7_AVX512_BITALG 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ 0x00004000 // bit = 14 + +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW 0x00000004 // bit = 2 +#define _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS 0x00000008 // bit = 3 +#define _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT 0x00000100 // bit = 8 + +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define 
_Py_CPUID_MASK_EAX_L7S1_AVX_VNNI 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA 0x00800000 // bit = 23 + +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 +// clang-format on +/*[python end generated code: output=e9112f064e2effec input=d7df15fec9f3daa2]*/ #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index c0e33e820b9ef1..f9ce25e8a71003 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -20,23 +20,20 @@ extern "C" { #include "Python.h" -// fmt: off /*[python input] import os, sys sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) from cpuinfo.xsave_features_gen import generate_xsave_features_enum -print(generate_xsave_features_enum("_Py_xsave_feature_mask")) +print(generate_xsave_features_enum()) [python start generated code]*/ -// fmt: off -/** Enumeration for XSAVE components */ -enum _Py_xsave_feature_mask_e { - _Py_XSAVE_MASK_XCR0_SSE = 0x00000002, // bit = 1 - _Py_XSAVE_MASK_XCR0_AVX = 0x00000004, // bit = 2 - _Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020, // bit = 5 - _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040, // bit = 6 - _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080, // bit = 7 -}; -// fmt: on -/*[python end generated code: output=35ea9a165938f8ef input=336793a305515376]*/ +// clang-format off +/** Constants for XSAVE components */ +#define _Py_XSAVE_MASK_XCR0_SSE 0x00000002 // bit = 1 +#define _Py_XSAVE_MASK_XCR0_AVX 0x00000004 // bit = 2 +#define _Py_XSAVE_MASK_XCR0_AVX512_OPMASK 0x00000020 // bit = 5 +#define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 +#define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit 
= 7 +// clang-format on +/*[python end generated code: output=ac059b802b4317cb input=6323151855b3c9f0]*/ #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/__init__.py index e69de29bb2d1d6..434ed9173f403a 100644 --- a/Tools/cpuinfo/__init__.py +++ b/Tools/cpuinfo/__init__.py @@ -0,0 +1,15 @@ +""" +This package provides functions to generate flags for CPUID and XSAVE. + +The constants are macros generated by Argument Clinic as follows: + + #define <NAME> 0x<MASK> // bit = BIT + ^ ^ + +where ^ indicates a column that is a multiple of 4, <MASK> has +exactly 8 characters and <BIT> has at most 2 characters. + +A C enumeration is NOT generated as the largest member may not fit +on an 'int', which is forbidden as ISO C restricts enumerator values +to that range. +""" diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/_util.py index e501b2761f9659..9d478ca686f65b 100644 --- a/Tools/cpuinfo/_util.py +++ b/Tools/cpuinfo/_util.py @@ -1,9 +1,9 @@ from __future__ import annotations __all__ = [ - "next_block", "make_enum_name", "make_enum_member", + "next_block", "make_constant", "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", - "CWriter" + "CWriter", ] # fmt: skip import contextlib @@ -13,7 +13,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Any, Final + from typing import Any, Final, Literal def next_block(w: int) -> int: @@ -21,20 +21,15 @@ def next_block(w: int) -> int: return ((w + 3) & ~0x03) if (w % 4) else (w + 4) -_MASKSIZE: Final[int] = next_block(len("= 0x00000000,")) +_MASKSIZE: Final[int] = next_block(len("0x00000000")) -def make_enum_name(name: str) -> tuple[str, str]: - if name.endswith("_e"): - raise ValueError(f"enumeration must not end by '_e': {name!r}") - return f"{name}_e", name # (enum name, typedef name) - - -def make_enum_member(key: str, bit: int, name_maxsize: int) -> str: +def make_constant(key: str, bit: int, name_maxsize: int) -> str: + assert bit <= 32, f"{key}: mask does not on an
uint32_t" member_name = key.ljust(name_maxsize) member_mask = format(1 << bit, "008x") - member_mask = f"= 0x{member_mask},".ljust(_MASKSIZE) - return f"{member_name}{member_mask} // bit = {bit}" + member_mask = f"0x{member_mask}".ljust(_MASKSIZE) + return f"#define {member_name}{member_mask}// bit = {bit}" class Style(enum.IntEnum): @@ -43,9 +38,9 @@ class Style(enum.IntEnum): DOXYGEN = enum.auto() -C99_STYLE = Style.C99 -C11_STYLE = Style.C11 -DOXYGEN_STYLE = Style.DOXYGEN +C99_STYLE: Final[Literal[Style.C99]] = Style.C99 +C11_STYLE: Final[Literal[Style.C11]] = Style.C11 +DOXYGEN_STYLE: Final[Literal[Style.DOXYGEN]] = Style.DOXYGEN _COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { C99_STYLE: ("// ", "", ""), @@ -65,17 +60,6 @@ def __init__(self, *, indentsize: int = 4) -> None: self._stream = StringIO() self._indent = " " * indentsize self._prefix = "" - self._disable_external_formatter() - - def _disable_external_formatter(self) -> None: - """Add a directive to suppress external formatters to run.""" - with self.prefixed(""): - self.write("// fmt: off") - - def _enable_external_formatter(self) -> None: - """Add a directive to allow external formatters to run.""" - with self.prefixed(""): - self.write("// fmt: on") def comment( self, text: str, *, level: int = 0, style: Style = C11_STYLE @@ -111,9 +95,18 @@ def write( self._write(prefix, sep="", end="") self._write(*args, sep=sep, end=end) - def _write(self, *args: Any, sep: str, end: str) -> None: + def write_blankline(self) -> None: + self._write() + + def _write(self, *args: Any, sep: str = " ", end: str = "\n") -> None: print(*args, sep=sep, end=end, file=self._stream) def build(self) -> str: - self._enable_external_formatter() - return self._stream.getvalue().rstrip("\n") + # inject directives to temporarily disable external C formatters + return "\n".join( + ( + "// clang-format off", + self._stream.getvalue().rstrip(), + "// clang-format on", + ) + ) diff --git 
a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/cpuid_features_gen.py index ffbf526c01e37f..84f60dd625797b 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/cpuid_features_gen.py @@ -1,7 +1,7 @@ """ Generate an enumeration describing masks to apply on CPUID output registers. -Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +Constants are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, where <> (resp. []) denotes a required (resp. optional) group and: - REGISTER is EAX, EBX, ECX or EDX, @@ -97,7 +97,7 @@ } -def get_member_name( +def get_constant_name( leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature ) -> str: node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" @@ -106,40 +106,26 @@ def get_member_name( _NAME_MAXSIZE: Final[int] = util.next_block( max( - len(get_member_name(*family, name)) + len(get_constant_name(*family, name)) for family, values in CPUID_FEATURES.items() for name in values ) ) -def generate_cpuid_features_enum(enum_name: str) -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`. - - The C enumeration is generated by this function and Argument Clinic, - to be eventually rendred as follows: - - <NAME> = 0x<MASK>, // bit = BIT - ^ ^ ^ ^ ^ ^ ^ - - where ^ indicates a column that is a multiple of 4, <MASK> has - exactly 8 characters and <BIT> has at most 2 characters.
- """ - enum_name, _typedef_enum_name = util.make_enum_name(enum_name) +def generate_cpuid_features_enum() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" writer = util.CWriter() - writer.comment("Enumeration for CPUID features", style=DOXYGEN_STYLE) - writer.write(f"enum {enum_name} {{") + writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) for family, values in CPUID_FEATURES.items(): leaf, subleaf, registry = family - title = f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]" - writer.comment(title, level=1) + writer.comment(f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]") for feature_name, bit in values.items(): if not feature_name: raise ValueError(f"invalid entry for {family}") if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_member_name(leaf, subleaf, registry, feature_name) - member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) - writer.write(member_def, level=1) - writer.write("};") + key = get_constant_name(leaf, subleaf, registry, feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) + writer.write_blankline() return writer.build() diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/xsave_features_gen.py index 858151c1b74956..3d820759ce9f03 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/xsave_features_gen.py @@ -1,5 +1,5 @@ """ -Generate enumeration for XSAVE state components (XCR0 control register). +Generate constants for XSAVE state components (XCR0 control register). See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. 
@@ -30,36 +30,22 @@ } -def get_member_name(feature: Feature) -> str: +def get_constant_name(feature: Feature) -> str: return f"_Py_XSAVE_MASK_XCR0_{feature}" _NAME_MAXSIZE: Final[int] = util.next_block( - max(map(len, map(get_member_name, XSAVE_FEATURES))) + max(map(len, map(get_constant_name, XSAVE_FEATURES))) ) -def generate_xsave_features_enum(enum_name: str) -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`. - - The C enumeration is generated by this function and Argument Clinic, - to be eventually rendred as follows: - - <NAME> = 0x<MASK>, // bit = BIT - ^ ^ ^ ^ ^ ^ ^ - - where ^ indicates a column that is a multiple of 4, <MASK> has - exactly 8 characters and <BIT> has at most 2 characters. - """ - enum_name, _typedef_enum_name = util.make_enum_name(enum_name) +def generate_xsave_features_enum() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" writer = util.CWriter() - writer.comment("Enumeration for XSAVE components", style=DOXYGEN_STYLE) - writer.write(f"enum {enum_name} {{") + writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) for feature_name, bit in XSAVE_FEATURES.items(): if not 0 <= bit < 32: raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_member_name(feature_name) - member_def = util.make_enum_member(key, bit, _NAME_MAXSIZE) - writer.write(member_def, level=1) - writer.write("};") + key = get_constant_name(feature_name) + writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) return writer.build() From 62c9a405f670668df877844b8dd8235b3d4d0027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 30 Jun 2025 19:06:59 +0200 Subject: [PATCH 47/78] handle -Wpedantic --- Python/cpuinfo.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index eeacf9dd8df3bf..0a7634ee33198d 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@
-128,6 +128,8 @@ get_cpuid_info(uint32_t level /* input eax */, uint32_t info[4] = {0}; __cpuidex(info, level, count); *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; +#else + (void)level, (void)count; #endif } @@ -151,8 +153,8 @@ get_xgetbv(uint32_t index) static uint32_t detect_cpuid_maxleaf(void) { - uint32_t maxleaf = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(0, 0, &maxleaf, &ebx, &ecx, &edx); + uint32_t maxleaf = 0, _ebx = 0, _ecx = 0, _edx = 0; + get_cpuid_info(0, 0, &maxleaf, &_ebx, &_ecx, &_edx); return maxleaf; } @@ -160,7 +162,9 @@ detect_cpuid_maxleaf(void) static void detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 1); + (void)flags, (void)ecx, (void)edx; // silence -Wunused-parameter // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS @@ -205,8 +209,9 @@ static void detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, uint32_t ebx, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + (void)flags, (void)ebx, (void)ecx, (void)edx; // Keep the ordering and newlines as they are declared in the structure. #ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS @@ -282,8 +287,9 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)eax, (void)ebx, (void)ecx, (void)edx; // to suppress unused warnings + (void)flags, (void)eax, (void)ebx, (void)ecx, (void)edx; // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -309,8 +315,10 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, static void detect_cpuid_xsave_state(_Py_cpuid_features *flags) { - // Keep the ordering and newlines as they are declared in the structure. + assert(flags->ready == 0); assert(flags->maxleaf >= 1); + (void)flags; + // Keep the ordering and newlines as they are declared in the structure. #ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); @@ -480,6 +488,7 @@ _Py_cpuid_match_features(const _Py_cpuid_features *actual, static void cpuid_detect_l1_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); @@ -497,9 +506,10 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) static void cpuid_detect_l7s0_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - get_cpuid_info(7, 0, &eax, &ebx, &ecx, &edx); + uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -510,6 +520,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) static void cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); @@ -523,6 +534,7 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) static void cpuid_detect_l7_features(_Py_cpuid_features *flags) { + assert(flags->ready == 0); if (flags->maxleaf >= 7) { cpuid_detect_l7s0_features(flags); cpuid_detect_l7s1_features(flags); From a22aa95c44f2cc558be5404f045195342e6e2737 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 20:58:36 +0200 Subject: [PATCH 48/78] reorganize files --- Include/internal/pycore_cpuinfo_cpuid_features.h | 14 +++++++++----- Include/internal/pycore_cpuinfo_xsave_features.h | 14 +++++++++----- PCbuild/pythoncore.vcxproj.filters | 6 +++--- Tools/cpuinfo/{ => libcpuinfo}/__init__.py | 5 +++++ Tools/cpuinfo/libcpuinfo/features/__init__.py | 0 .../features/cpuid.py} | 8 ++++---- .../features/xsave.py} | 8 ++++---- Tools/cpuinfo/{_util.py => libcpuinfo/util.py} | 0 8 files changed, 34 insertions(+), 21 deletions(-) rename Tools/cpuinfo/{ => libcpuinfo}/__init__.py (79%) create mode 100644 Tools/cpuinfo/libcpuinfo/features/__init__.py rename Tools/cpuinfo/{cpuid_features_gen.py => libcpuinfo/features/cpuid.py} (95%) rename Tools/cpuinfo/{xsave_features_gen.py => libcpuinfo/features/xsave.py} (88%) rename Tools/cpuinfo/{_util.py => libcpuinfo/util.py} (100%) diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h index a289766534783f..8db54e7af37fb1 100644 --- a/Include/internal/pycore_cpuinfo_cpuid_features.h +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -1,6 +1,6 @@ /** * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/cpuid_features_gen.py + * @seealso @file Tools/cpuinfo/libcpuinfo/features/cpuid.py * * The enumeration describes masks to apply on CPUID output registers. 
* @@ -37,9 +37,9 @@ extern "C" { /*[python input] import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) -from cpuinfo.cpuid_features_gen import generate_cpuid_features_enum -print(generate_cpuid_features_enum()) +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.cpuid import make_cpuid_features_constants +print(make_cpuid_features_constants()) [python start generated code]*/ // clang-format off /** Constants for CPUID features */ @@ -92,6 +92,10 @@ print(generate_cpuid_features_enum()) #define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 #define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 // clang-format on -/*[python end generated code: output=e9112f064e2effec input=d7df15fec9f3daa2]*/ +/*[python end generated code: output=e9112f064e2effec input=71ec6b4356052ec3]*/ + +#ifdef __cplusplus +} +#endif #endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h index f9ce25e8a71003..e8719261b07604 100644 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ b/Include/internal/pycore_cpuinfo_xsave_features.h @@ -1,6 +1,6 @@ /** * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/xsave_features_gen.py + * @seealso @file Tools/cpuinfo/libcpuinfo/features/xsave.py * * XSAVE state components (XCR0 control register). 
* @@ -22,9 +22,9 @@ extern "C" { /*[python input] import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools"))) -from cpuinfo.xsave_features_gen import generate_xsave_features_enum -print(generate_xsave_features_enum()) +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.xsave import make_xsave_features_constants +print(make_xsave_features_constants()) [python start generated code]*/ // clang-format off /** Constants for XSAVE components */ @@ -34,6 +34,10 @@ print(generate_xsave_features_enum()) #define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 #define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit = 7 // clang-format on -/*[python end generated code: output=ac059b802b4317cb input=6323151855b3c9f0]*/ +/*[python end generated code: output=ac059b802b4317cb input=0a1b0774d3271477]*/ + +#ifdef __cplusplus +} +#endif #endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index b4037a566b3ec5..8afc2010ef93ca 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -601,13 +601,13 @@ Include\internal - Include\cpython + Include\internal - Include\cpython + Include\internal - Include\cpython + Include\internal Include\internal diff --git a/Tools/cpuinfo/__init__.py b/Tools/cpuinfo/libcpuinfo/__init__.py similarity index 79% rename from Tools/cpuinfo/__init__.py rename to Tools/cpuinfo/libcpuinfo/__init__.py index 434ed9173f403a..a935debd4f4bbc 100644 --- a/Tools/cpuinfo/__init__.py +++ b/Tools/cpuinfo/libcpuinfo/__init__.py @@ -12,4 +12,9 @@ A C enumeration is NOT generated as the largest member may not fit on an 'int', which is forbidden as ISO C restricts enumerator values to that range. + +.. note:: + + This package must not be used directly and should only be + invoked from an Argument Clinic "[python input]" directive. 
""" diff --git a/Tools/cpuinfo/libcpuinfo/features/__init__.py b/Tools/cpuinfo/libcpuinfo/features/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/cpuid_features_gen.py b/Tools/cpuinfo/libcpuinfo/features/cpuid.py similarity index 95% rename from Tools/cpuinfo/cpuid_features_gen.py rename to Tools/cpuinfo/libcpuinfo/features/cpuid.py index 84f60dd625797b..65a37860a2778d 100644 --- a/Tools/cpuinfo/cpuid_features_gen.py +++ b/Tools/cpuinfo/libcpuinfo/features/cpuid.py @@ -23,12 +23,12 @@ from __future__ import annotations -__all__ = ["generate_cpuid_features_enum"] +__all__ = ["make_cpuid_features_constants"] from typing import TYPE_CHECKING -from . import _util as util -from ._util import DOXYGEN_STYLE +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final @@ -113,7 +113,7 @@ def get_constant_name( ) -def generate_cpuid_features_enum() -> str: +def make_cpuid_features_constants() -> str: """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" writer = util.CWriter() writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) diff --git a/Tools/cpuinfo/xsave_features_gen.py b/Tools/cpuinfo/libcpuinfo/features/xsave.py similarity index 88% rename from Tools/cpuinfo/xsave_features_gen.py rename to Tools/cpuinfo/libcpuinfo/features/xsave.py index 3d820759ce9f03..474162dfc4463b 100644 --- a/Tools/cpuinfo/xsave_features_gen.py +++ b/Tools/cpuinfo/libcpuinfo/features/xsave.py @@ -8,12 +8,12 @@ from __future__ import annotations -__all__ = ["generate_xsave_features_enum"] +__all__ = ["make_xsave_features_constants"] from typing import TYPE_CHECKING -from . 
import _util as util -from ._util import DOXYGEN_STYLE +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE if TYPE_CHECKING: from typing import Final @@ -39,7 +39,7 @@ def get_constant_name(feature: Feature) -> str: ) -def generate_xsave_features_enum() -> str: +def make_xsave_features_constants() -> str: """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" writer = util.CWriter() writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) diff --git a/Tools/cpuinfo/_util.py b/Tools/cpuinfo/libcpuinfo/util.py similarity index 100% rename from Tools/cpuinfo/_util.py rename to Tools/cpuinfo/libcpuinfo/util.py From 87039dc140e8158d3a7024eaff82bcd05db7e407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:26:22 +0200 Subject: [PATCH 49/78] suppress compilation warnings --- Python/cpuinfo.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0a7634ee33198d..8f2ad5add1e753 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -110,6 +110,15 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif +#if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" +#endif + +#undef SHOULD_PARSE_CPUID_L1 + /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. @@ -133,7 +142,8 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -static uint64_t +#if defined(HAS_XGETBV_SUPPORT) && defined(SHOULD_PARSE_CPUID_L1) +static uint64_t /* should only be used after calling cpuid(1, 0, ...) 
*/ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now @@ -148,6 +158,7 @@ get_xgetbv(uint32_t index) return 0; #endif } +#endif /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ static uint32_t @@ -159,7 +170,8 @@ detect_cpuid_maxleaf(void) } /* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ -static void +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) { assert(flags->ready == 0); @@ -203,9 +215,11 @@ detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); #endif } +#endif /* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ -static void +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void /* should only be used after calling cpuid(7, 0, ...) */ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, uint32_t ebx, uint32_t ecx, uint32_t edx) { @@ -278,9 +292,11 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, #endif #endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD } +#endif /* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ -static void +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void /* should only be used after calling cpuid(7, 1, ...) */ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, uint32_t eax, uint32_t ebx, @@ -311,23 +327,24 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, #endif #endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD } +#endif -static void +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ detect_cpuid_xsave_state(_Py_cpuid_features *flags) { assert(flags->ready == 0); assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. -#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->osxsave ? 
get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -#endif } +#endif static void cpuid_features_finalize(_Py_cpuid_features *flags) @@ -493,9 +510,7 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - if (flags->osxsave) { - detect_cpuid_xsave_state(flags); - } + detect_cpuid_xsave_state(flags); } } #else @@ -551,9 +566,6 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) return; } _Py_cpuid_disable_features(flags); -#ifndef HAS_CPUID_SUPPORT - flags->ready = 1; -#else flags->maxleaf = detect_cpuid_maxleaf(); cpuid_detect_l1_features(flags); cpuid_detect_l7_features(flags); @@ -561,5 +573,4 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (!_Py_cpuid_check_features(flags)) { _Py_cpuid_disable_features(flags); } -#endif // !HAS_CPUID_SUPPORT } From 8a4b1205b5160ecb80b5becdcf5b22cc96fa1617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:26:27 +0200 Subject: [PATCH 50/78] add linting --- Tools/cpuinfo/.ruff.toml | 16 ++++++++++++++++ Tools/cpuinfo/mypy.ini | 9 +++++++++ 2 files changed, 25 insertions(+) create mode 100644 Tools/cpuinfo/.ruff.toml create mode 100644 Tools/cpuinfo/mypy.ini diff --git a/Tools/cpuinfo/.ruff.toml b/Tools/cpuinfo/.ruff.toml new file mode 100644 index 00000000000000..e49d04c2d4e863 --- /dev/null +++ b/Tools/cpuinfo/.ruff.toml @@ -0,0 +1,16 @@ +# Python 3.12 is required for 'type' statements +target-version = "py312" +line-length = 79 + +[format] +skip-magic-trailing-comma = false + +[lint] +select = [ + "I", # isort + 
"F841", # unused variable + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] + + diff --git a/Tools/cpuinfo/mypy.ini b/Tools/cpuinfo/mypy.ini new file mode 100644 index 00000000000000..914ca082b72189 --- /dev/null +++ b/Tools/cpuinfo/mypy.ini @@ -0,0 +1,9 @@ +[mypy] +files = Tools/cpuinfo/ +pretty = True + +python_version = 3.12 +strict = True +extra_checks = True +enable_error_code = ignore-without-code,redundant-expr,truthy-bool +warn_unreachable = True From 8603e1485b177fa241680e22564651dad8b3a4fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:28:40 +0200 Subject: [PATCH 51/78] typo --- Python/cpuinfo.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8f2ad5add1e753..44190dc9f30352 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -117,8 +117,6 @@ #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif -#undef SHOULD_PARSE_CPUID_L1 - /* * Call __cpuid_count() or equivalent and get * its EAX, EBX, ECX and EDX output registers. 
From d6213a5f2d115ebc89e64651822fe02b76bfd902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Jul 2025 21:28:48 +0200 Subject: [PATCH 52/78] typo --- Python/cpuinfo.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 44190dc9f30352..05f116cab4a97f 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -110,12 +110,12 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) -#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" -#endif #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif /* * Call __cpuid_count() or equivalent and get From 79d5b3453f39ced2fdacf83fee9366fbfacb5bb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:10:12 +0200 Subject: [PATCH 53/78] log more! 
--- Tools/build/check_extension_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index 9815bcfe27d995..cff29b4bfaab21 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -471,7 +471,7 @@ def main(): if args.debug: args.verbose = True logging.basicConfig( - level=logging.DEBUG if args.debug else logging.INFO, + level=logging.DEBUG, format="[%(levelname)s] %(message)s", ) From f69d74a0037ea71a3a30931081bf4b1615c82bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:12:11 +0200 Subject: [PATCH 54/78] skip CI --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6171571857af6..15dc80b60cbbc2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -635,7 +635,7 @@ jobs: - build-tsan - cross-build-linux - cifuzz - if: always() + if: false steps: - name: Check whether the needed jobs succeeded or failed From cb9065da58bed6b7ca95a39c48f6146ded510174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:16:32 +0200 Subject: [PATCH 55/78] more printf --- Python/cpuinfo.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 05f116cab4a97f..0db0eb52efe072 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -506,8 +506,12 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(1, 0)]\n"); get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); + 
printf("[L1::detect_cpuid_features]\n"); detect_cpuid_features(flags, ecx, edx); + printf("[L1::detect_cpuid_xsave_state]\n"); detect_cpuid_xsave_state(flags); } } @@ -522,7 +526,9 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(7, 0)]\n"); get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", _eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -536,7 +542,9 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + printf("[L1::get_cpuid_info(7, 1)]\n"); get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); + printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); } #else @@ -563,12 +571,19 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (flags->ready) { return; } + printf("[disable features]\n"); _Py_cpuid_disable_features(flags); + printf("[detect MAXLEAF]\n"); flags->maxleaf = detect_cpuid_maxleaf(); + printf("[L1, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l1_features(flags); + printf("[L7, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l7_features(flags); + printf("finalize\n"); cpuid_features_finalize(flags); if (!_Py_cpuid_check_features(flags)) { + printf("invalid check\n"); _Py_cpuid_disable_features(flags); } + printf("done\n"); } From 88df3b72790ae7f2e4a65a3c9ca7815de2402b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:18:04 +0200 Subject: [PATCH 56/78] faster ci --- .github/workflows/build.yml | 533 ------------------------------------ 1 file changed, 533 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 
15dc80b60cbbc2..b5a8afed244a0f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -152,536 +152,3 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals - - build-windows: - name: >- - Windows - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: fromJSON(needs.build-context.outputs.run-windows-tests) - strategy: - fail-fast: false - matrix: - arch: - - x64 - - Win32 - - arm64 - free-threading: - - false - - true - exclude: - # Skip Win32 on free-threaded builds - - { arch: Win32, free-threading: true } - uses: ./.github/workflows/reusable-windows.yml - with: - arch: ${{ matrix.arch }} - free-threading: ${{ matrix.free-threading }} - - build-windows-msi: - name: >- # ${{ '' } is a hack to nest jobs under the same sidebar category - Windows MSI${{ '' }} - needs: build-context - if: fromJSON(needs.build-context.outputs.run-windows-msi) - strategy: - fail-fast: false - matrix: - arch: - - x86 - - x64 - - arm64 - uses: ./.github/workflows/reusable-windows-msi.yml - with: - arch: ${{ matrix.arch }} - - build-macos: - name: >- - macOS - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. - # macOS 13 only runs tests against the GIL-enabled CPython. - # Cirrus used for upstream, macos-14 for forks. 
- os: - - ghcr.io/cirruslabs/macos-runner:sonoma - - macos-14 - - macos-13 - is-fork: # only used for the exclusion trick - - ${{ github.repository_owner != 'python' }} - free-threading: - - false - - true - exclude: - - os: ghcr.io/cirruslabs/macos-runner:sonoma - is-fork: true - - os: macos-14 - is-fork: false - - os: macos-13 - free-threading: true - uses: ./.github/workflows/reusable-macos.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - free-threading: ${{ matrix.free-threading }} - os: ${{ matrix.os }} - - build-ubuntu: - name: >- - Ubuntu - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - bolt: - - false - - true - free-threading: - - false - - true - os: - - ubuntu-24.04 - - ubuntu-24.04-arm - exclude: - # Do not test BOLT with free-threading, to conserve resources - - bolt: true - free-threading: true - # BOLT currently crashes during instrumentation on aarch64 - - os: ubuntu-24.04-arm - bolt: true - uses: ./.github/workflows/reusable-ubuntu.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - bolt-optimizations: ${{ matrix.bolt }} - free-threading: ${{ matrix.free-threading }} - os: ${{ matrix.os }} - - build-ubuntu-ssltests: - name: 'Ubuntu SSL tests with OpenSSL' - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04] - openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] - # See Tools/ssl/make_ssl_data.py for notes on adding a new version - env: - OPENSSL_VER: ${{ matrix.openssl_ver }} - MULTISSL_DIR: ${{ github.workspace }}/multissl - OPENSSL_DIR: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }} - LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ 
matrix.openssl_ver }}/lib - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - - name: Configure CPython - run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: SSL tests - run: ./python Lib/test/ssltests.py - - build-wasi: - name: 'WASI' - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - uses: ./.github/workflows/reusable-wasi.yml - with: - config_hash: ${{ 
needs.build-context.outputs.config-hash }} - - test-hypothesis: - name: "Hypothesis tests on Ubuntu" - runs-on: ubuntu-24.04 - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - env: - OPENSSL_VER: 3.0.16 - PYTHONSTRICTEXTENSIONBUILD: 1 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - - name: Setup directory envs for out-of-tree builds - run: | - echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" - echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" - - name: Create directories for read-only out-of-tree builds - run: mkdir -p "$CPYTHON_RO_SRCDIR" "$CPYTHON_BUILDDIR" - - name: Bind mount sources read-only - run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - - name: Runner image version - run: echo 
"IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Configure CPython out-of-tree - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - ../cpython-ro-srcdir/configure \ - --config-cache \ - --with-pydebug \ - --enable-slower-safety \ - --with-openssl="$OPENSSL_DIR" - - name: Build CPython out-of-tree - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: make -j4 - - name: Display build info - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: make pythoninfo - - name: Remount sources writable for tests - # some tests write to srcdir, lack of pyc files slows down testing - run: sudo mount "$CPYTHON_RO_SRCDIR" -oremount,rw - - name: Setup directory envs for out-of-tree builds - run: | - echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" - - name: "Create hypothesis venv" - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - VENV_LOC=$(realpath -m .)/hypovenv - VENV_PYTHON=$VENV_LOC/bin/python - echo "HYPOVENV=${VENV_LOC}" >> "$GITHUB_ENV" - echo "VENV_PYTHON=${VENV_PYTHON}" >> "$GITHUB_ENV" - ./python -m venv "$VENV_LOC" && "$VENV_PYTHON" -m pip install -r "${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt" - - name: 'Restore Hypothesis database' - id: cache-hypothesis-database - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/ - key: hypothesis-database-${{ github.head_ref || github.run_id }} - restore-keys: | - hypothesis-database- - - name: "Run tests" - working-directory: ${{ env.CPYTHON_BUILDDIR }} - run: | - # Most of the excluded tests are slow test suites with no property tests - # - # (GH-104097) test_sysconfig is skipped because it has tests that are - # failing when executed from inside a virtual environment. 
- "${VENV_PYTHON}" -m test \ - -W \ - --slowest \ - -j4 \ - --timeout 900 \ - -x test_asyncio \ - -x test_multiprocessing_fork \ - -x test_multiprocessing_forkserver \ - -x test_multiprocessing_spawn \ - -x test_concurrent_futures \ - -x test_socket \ - -x test_subprocess \ - -x test_signal \ - -x test_sysconfig - - uses: actions/upload-artifact@v4 - if: always() - with: - name: hypothesis-example-db - path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/examples/ - - build-asan: - name: 'Address sanitizer' - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04] - env: - OPENSSL_VER: 3.0.16 - PYTHONSTRICTEXTENSIONBUILD: 1 - ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Set up GCC-10 for ASAN - uses: egor-tensin/setup-gcc@v1 - with: - version: 10 - - name: Configure OpenSSL env vars - run: | - echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" - echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" - echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" - - name: 'Restore OpenSSL build' - id: cache-openssl - uses: actions/cache@v4 - with: - path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - - name: Install OpenSSL - 
if: steps.cache-openssl.outputs.cache-hit != 'true' - run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux - - name: Add ccache to PATH - run: | - echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - - name: Configure CPython - run: ./configure --config-cache --with-address-sanitizer --without-pymalloc - - name: Build CPython - run: make -j4 - - name: Display build info - run: make pythoninfo - - name: Tests - run: xvfb-run make ci - - build-tsan: - name: >- - Thread sanitizer - ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - strategy: - fail-fast: false - matrix: - free-threading: - - false - - true - uses: ./.github/workflows/reusable-tsan.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - free-threading: ${{ matrix.free-threading }} - - build-ubsan: - name: Undefined behavior sanitizer - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - uses: ./.github/workflows/reusable-ubsan.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} - - cross-build-linux: - name: Cross build Linux - runs-on: ubuntu-latest - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-tests == 'true' - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Runner image version - run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - - name: Register gcc problem matcher - run: echo "::add-matcher::.github/problem-matchers/gcc.json" - - name: Set build 
dir - run: - # an absolute path outside of the working directoy - echo "BUILD_DIR=$(realpath ${{ github.workspace }}/../build)" >> "$GITHUB_ENV" - - name: Install dependencies - run: sudo ./.github/workflows/posix-deps-apt.sh - - name: Configure host build - run: ./configure --prefix="$BUILD_DIR/host-python" - - name: Install host Python - run: make -j8 install - - name: Run test subset with host build - run: | - "$BUILD_DIR/host-python/bin/python3" -m test test_sysconfig test_site test_embed - - name: Configure cross build - run: ./configure --prefix="$BUILD_DIR/cross-python" --with-build-python="$BUILD_DIR/host-python/bin/python3" - - name: Install cross Python - run: make -j8 install - - name: Run test subset with host build - run: | - "$BUILD_DIR/cross-python/bin/python3" -m test test_sysconfig test_site test_embed - - # CIFuzz job based on https://google.github.io/oss-fuzz/getting-started/continuous-integration/ - cifuzz: - name: CIFuzz - runs-on: ubuntu-latest - timeout-minutes: 60 - needs: build-context - if: needs.build-context.outputs.run-ci-fuzz == 'true' - permissions: - security-events: write - strategy: - fail-fast: false - matrix: - sanitizer: [address, undefined, memory] - steps: - - name: Build fuzzers (${{ matrix.sanitizer }}) - id: build - uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master - with: - oss-fuzz-project-name: cpython3 - sanitizer: ${{ matrix.sanitizer }} - - name: Run fuzzers (${{ matrix.sanitizer }}) - uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master - with: - fuzz-seconds: 600 - oss-fuzz-project-name: cpython3 - output-sarif: true - sanitizer: ${{ matrix.sanitizer }} - - name: Upload crash - if: failure() && steps.build.outcome == 'success' - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.sanitizer }}-artifacts - path: ./out/artifacts - - name: Upload SARIF - if: always() && steps.build.outcome == 'success' - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: 
cifuzz-sarif/results.sarif - checkout_path: cifuzz-sarif - - all-required-green: # This job does nothing and is only used for the branch protection - name: All required checks pass - runs-on: ubuntu-latest - timeout-minutes: 5 - needs: - - build-context # Transitive dependency, needed to access `run-tests` value - - check-docs - - check-autoconf-regen - - check-generated-files - - build-windows - - build-windows-msi - - build-macos - - build-ubuntu - - build-ubuntu-ssltests - - build-wasi - - test-hypothesis - - build-asan - - build-tsan - - cross-build-linux - - cifuzz - if: false - - steps: - - name: Check whether the needed jobs succeeded or failed - uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe - with: - allowed-failures: >- - build-windows-msi, - build-ubuntu-ssltests, - test-hypothesis, - cifuzz, - allowed-skips: >- - ${{ - !fromJSON(needs.build-context.outputs.run-docs) - && ' - check-docs, - ' - || '' - }} - ${{ - needs.build-context.outputs.run-tests != 'true' - && ' - check-autoconf-regen, - check-generated-files, - build-macos, - build-ubuntu, - build-ubuntu-ssltests, - build-wasi, - test-hypothesis, - build-asan, - build-tsan, - cross-build-linux, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-windows-tests) - && ' - build-windows, - ' - || '' - }} - ${{ - !fromJSON(needs.build-context.outputs.run-ci-fuzz) - && ' - cifuzz, - ' - || '' - }} - jobs: ${{ toJSON(needs) }} From db64ba5dc0c25719a1cb0df23f0e84320caead8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:22:29 +0200 Subject: [PATCH 57/78] !! 
--- Tools/build/check_extension_modules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index cff29b4bfaab21..feb33be717e512 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -470,8 +470,9 @@ def main(): args = parser.parse_args() if args.debug: args.verbose = True + args.verbose = args.debug = True logging.basicConfig( - level=logging.DEBUG, + level=logging.DEBUG if args.debug else logging.INFO, format="[%(levelname)s] %(message)s", ) From 04012b4fb6f338b470801ca72c3ea1f092d1b775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 13 Jul 2025 14:28:53 +0200 Subject: [PATCH 58/78] !! --- Python/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0db0eb52efe072..3e246c3df5de83 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -335,7 +335,7 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. - uint64_t xcr0 = flags->osxsave ? get_xgetbv(0) : 0; + uint64_t xcr0 = flags->xsave && flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); From 144d9ef2c54de7451e6283215ef4810b0f5eb226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:01:42 +0200 Subject: [PATCH 59/78] does it work now..? 
--- Python/cpuinfo.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3e246c3df5de83..322714abaafd07 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -9,14 +9,17 @@ // In the future, we should carefully enable support for ARM NEON and POWER // as well as AMD. #if defined(__x86_64__) && defined(__GNUC__) -# include // __cpuid_count() +# include // __cpuid_count() # define HAS_CPUID_SUPPORT +# if defined(__clang__) +# include // _xgetbv() +# endif # define HAS_XGETBV_SUPPORT #elif defined(_M_X64) -# include // _xgetbv() -# define HAS_XGETBV_SUPPORT -# include // __cpuidex() +# include // __cpuidex() # define HAS_CPUID_SUPPORT +# include // _xgetbv() +# define HAS_XGETBV_SUPPORT #else # undef HAS_CPUID_SUPPORT # undef HAS_XGETBV_SUPPORT @@ -146,9 +149,18 @@ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) +# if defined(__clang__) + return (uint64_t)_xgetbv(index); +# else uint32_t eax = 0, edx = 0; - __asm__ __volatile__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index)); + __asm__ volatile( + /* raw opcode for xgetbv for compatibility with older toolchains */ + ".byte 0x0f, 0x01, 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (index) + ); return ((uint64_t)edx << 32) | eax; +# endif #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); #else From b364ad29cc93dedeffde8e405f8d0988a3b0a6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:09:22 +0200 Subject: [PATCH 60/78] does it work now..? 
--- Python/cpuinfo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 322714abaafd07..023f4a2193ae4b 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -150,7 +150,11 @@ get_xgetbv(uint32_t index) assert(index == 0); // only XCR0 is supported for now #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) # if defined(__clang__) +# if defined(_MSC_VER) && _Py__has_builtin(__builtin_ia32_xgetbv) return (uint64_t)_xgetbv(index); +# else + return 0; +# endif # else uint32_t eax = 0, edx = 0; __asm__ volatile( From 0791e890e7e23ef13c6cd1902ab2ce58e6eb17a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:23:52 +0200 Subject: [PATCH 61/78] remove xgetbv support? --- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 023f4a2193ae4b..67cf7d3e300e47 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,6 +25,8 @@ # undef HAS_XGETBV_SUPPORT #endif +#undef HAS_XGETBV_SUPPORT + // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not From 48b2cb27f33edf40d9c752ca53e9f4167876c02a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 10:24:11 +0200 Subject: [PATCH 62/78] remove xgetbv support? --- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 67cf7d3e300e47..66867a19768384 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -353,12 +353,14 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->xsave && flags->osxsave ? get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); +#endif } #endif From 34f1337dc6fb0334d305c63802d7226e53c6ac27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:30:32 +0200 Subject: [PATCH 63/78] huh? --- Python/cpuinfo.c | 52 +++++++------------------- Tools/build/check_extension_modules.py | 1 - 2 files changed, 13 insertions(+), 40 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 66867a19768384..d3e99e8f4ecfee 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -7,7 +7,7 @@ // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER -// as well as AMD. +// as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures. #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() # define HAS_CPUID_SUPPORT @@ -15,7 +15,7 @@ # include // _xgetbv() # endif # define HAS_XGETBV_SUPPORT -#elif defined(_M_X64) +#elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() # define HAS_CPUID_SUPPORT # include // _xgetbv() @@ -25,8 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_XGETBV_SUPPORT - // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. 
If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not @@ -115,6 +113,8 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L7S1 + #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif @@ -150,14 +150,7 @@ static uint64_t /* should only be used after calling cpuid(1, 0, ...) */ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now -#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) -# if defined(__clang__) -# if defined(_MSC_VER) && _Py__has_builtin(__builtin_ia32_xgetbv) - return (uint64_t)_xgetbv(index); -# else - return 0; -# endif -# else +# if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -166,13 +159,12 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; -# endif -#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) +# elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); -#else +# else (void)index; return 0; -#endif +# endif } #endif @@ -314,14 +306,11 @@ detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, #ifdef SHOULD_PARSE_CPUID_L7S1 static void /* should only be used after calling cpuid(7, 1, ...) */ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, - uint32_t eax, - uint32_t ebx, - uint32_t ecx, - uint32_t edx) + uint32_t eax, uint32_t edx) { assert(flags->ready == 0); assert(flags->maxleaf >= 7); - (void)flags, (void)eax, (void)ebx, (void)ecx, (void)edx; + (void)flags, (void)eax, (void)edx; // Keep the ordering and newlines as they are declared in the structure. 
#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD #ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS @@ -526,12 +515,8 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) assert(flags->ready == 0); if (flags->maxleaf >= 1) { uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(1, 0)]\n"); get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); - printf("[L1::detect_cpuid_features]\n"); detect_cpuid_features(flags, ecx, edx); - printf("[L1::detect_cpuid_xsave_state]\n"); detect_cpuid_xsave_state(flags); } } @@ -546,9 +531,7 @@ cpuid_detect_l7s0_features(_Py_cpuid_features *flags) assert(flags->ready == 0); assert(flags->maxleaf >= 7); uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(7, 0)]\n"); get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", _eax, ebx, ecx, edx); detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); } #else @@ -561,11 +544,9 @@ cpuid_detect_l7s1_features(_Py_cpuid_features *flags) { assert(flags->ready == 0); assert(flags->maxleaf >= 7); - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - printf("[L1::get_cpuid_info(7, 1)]\n"); - get_cpuid_info(7, 1, &eax, &ebx, &ecx, &edx); - printf("RES: eax=%u, ebx=%u, ecx=%u, edx=%u\n", eax, ebx, ecx, edx); - detect_cpuid_extended_features_L7S1(flags, eax, ebx, ecx, edx); + uint32_t eax = 0, _ebx = 0, _ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &_ebx, &_ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, edx); } #else #define cpuid_detect_l7s1_features(FLAGS) @@ -591,19 +572,12 @@ _Py_cpuid_detect_features(_Py_cpuid_features *flags) if (flags->ready) { return; } - printf("[disable features]\n"); _Py_cpuid_disable_features(flags); - printf("[detect MAXLEAF]\n"); flags->maxleaf = detect_cpuid_maxleaf(); - printf("[L1, maxleaf=%d]\n", flags->maxleaf); cpuid_detect_l1_features(flags); - printf("[L7, maxleaf=%d]\n", flags->maxleaf); 
cpuid_detect_l7_features(flags); - printf("finalize\n"); cpuid_features_finalize(flags); if (!_Py_cpuid_check_features(flags)) { - printf("invalid check\n"); _Py_cpuid_disable_features(flags); } - printf("done\n"); } diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index feb33be717e512..9815bcfe27d995 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -470,7 +470,6 @@ def main(): args = parser.parse_args() if args.debug: args.verbose = True - args.verbose = args.debug = True logging.basicConfig( level=logging.DEBUG if args.debug else logging.INFO, format="[%(levelname)s] %(message)s", From 91effb4f6ef809e6065ca98d47d212fd6e0fb9c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:35:47 +0200 Subject: [PATCH 64/78] only parse maxleaf --- Python/cpuinfo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index d3e99e8f4ecfee..3a9e21e135f21d 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -113,6 +113,9 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L1 +#undef SHOULD_PARSE_CPUID_L7 +#undef SHOULD_PARSE_CPUID_L7S0 #undef SHOULD_PARSE_CPUID_L7S1 #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) From 6dc532d5c78cf45b5ddd8162f65634a6c0e49a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:45:35 +0200 Subject: [PATCH 65/78] use different variables! 
--- Python/cpuinfo.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 3a9e21e135f21d..0795f3f845e3bd 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -113,11 +113,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L1 -#undef SHOULD_PARSE_CPUID_L7 -#undef SHOULD_PARSE_CPUID_L7S0 -#undef SHOULD_PARSE_CPUID_L7S1 - #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif @@ -138,7 +133,9 @@ get_cpuid_info(uint32_t level /* input eax */, { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); + uint32_t r_eax = 0, r_ebx = 0, r_ecx = 0, r_edx = 0; + __cpuid_count(level, count, r_eax, r_ebx, r_ecx, r_edx); + *eax = r_eax, *ebx = r_ebx, *ecx = r_ecx, *edx = r_edx; #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); From 6d5dd0b142c86c98ff5ef3e23cdf9502a89119c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 12:50:34 +0200 Subject: [PATCH 66/78] disable everything! --- Python/cpuinfo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 0795f3f845e3bd..5757a27ba01085 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,6 +25,9 @@ # undef HAS_XGETBV_SUPPORT #endif +#undef HAS_CPUID_SUPPORT +#undef HAS_XGETBV_SUPPORT + // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. 
If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not From 8e5b2f0d684c5a954bd44a3fb065a15bf4d20342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:16:47 +0200 Subject: [PATCH 67/78] revert CI --- .github/workflows/build.yml | 611 ++++++++++++++++++++++++++++++++++++ 1 file changed, 611 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b5a8afed244a0f..05f20e12f4653d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -152,3 +152,614 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + + build-windows: + name: >- + Windows + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: fromJSON(needs.build-context.outputs.run-windows-tests) + strategy: + fail-fast: false + matrix: + arch: + - x64 + - Win32 + - arm64 + free-threading: + - false + - true + exclude: + # Skip Win32 on free-threaded builds + - { arch: Win32, free-threading: true } + uses: ./.github/workflows/reusable-windows.yml + with: + arch: ${{ matrix.arch }} + free-threading: ${{ matrix.free-threading }} + + build-windows-msi: + name: >- # ${{ '' } is a hack to nest jobs under the same sidebar category + Windows MSI${{ '' }} + needs: build-context + if: fromJSON(needs.build-context.outputs.run-windows-msi) + strategy: + fail-fast: false + matrix: + arch: + - x86 + - x64 + - arm64 + uses: ./.github/workflows/reusable-windows-msi.yml + with: + arch: ${{ matrix.arch }} + + build-macos: + name: >- + macOS + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + # Cirrus and macos-14 are M1, macos-13 is default GHA Intel. 
+ # macOS 13 only runs tests against the GIL-enabled CPython. + # Cirrus used for upstream, macos-14 for forks. + os: + - ghcr.io/cirruslabs/macos-runner:sonoma + - macos-14 + - macos-13 + is-fork: # only used for the exclusion trick + - ${{ github.repository_owner != 'python' }} + free-threading: + - false + - true + exclude: + - os: ghcr.io/cirruslabs/macos-runner:sonoma + is-fork: true + - os: macos-14 + is-fork: false + - os: macos-13 + free-threading: true + uses: ./.github/workflows/reusable-macos.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} + + build-ubuntu: + name: >- + Ubuntu + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + ${{ fromJSON(matrix.bolt) && '(bolt)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + bolt: + - false + - true + free-threading: + - false + - true + os: + - ubuntu-24.04 + - ubuntu-24.04-arm + exclude: + # Do not test BOLT with free-threading, to conserve resources + - bolt: true + free-threading: true + # BOLT currently crashes during instrumentation on aarch64 + - os: ubuntu-24.04-arm + bolt: true + uses: ./.github/workflows/reusable-ubuntu.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + bolt-optimizations: ${{ matrix.bolt }} + free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} + + build-ubuntu-ssltests-openssl: + name: 'Ubuntu SSL tests with OpenSSL' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] + # See Tools/ssl/make_ssl_data.py for notes on adding a new version + env: + OPENSSL_VER: ${{ matrix.openssl_ver }} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace 
}}/multissl/openssl/${{ matrix.openssl_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/openssl/${{ matrix.openssl_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: SSL tests + run: ./python Lib/test/ssltests.py + + build-ubuntu-ssltests-awslc: + name: 'Ubuntu SSL tests with AWS-LC' + runs-on: ${{ matrix.os }} 
+ timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + awslc_ver: [1.55.0] + env: + AWSLC_VER: ${{ matrix.awslc_ver}} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure SSL lib env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore AWS-LC build' + id: cache-aws-lc + uses: actions/cache@v4 + with: + path: ./multissl/aws-lc/${{ matrix.awslc_ver }} + key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} + - name: Install AWS-LC + if: steps.cache-aws-lc.outputs.cache-hit != 'true' + run: | + python3 Tools/ssl/multissltests.py \ + --steps=library \ + --base-directory "$MULTISSL_DIR" \ + --awslc ${{ matrix.awslc_ver }} \ + --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: | + ./configure CFLAGS="-fdiagnostics-format=json" \ + 
--config-cache \ + --enable-slower-safety \ + --with-pydebug \ + --with-openssl="$OPENSSL_DIR" \ + --with-builtin-hashlib-hashes=blake2 \ + --with-ssl-default-suites=openssl + - name: Build CPython + run: make -j + - name: Display build info + run: make pythoninfo + - name: Verify python is linked to AWS-LC + run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC + - name: SSL tests + run: ./python Lib/test/ssltests.py + + build-wasi: + name: 'WASI' + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + uses: ./.github/workflows/reusable-wasi.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + + test-hypothesis: + name: "Hypothesis tests on Ubuntu" + runs-on: ubuntu-24.04 + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + env: + OPENSSL_VER: 3.0.16 + PYTHONSTRICTEXTENSIONBUILD: 1 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ runner.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache 
action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Setup directory envs for out-of-tree builds + run: | + echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" + echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" + - name: Create directories for read-only out-of-tree builds + run: mkdir -p "$CPYTHON_RO_SRCDIR" "$CPYTHON_BUILDDIR" + - name: Bind mount sources read-only + run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: ${{ env.CPYTHON_BUILDDIR }}/config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Configure CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + ../cpython-ro-srcdir/configure \ + --config-cache \ + --with-pydebug \ + --enable-slower-safety \ + --with-openssl="$OPENSSL_DIR" + - name: Build CPython out-of-tree + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make -j4 + - name: Display build info + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: make pythoninfo + - name: Remount sources writable for tests + # some tests write to srcdir, lack of pyc files slows down testing + run: sudo mount "$CPYTHON_RO_SRCDIR" -oremount,rw + - name: Setup directory envs for out-of-tree builds + run: | + echo "CPYTHON_BUILDDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-builddir)" >> "$GITHUB_ENV" + - name: "Create hypothesis venv" + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + VENV_LOC=$(realpath -m .)/hypovenv + VENV_PYTHON=$VENV_LOC/bin/python + echo "HYPOVENV=${VENV_LOC}" >> "$GITHUB_ENV" + echo "VENV_PYTHON=${VENV_PYTHON}" >> "$GITHUB_ENV" + ./python -m venv "$VENV_LOC" && "$VENV_PYTHON" -m pip install -r 
"${GITHUB_WORKSPACE}/Tools/requirements-hypothesis.txt" + - name: 'Restore Hypothesis database' + id: cache-hypothesis-database + uses: actions/cache@v4 + with: + path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/ + key: hypothesis-database-${{ github.head_ref || github.run_id }} + restore-keys: | + hypothesis-database- + - name: "Run tests" + working-directory: ${{ env.CPYTHON_BUILDDIR }} + run: | + # Most of the excluded tests are slow test suites with no property tests + # + # (GH-104097) test_sysconfig is skipped because it has tests that are + # failing when executed from inside a virtual environment. + "${VENV_PYTHON}" -m test \ + -W \ + --slowest \ + -j4 \ + --timeout 900 \ + -x test_asyncio \ + -x test_multiprocessing_fork \ + -x test_multiprocessing_forkserver \ + -x test_multiprocessing_spawn \ + -x test_concurrent_futures \ + -x test_socket \ + -x test_subprocess \ + -x test_signal \ + -x test_sysconfig + - uses: actions/upload-artifact@v4 + if: always() + with: + name: hypothesis-example-db + path: ${{ env.CPYTHON_BUILDDIR }}/.hypothesis/examples/ + + build-asan: + name: 'Address sanitizer' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + env: + OPENSSL_VER: 3.0.16 + PYTHONSTRICTEXTENSIONBUILD: 1 + ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: 
Set up GCC-10 for ASAN + uses: egor-tensin/setup-gcc@v1 + with: + version: 10 + - name: Configure OpenSSL env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/openssl/${OPENSSL_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore OpenSSL build' + id: cache-openssl + uses: actions/cache@v4 + with: + path: ./multissl/openssl/${{ env.OPENSSL_VER }} + key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + - name: Install OpenSSL + if: steps.cache-openssl.outputs.cache-hit != 'true' + run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: ${{ github.event_name == 'push' }} + max-size: "200M" + - name: Configure CPython + run: ./configure --config-cache --with-address-sanitizer --without-pymalloc + - name: Build CPython + run: make -j4 + - name: Display build info + run: make pythoninfo + - name: Tests + run: xvfb-run make ci + + build-tsan: + name: >- + Thread sanitizer + ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }} + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + free-threading: + - false + - true + uses: ./.github/workflows/reusable-tsan.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + free-threading: ${{ matrix.free-threading }} + + build-ubsan: + name: Undefined behavior sanitizer + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + uses: ./.github/workflows/reusable-ubsan.yml + with: + config_hash: ${{ needs.build-context.outputs.config-hash }} + + cross-build-linux: + name: Cross build Linux + 
runs-on: ubuntu-latest + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Set build dir + run: + # an absolute path outside of the working directoy + echo "BUILD_DIR=$(realpath ${{ github.workspace }}/../build)" >> "$GITHUB_ENV" + - name: Install dependencies + run: sudo ./.github/workflows/posix-deps-apt.sh + - name: Configure host build + run: ./configure --prefix="$BUILD_DIR/host-python" + - name: Install host Python + run: make -j8 install + - name: Run test subset with host build + run: | + "$BUILD_DIR/host-python/bin/python3" -m test test_sysconfig test_site test_embed + - name: Configure cross build + run: ./configure --prefix="$BUILD_DIR/cross-python" --with-build-python="$BUILD_DIR/host-python/bin/python3" + - name: Install cross Python + run: make -j8 install + - name: Run test subset with host build + run: | + "$BUILD_DIR/cross-python/bin/python3" -m test test_sysconfig test_site test_embed + + # CIFuzz job based on https://google.github.io/oss-fuzz/getting-started/continuous-integration/ + cifuzz: + name: CIFuzz + runs-on: ubuntu-latest + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-ci-fuzz == 'true' + permissions: + security-events: write + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined, memory] + steps: + - name: Build fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: cpython3 + 
sanitizer: ${{ matrix.sanitizer }} + - name: Run fuzzers (${{ matrix.sanitizer }}) + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + fuzz-seconds: 600 + oss-fuzz-project-name: cpython3 + output-sarif: true + sanitizer: ${{ matrix.sanitizer }} + - name: Upload crash + if: failure() && steps.build.outcome == 'success' + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.sanitizer }}-artifacts + path: ./out/artifacts + - name: Upload SARIF + if: always() && steps.build.outcome == 'success' + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: cifuzz-sarif/results.sarif + checkout_path: cifuzz-sarif + + all-required-green: # This job does nothing and is only used for the branch protection + name: All required checks pass + runs-on: ubuntu-latest + timeout-minutes: 5 + needs: + - build-context # Transitive dependency, needed to access `run-tests` value + - check-docs + - check-autoconf-regen + - check-generated-files + - build-windows + - build-windows-msi + - build-macos + - build-ubuntu + - build-ubuntu-ssltests-awslc + - build-ubuntu-ssltests-openssl + - build-wasi + - test-hypothesis + - build-asan + - build-tsan + - cross-build-linux + - cifuzz + if: always() + + steps: + - name: Check whether the needed jobs succeeded or failed + uses: re-actors/alls-green@05ac9388f0aebcb5727afa17fcccfecd6f8ec5fe + with: + allowed-failures: >- + build-windows-msi, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, + test-hypothesis, + cifuzz, + allowed-skips: >- + ${{ + !fromJSON(needs.build-context.outputs.run-docs) + && ' + check-docs, + ' + || '' + }} + ${{ + needs.build-context.outputs.run-tests != 'true' + && ' + check-autoconf-regen, + check-generated-files, + build-macos, + build-ubuntu, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, + build-wasi, + test-hypothesis, + build-asan, + build-tsan, + cross-build-linux, + ' + || '' + }} + ${{ + !fromJSON(needs.build-context.outputs.run-windows-tests) + && 
' + build-windows, + ' + || '' + }} + ${{ + !fromJSON(needs.build-context.outputs.run-ci-fuzz) + && ' + cifuzz, + ' + || '' + }} + jobs: ${{ toJSON(needs) }} From 3b495f6283d4590c1893f928fa69ab898f3e0e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:23:45 +0200 Subject: [PATCH 68/78] only check maxleaf --- Python/cpuinfo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 5757a27ba01085..e27277f485b432 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,7 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_CPUID_SUPPORT #undef HAS_XGETBV_SUPPORT // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 @@ -116,6 +115,11 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif +#undef SHOULD_PARSE_CPUID_L1 +#undef SHOULD_PARSE_CPUID_L7 +#undef SHOULD_PARSE_CPUID_L7S0 +#undef SHOULD_PARSE_CPUID_L7S1 + #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif From 8019f0967b70cb94b3c7ae51e97da34be32dda50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:29:22 +0200 Subject: [PATCH 69/78] parse L1 --- Python/cpuinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index e27277f485b432..a33cbacd13ab31 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -115,7 +115,6 @@ # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L1 #undef SHOULD_PARSE_CPUID_L7 #undef SHOULD_PARSE_CPUID_L7S0 #undef SHOULD_PARSE_CPUID_L7S1 From 820d140186e6e91ab309d3314addcebe6fbbd3b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 13:36:09 +0200 Subject: [PATCH 70/78] parse L7 --- Python/cpuinfo.c | 13 +++++++++---- 1 
file changed, 9 insertions(+), 4 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index a33cbacd13ab31..eecefd0d43d938 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -99,26 +99,31 @@ #if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L1 #endif #if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L7 # define SHOULD_PARSE_CPUID_L7S0 #endif #if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) /* Indicate that cpuid should be called once with EAX=7 and ECX=1. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif # define SHOULD_PARSE_CPUID_L7 # define SHOULD_PARSE_CPUID_L7S1 #endif -#undef SHOULD_PARSE_CPUID_L7 -#undef SHOULD_PARSE_CPUID_L7S0 -#undef SHOULD_PARSE_CPUID_L7S1 - #if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) #error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" #endif From df85ce5f1c0f1490b23beea35a88992dd9959a6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 14:01:18 +0200 Subject: [PATCH 71/78] it *should* work now --- Makefile.pre.in | 3 +++ Python/cpuinfo.c | 15 +++++++++++++-- configure | 46 +++++++++++++++++++++++++++++++++++++++++++++- configure.ac | 10 +++++++++- 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/Makefile.pre.in b/Makefile.pre.in index c6cca1301005ab..5c3467a5e33446 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1938,6 +1938,9 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile 
$(srcdir)/Include/pydt $(MULTIARCH_CPPFLAGS) \ -o $@ $(srcdir)/Python/sysmodule.c +Python/cpuinfo.o: $(srcdir)/Python/cpuinfo.c Makefile + $(CC) -c $(PY_CORE_CFLAGS) @CORE_CPUINFO_CFLAGS@ -o $@ $(srcdir)/Python/cpuinfo.c + $(IO_OBJS): $(IO_H) .PHONY: regen-pegen-metaparser diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index eecefd0d43d938..9034e6ac03ef67 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -25,8 +25,6 @@ # undef HAS_XGETBV_SUPPORT #endif -#undef HAS_XGETBV_SUPPORT - // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 // and AVX-512 instructions. If the compiler does not even recognize the // corresponding flags or if we are not on an 64-bit platform we do not @@ -162,6 +160,18 @@ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now # if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) +# if defined(__clang__) +# if _Py__has_builtin(__builtin_ia32_xgetbv) + return (uint64_t)_xgetbv(index); +# else + /* + * Without -mxsave support, directly using xgetbv() with raw opcode + * may still fail on some platforms (e.g., AMD64 + FreeBSD + clang). + * To be on the safe side, we assume that XGETBV is not supported. 
+ */ + return 0; +# endif +# else /* gcc & icc */ uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -170,6 +180,7 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; +# endif # elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) return (uint64_t)_xgetbv(index); # else diff --git a/configure b/configure index b0a7ed029fb1b0..9a72d6f30b53ea 100755 --- a/configure +++ b/configure @@ -725,6 +725,7 @@ LIBHACL_BLAKE2_SIMD128_OBJS LIBHACL_SIMD128_FLAGS LIBHACL_LDFLAGS LIBHACL_CFLAGS +CORE_CPUINFO_CFLAGS MODULE_UNICODEDATA_FALSE MODULE_UNICODEDATA_TRUE MODULE__MULTIBYTECODEC_FALSE @@ -32544,7 +32545,7 @@ fi # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. # @@ -34165,6 +34166,49 @@ fi fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mxsave" >&5 +printf %s "checking whether C compiler accepts -mxsave... " >&6; } +if test ${ax_cv_check_cflags__Werror__mxsave+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mxsave=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mxsave=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mxsave" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mxsave" >&6; } +if test "x$ax_cv_check_cflags__Werror__mxsave" = xyes +then : + CORE_CPUINFO_CFLAGS=-mxsave +else case e in #( + e) CORE_CPUINFO_CFLAGS= ;; +esac +fi + + + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/configure.ac b/configure.ac index 75778af3de3170..e1c01228450fd6 100644 --- a/configure.ac +++ b/configure.ac @@ -8003,7 +8003,7 @@ AC_DEFUN([PY_SIMD_DETECT], [ # we do not necessarily know which instruction sets will be used, # we disable SIMD support on some older Android platforms. # -# See py_cpuid_features in pycore_cpuinfo.h for how to order fields +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields # and where to put blank lines to separate processor generations for # AVX-512 instructions. # @@ -8058,6 +8058,14 @@ then PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi +dnl Check that -mxsave can be used for cpuinfo.c as the latter +dnl requires to be compiled with this option for xgetbv() support. 
+AX_CHECK_COMPILE_FLAG([-mxsave], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], + [-Werror]) +AC_SUBST([CORE_CPUINFO_CFLAGS]) + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # From 915383ef2f42791a84b5108a944983fae585f1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:05:57 +0200 Subject: [PATCH 72/78] :@ --- Python/cpuinfo.c | 41 +++++++---------- configure | 114 +++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 36 +++++++++++++-- pyconfig.h.in | 6 +++ 4 files changed, 168 insertions(+), 29 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 9034e6ac03ef67..1a9f3237ca6299 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -8,21 +8,15 @@ // For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. // In the future, we should carefully enable support for ARM NEON and POWER // as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures. +#define HAS_CPUID_SUPPORT #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() -# define HAS_CPUID_SUPPORT -# if defined(__clang__) -# include // _xgetbv() -# endif -# define HAS_XGETBV_SUPPORT +# include // _xgetbv() #elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() -# define HAS_CPUID_SUPPORT # include // _xgetbv() -# define HAS_XGETBV_SUPPORT #else # undef HAS_CPUID_SUPPORT -# undef HAS_XGETBV_SUPPORT #endif // Below, we declare macros for guarding the detection of SSE, AVX/AVX2 @@ -30,6 +24,11 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. 
#ifdef HAS_CPUID_SUPPORT +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \ + || defined(_Py_CPUINFO_USE_XGETBV_OPCODE) +# define HAS_XGETBV_SUPPORT +#endif + #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -159,19 +158,10 @@ static uint64_t /* should only be used after calling cpuid(1, 0, ...) */ get_xgetbv(uint32_t index) { assert(index == 0); // only XCR0 is supported for now -# if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) -# if defined(__clang__) -# if _Py__has_builtin(__builtin_ia32_xgetbv) +#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) + /* directly use the compiler's helper if -mxsave is available */ return (uint64_t)_xgetbv(index); -# else - /* - * Without -mxsave support, directly using xgetbv() with raw opcode - * may still fail on some platforms (e.g., AMD64 + FreeBSD + clang). - * To be on the safe side, we assume that XGETBV is not supported. - */ - return 0; -# endif -# else /* gcc & icc */ +#elif defined(__x86_64__) && defined(__GNUC__) uint32_t eax = 0, edx = 0; __asm__ volatile( /* raw opcode for xgetbv for compatibility with older toolchains */ @@ -180,14 +170,15 @@ get_xgetbv(uint32_t index) : "c" (index) ); return ((uint64_t)edx << 32) | eax; -# endif -# elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) +#elif defined(_M_X64) return (uint64_t)_xgetbv(index); -# else +#else (void)index; return 0; -# endif +#endif } +#else +#define get_xgetbv(_INDEX) 0 #endif /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ @@ -364,14 +355,12 @@ detect_cpuid_xsave_state(_Py_cpuid_features *flags) assert(flags->maxleaf >= 1); (void)flags; // Keep the ordering and newlines as they are declared in the structure. -#ifdef HAS_XGETBV_SUPPORT uint64_t xcr0 = flags->xsave && flags->osxsave ? 
get_xgetbv(0) : 0; flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -#endif } #endif diff --git a/configure b/configure index 9a72d6f30b53ea..c32775808851bb 100755 --- a/configure +++ b/configure @@ -34209,6 +34209,120 @@ fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv() is natively supported" >&5 +printf %s "checking _xgetbv() is natively supported... " >&6; } +if test ${ac_cv_use_xgetbv_func+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror -mxsave" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ +_xgetbv(0) + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ac_cv_use_xgetbv_func=yes +else case e in #( + e) ac_cv_use_xgetbv_func=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_func" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_func" >&6; } +if test "$ac_cv_use_xgetbv_func" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv opcode is supported" >&5 +printf %s "checking xgetbv opcode is supported... 
" >&6; } +if test ${ac_cv_use_xgetbv_opcode+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + CFLAGS="$CFLAGS -Werror" + if test "$cross_compiling" = yes +then : + ac_cv_use_xgetbv_opcode=no +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main (void) +{ + + int main(void) + { + uint32_t eax = 0, edx = 0, index = 0; + __asm__ __volatile__( + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (index)); + return 0; + } + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + ac_cv_use_xgetbv_opcode=yes +else case e in #( + e) ac_cv_use_xgetbv_opcode=no ;; +esac +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_opcode" >&5 +printf "%s\n" "$ac_cv_use_xgetbv_opcode" >&6; } +if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then + +printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_OPCODE 1" >>confdefs.h + +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/configure.ac b/configure.ac index e1c01228450fd6..5a37d128d1ce4b 100644 --- a/configure.ac +++ b/configure.ac @@ -8061,11 +8061,41 @@ fi dnl Check that -mxsave can be used for cpuinfo.c as the latter dnl requires to be compiled with this option for xgetbv() support. 
AX_CHECK_COMPILE_FLAG([-mxsave], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], - [-Werror]) + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], + [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], + [-Werror]) AC_SUBST([CORE_CPUINFO_CFLAGS]) +AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [ + WITH_SAVE_ENV([ + CFLAGS="$CFLAGS -Werror -mxsave" + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([[@%:@include ]], [[_xgetbv(0)]])], + [ac_cv_use_xgetbv_func=yes], + [ac_cv_use_xgetbv_func=no])])]) +if test "$ac_cv_use_xgetbv_func" = "yes" ; then + AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred]) +fi + +AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [ + WITH_SAVE_ENV([ + CFLAGS="$CFLAGS -Werror" + AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], [[ + int main(void) + { + uint32_t eax = 0, edx = 0; + __asm__ __volatile__( + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); + return 0; + } + ]])], + [ac_cv_use_xgetbv_opcode=yes], + [ac_cv_use_xgetbv_opcode=no], + [ac_cv_use_xgetbv_opcode=no])])]) +if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then + AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred]) +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/pyconfig.h.in b/pyconfig.h.in index 478855c7022c3a..eae7c2d874a3d5 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2104,6 +2104,12 @@ /* Define if '-mssse3' is a valid compiler flag. */ #undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS +/* _xgetbv() is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_FUNC + +/* XGETBV opcode is preferred */ +#undef _Py_CPUINFO_USE_XGETBV_OPCODE + /* Defined if _Complex C type can be used with libffi. 
*/ #undef _Py_FFI_SUPPORT_C_COMPLEX From c6cf903cec6e4ae125172223351c9fb5e33639c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:06:10 +0200 Subject: [PATCH 73/78] :@ --- configure | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/configure b/configure index c32775808851bb..5656baadb32af5 100755 --- a/configure +++ b/configure @@ -34209,8 +34209,8 @@ fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv() is natively supported" >&5 -printf %s "checking _xgetbv() is natively supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv(0) is natively supported" >&5 +printf %s "checking _xgetbv(0) is natively supported... " >&6; } if test ${ac_cv_use_xgetbv_func+y} then : printf %s "(cached) " >&6 @@ -34258,8 +34258,8 @@ printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv opcode is supported" >&5 -printf %s "checking xgetbv opcode is supported... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv(0) opcode is supported" >&5 +printf %s "checking xgetbv(0) opcode is supported... " >&6; } if test ${ac_cv_use_xgetbv_opcode+y} then : printf %s "(cached) " >&6 @@ -34285,9 +34285,9 @@ main (void) int main(void) { - uint32_t eax = 0, edx = 0, index = 0; + uint32_t eax = 0, edx = 0; __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (index)); + ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); return 0; } From 498518fe8efc6f7a35dd332d2d9d654965bc4f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:21:38 +0200 Subject: [PATCH 74/78] waaaaa! 
--- Python/cpuinfo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 1a9f3237ca6299..8b7acca3b37e73 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -29,6 +29,8 @@ # define HAS_XGETBV_SUPPORT #endif +#undef HAS_XGETBV_SUPPORT + #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ From 3d56d9391682d1d6e7fa8155fa5c3b23d448436a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:48:57 +0200 Subject: [PATCH 75/78] remove support for XCR0 registers --- Include/internal/pycore_cpuinfo.h | 15 +- .../internal/pycore_cpuinfo_xsave_features.h | 43 ----- Makefile.pre.in | 4 - PCbuild/pythoncore.vcxproj | 1 - PCbuild/pythoncore.vcxproj.filters | 3 - Python/cpuinfo.c | 61 ------- Tools/cpuinfo/libcpuinfo/features/xsave.py | 51 ------ configure | 158 ------------------ configure.ac | 38 ----- pyconfig.h.in | 6 - 10 files changed, 2 insertions(+), 378 deletions(-) delete mode 100644 Include/internal/pycore_cpuinfo_xsave_features.h delete mode 100644 Tools/cpuinfo/libcpuinfo/features/xsave.py diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h index 059653c844394f..c837724c59fc27 100644 --- a/Include/internal/pycore_cpuinfo.h +++ b/Include/internal/pycore_cpuinfo.h @@ -25,7 +25,6 @@ extern "C" { #include "Python.h" #include "pycore_cpuinfo_cpuid_features.h" -#include "pycore_cpuinfo_xsave_features.h" typedef struct _Py_cpuid_features_s { uint32_t maxleaf; @@ -101,21 +100,11 @@ typedef struct _Py_cpuid_features_s { _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS - // --- XCR0 register bits ------------------------------------------------- - _Py_CPUID_DECL_FLAG(xcr0_sse); - // On some Intel CPUs, it is possible for the CPU to support AVX2 - 
// instructions even though the underlying OS does not know about - // AVX. In particular, only (SSE) XMM registers will be saved and - // restored on context-switch, but not (AVX) YMM registers. - _Py_CPUID_DECL_FLAG(xcr0_avx); - _Py_CPUID_DECL_FLAG(xcr0_avx512_opmask); - _Py_CPUID_DECL_FLAG(xcr0_avx512_zmm_hi256); - _Py_CPUID_DECL_FLAG(xcr0_avx512_hi16_zmm); #undef _Py_CPUID_DECL_FLAG // Whenever a field is added or removed above, update the - // number of fields (40) and adjust the bitsize of 'ready' + // number of fields (35) and adjust the bitsize of 'ready' // so that the size of this structure is a multiple of 8. - uint8_t ready; // set if the structure is ready for usage + uint8_t ready: 5; // set if the structure is ready for usage } _Py_cpuid_features; /* diff --git a/Include/internal/pycore_cpuinfo_xsave_features.h b/Include/internal/pycore_cpuinfo_xsave_features.h deleted file mode 100644 index e8719261b07604..00000000000000 --- a/Include/internal/pycore_cpuinfo_xsave_features.h +++ /dev/null @@ -1,43 +0,0 @@ -/** - * @author Bénédikt Tran - * @seealso @file Tools/cpuinfo/libcpuinfo/features/xsave.py - * - * XSAVE state components (XCR0 control register). - * - * See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. 
- */ - -#ifndef Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H -#define Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef Py_BUILD_CORE -# error "this header requires Py_BUILD_CORE define" -#endif - -#include "Python.h" - -/*[python input] -import os, sys -sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) -from libcpuinfo.features.xsave import make_xsave_features_constants -print(make_xsave_features_constants()) -[python start generated code]*/ -// clang-format off -/** Constants for XSAVE components */ -#define _Py_XSAVE_MASK_XCR0_SSE 0x00000002 // bit = 1 -#define _Py_XSAVE_MASK_XCR0_AVX 0x00000004 // bit = 2 -#define _Py_XSAVE_MASK_XCR0_AVX512_OPMASK 0x00000020 // bit = 5 -#define _Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 0x00000040 // bit = 6 -#define _Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM 0x00000080 // bit = 7 -// clang-format on -/*[python end generated code: output=ac059b802b4317cb input=0a1b0774d3271477]*/ - -#ifdef __cplusplus -} -#endif - -#endif // !Py_INTERNAL_CPUINFO_XSAVE_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 5c3467a5e33446..87fa7d06405aac 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1297,7 +1297,6 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_cpuinfo.h \ $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ - $(srcdir)/Include/internal/pycore_cpuinfo_xsave_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ @@ -1938,9 +1937,6 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile $(srcdir)/Include/pydt $(MULTIARCH_CPPFLAGS) \ -o $@ $(srcdir)/Python/sysmodule.c -Python/cpuinfo.o: $(srcdir)/Python/cpuinfo.c Makefile - $(CC) -c $(PY_CORE_CFLAGS) @CORE_CPUINFO_CFLAGS@ -o $@ $(srcdir)/Python/cpuinfo.c - $(IO_OBJS): $(IO_H) .PHONY: regen-pegen-metaparser diff --git 
a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 2d843ea3bff576..fce0bd72173f6d 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -231,7 +231,6 @@ - diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 8afc2010ef93ca..6dbcb8c70d88e9 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -606,9 +606,6 @@ Include\internal - - Include\internal - Include\internal diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 8b7acca3b37e73..08d61f3deb01cc 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -11,10 +11,8 @@ #define HAS_CPUID_SUPPORT #if defined(__x86_64__) && defined(__GNUC__) # include // __cpuid_count() -# include // _xgetbv() #elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) # include // __cpuidex() -# include // _xgetbv() #else # undef HAS_CPUID_SUPPORT #endif @@ -24,13 +22,6 @@ // corresponding flags or if we are not on an 64-bit platform we do not // even try to inspect the output of CPUID for those specific features. #ifdef HAS_CPUID_SUPPORT -#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) \ - || defined(_Py_CPUINFO_USE_XGETBV_OPCODE) -# define HAS_XGETBV_SUPPORT -#endif - -#undef HAS_XGETBV_SUPPORT - #if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ @@ -155,34 +146,6 @@ get_cpuid_info(uint32_t level /* input eax */, #endif } -#if defined(HAS_XGETBV_SUPPORT) && defined(SHOULD_PARSE_CPUID_L1) -static uint64_t /* should only be used after calling cpuid(1, 0, ...) 
*/ -get_xgetbv(uint32_t index) -{ - assert(index == 0); // only XCR0 is supported for now -#if defined(_Py_CPUINFO_USE_XGETBV_FUNC) - /* directly use the compiler's helper if -mxsave is available */ - return (uint64_t)_xgetbv(index); -#elif defined(__x86_64__) && defined(__GNUC__) - uint32_t eax = 0, edx = 0; - __asm__ volatile( - /* raw opcode for xgetbv for compatibility with older toolchains */ - ".byte 0x0f, 0x01, 0xd0" - : "=a" (eax), "=d" (edx) - : "c" (index) - ); - return ((uint64_t)edx << 32) | eax; -#elif defined(_M_X64) - return (uint64_t)_xgetbv(index); -#else - (void)index; - return 0; -#endif -} -#else -#define get_xgetbv(_INDEX) 0 -#endif - /* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ static uint32_t detect_cpuid_maxleaf(void) @@ -349,23 +312,6 @@ detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, } #endif -#ifdef SHOULD_PARSE_CPUID_L1 -static void /* should only be used after calling cpuid(1, 0, ...) */ -detect_cpuid_xsave_state(_Py_cpuid_features *flags) -{ - assert(flags->ready == 0); - assert(flags->maxleaf >= 1); - (void)flags; - // Keep the ordering and newlines as they are declared in the structure. - uint64_t xcr0 = flags->xsave && flags->osxsave ? 
get_xgetbv(0) : 0; - flags->xcr0_sse = XSAVE_CHECK_REG(xcr0, XCR0_SSE); - flags->xcr0_avx = XSAVE_CHECK_REG(xcr0, XCR0_AVX); - flags->xcr0_avx512_opmask = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_OPMASK); - flags->xcr0_avx512_zmm_hi256 = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_ZMM_HI256); - flags->xcr0_avx512_hi16_zmm = XSAVE_CHECK_REG(xcr0, XCR0_AVX512_HI16_ZMM); -} -#endif - static void cpuid_features_finalize(_Py_cpuid_features *flags) { @@ -460,12 +406,6 @@ _Py_cpuid_check_features(const _Py_cpuid_features *flags) \ MACRO(xsave); \ MACRO(osxsave); \ - \ - MACRO(xcr0_sse); \ - MACRO(xcr0_avx); \ - MACRO(xcr0_avx512_opmask); \ - MACRO(xcr0_avx512_zmm_hi256); \ - MACRO(xcr0_avx512_hi16_zmm); \ } while (0) void @@ -530,7 +470,6 @@ cpuid_detect_l1_features(_Py_cpuid_features *flags) uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); detect_cpuid_features(flags, ecx, edx); - detect_cpuid_xsave_state(flags); } } #else diff --git a/Tools/cpuinfo/libcpuinfo/features/xsave.py b/Tools/cpuinfo/libcpuinfo/features/xsave.py deleted file mode 100644 index 474162dfc4463b..00000000000000 --- a/Tools/cpuinfo/libcpuinfo/features/xsave.py +++ /dev/null @@ -1,51 +0,0 @@ -""" -Generate constants for XSAVE state components (XCR0 control register). - -See https://en.wikipedia.org/wiki/Control_register#XCR0_and_XSS. - -.. 
seealso:: :file:`Include/internal/pycore_cpuinfo_xsave_features.h` -""" - -from __future__ import annotations - -__all__ = ["make_xsave_features_constants"] - -from typing import TYPE_CHECKING - -import libcpuinfo.util as util -from libcpuinfo.util import DOXYGEN_STYLE - -if TYPE_CHECKING: - from typing import Final - - type Feature = str - type BitIndex = int - -XSAVE_FEATURES: Final[dict[Feature, BitIndex]] = { - "SSE": 1, - "AVX": 2, - "AVX512_OPMASK": 5, - "AVX512_ZMM_HI256": 6, - "AVX512_HI16_ZMM": 7, -} - - -def get_constant_name(feature: Feature) -> str: - return f"_Py_XSAVE_MASK_XCR0_{feature}" - - -_NAME_MAXSIZE: Final[int] = util.next_block( - max(map(len, map(get_constant_name, XSAVE_FEATURES))) -) - - -def make_xsave_features_constants() -> str: - """Used by :file:`Include/internal/pycore_cpuinfo_xsave_features.h`.""" - writer = util.CWriter() - writer.comment("Constants for XSAVE components", style=DOXYGEN_STYLE) - for feature_name, bit in XSAVE_FEATURES.items(): - if not 0 <= bit < 32: - raise ValueError(f"invalid bit value for {feature_name!r}") - key = get_constant_name(feature_name) - writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) - return writer.build() diff --git a/configure b/configure index 5656baadb32af5..a033cb515286da 100755 --- a/configure +++ b/configure @@ -725,7 +725,6 @@ LIBHACL_BLAKE2_SIMD128_OBJS LIBHACL_SIMD128_FLAGS LIBHACL_LDFLAGS LIBHACL_CFLAGS -CORE_CPUINFO_CFLAGS MODULE_UNICODEDATA_FALSE MODULE_UNICODEDATA_TRUE MODULE__MULTIBYTECODEC_FALSE @@ -34164,163 +34163,6 @@ fi -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mxsave" >&5 -printf %s "checking whether C compiler accepts -mxsave... " >&6; } -if test ${ax_cv_check_cflags__Werror__mxsave+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mxsave" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. 
*/ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mxsave=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mxsave=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mxsave" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mxsave" >&6; } -if test "x$ax_cv_check_cflags__Werror__mxsave" = xyes -then : - CORE_CPUINFO_CFLAGS=-mxsave -else case e in #( - e) CORE_CPUINFO_CFLAGS= ;; -esac -fi - - - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking _xgetbv(0) is natively supported" >&5 -printf %s "checking _xgetbv(0) is natively supported... " >&6; } -if test ${ac_cv_use_xgetbv_func+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - save_CFLAGS=$CFLAGS -save_CPPFLAGS=$CPPFLAGS -save_LDFLAGS=$LDFLAGS -save_LIBS=$LIBS - - - CFLAGS="$CFLAGS -Werror -mxsave" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ -_xgetbv(0) - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ac_cv_use_xgetbv_func=yes -else case e in #( - e) ac_cv_use_xgetbv_func=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext -CFLAGS=$save_CFLAGS -CPPFLAGS=$save_CPPFLAGS -LDFLAGS=$save_LDFLAGS -LIBS=$save_LIBS - - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_func" >&5 -printf "%s\n" "$ac_cv_use_xgetbv_func" >&6; } -if test "$ac_cv_use_xgetbv_func" = "yes" ; then - -printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_FUNC 1" >>confdefs.h - -fi - -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking xgetbv(0) opcode is supported" >&5 -printf %s "checking xgetbv(0) opcode is supported... 
" >&6; } -if test ${ac_cv_use_xgetbv_opcode+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - save_CFLAGS=$CFLAGS -save_CPPFLAGS=$CPPFLAGS -save_LDFLAGS=$LDFLAGS -save_LIBS=$LIBS - - - CFLAGS="$CFLAGS -Werror" - if test "$cross_compiling" = yes -then : - ac_cv_use_xgetbv_opcode=no -else case e in #( - e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ -#include -int -main (void) -{ - - int main(void) - { - uint32_t eax = 0, edx = 0; - __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); - return 0; - } - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_run "$LINENO" -then : - ac_cv_use_xgetbv_opcode=yes -else case e in #( - e) ac_cv_use_xgetbv_opcode=no ;; -esac -fi -rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ - conftest.$ac_objext conftest.beam conftest.$ac_ext ;; -esac -fi - -CFLAGS=$save_CFLAGS -CPPFLAGS=$save_CPPFLAGS -LDFLAGS=$save_LDFLAGS -LIBS=$save_LIBS - - ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_use_xgetbv_opcode" >&5 -printf "%s\n" "$ac_cv_use_xgetbv_opcode" >&6; } -if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then - -printf "%s\n" "#define _Py_CPUINFO_USE_XGETBV_OPCODE 1" >>confdefs.h - fi ############################################################################### diff --git a/configure.ac b/configure.ac index 5a37d128d1ce4b..00d57c8a0ae20b 100644 --- a/configure.ac +++ b/configure.ac @@ -8058,44 +8058,6 @@ then PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) fi -dnl Check that -mxsave can be used for cpuinfo.c as the latter -dnl requires to be compiled with this option for xgetbv() support. 
-AX_CHECK_COMPILE_FLAG([-mxsave], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [-mxsave])], - [AS_VAR_SET([CORE_CPUINFO_CFLAGS], [])], - [-Werror]) -AC_SUBST([CORE_CPUINFO_CFLAGS]) - -AC_CACHE_CHECK([_xgetbv(0) is natively supported], [ac_cv_use_xgetbv_func], [ - WITH_SAVE_ENV([ - CFLAGS="$CFLAGS -Werror -mxsave" - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM([[@%:@include ]], [[_xgetbv(0)]])], - [ac_cv_use_xgetbv_func=yes], - [ac_cv_use_xgetbv_func=no])])]) -if test "$ac_cv_use_xgetbv_func" = "yes" ; then - AC_DEFINE([_Py_CPUINFO_USE_XGETBV_FUNC], [1], [_xgetbv() is preferred]) -fi - -AC_CACHE_CHECK([xgetbv(0) opcode is supported], [ac_cv_use_xgetbv_opcode], [ - WITH_SAVE_ENV([ - CFLAGS="$CFLAGS -Werror" - AC_RUN_IFELSE([AC_LANG_PROGRAM([[@%:@include ]], [[ - int main(void) - { - uint32_t eax = 0, edx = 0; - __asm__ __volatile__( - ".byte 0x0f, 0x01, 0xd0" : "=a" (eax), "=d" (edx) : "c" (0)); - return 0; - } - ]])], - [ac_cv_use_xgetbv_opcode=yes], - [ac_cv_use_xgetbv_opcode=no], - [ac_cv_use_xgetbv_opcode=no])])]) -if test "$ac_cv_use_xgetbv_opcode" = "yes" ; then - AC_DEFINE([_Py_CPUINFO_USE_XGETBV_OPCODE], [1], [XGETBV opcode is preferred]) -fi - ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # diff --git a/pyconfig.h.in b/pyconfig.h.in index eae7c2d874a3d5..478855c7022c3a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2104,12 +2104,6 @@ /* Define if '-mssse3' is a valid compiler flag. */ #undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS -/* _xgetbv() is preferred */ -#undef _Py_CPUINFO_USE_XGETBV_FUNC - -/* XGETBV opcode is preferred */ -#undef _Py_CPUINFO_USE_XGETBV_OPCODE - /* Defined if _Complex C type can be used with libffi. 
*/ #undef _Py_FFI_SUPPORT_C_COMPLEX From 08daa8abfaf2b18a46a15073e0af7c7af06ec8ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:06:55 +0200 Subject: [PATCH 76/78] fix SIMD-256 detection --- Modules/blake2module.c | 3 ++- Modules/hmacmodule.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 2f8baea62d77fc..9797c98d2c3091 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -122,7 +122,8 @@ blake2module_init_cpu_features(Blake2State *state) #endif #if _Py_HACL_CAN_COMPILE_VEC256 - state->can_run_simd256 = flags.avx && flags.avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else state->can_run_simd256 = false; #endif diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 064e31fe830deb..cfbccaab136bdf 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -1564,7 +1564,8 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #endif #if _Py_HACL_CAN_COMPILE_VEC256 - state->can_run_simd256 = flags.avx && flags.avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else state->can_run_simd256 = false; #endif From afd1137fddfce9fa6e9fb5072b31e44c6706296e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:22:52 +0200 Subject: [PATCH 77/78] simplify `get_cpuid_info` --- Python/cpuinfo.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c index 08d61f3deb01cc..3e3feb55b8c9b6 100644 --- a/Python/cpuinfo.c +++ b/Python/cpuinfo.c @@ -134,9 +134,7 @@ get_cpuid_info(uint32_t level /* input eax */, { *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized #if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) - uint32_t r_eax = 0, r_ebx = 0, 
r_ecx = 0, r_edx = 0; - __cpuid_count(level, count, r_eax, r_ebx, r_ecx, r_edx); - *eax = r_eax, *ebx = r_ebx, *ecx = r_ecx, *edx = r_edx; + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); #elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) uint32_t info[4] = {0}; __cpuidex(info, level, count); From 79eb72d28d726d43c1bb0a735196bc8b69f2ace6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:09:17 +0200 Subject: [PATCH 78/78] add CODEOWNERS --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08d7a80d7726d3..1113c272529fac 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -56,6 +56,10 @@ Lib/test/test_capi/test_misc.py @markshannon Lib/test/test_pyrepl/* @pablogsal @lysnikolaou @ambv Tools/c-analyzer/ @ericsnowcurrently +# cpuinfo +Python/cpuinfo.c @picnixz +Python/pycore_cpuinfo*.h @picnixz + # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy