diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08d7a80d7726d3..1113c272529fac 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -56,6 +56,10 @@ Lib/test/test_capi/test_misc.py @markshannon Lib/test/test_pyrepl/* @pablogsal @lysnikolaou @ambv Tools/c-analyzer/ @ericsnowcurrently +# cpuinfo +Python/cpuinfo.c @picnixz +Python/pycore_cpuinfo*.h @picnixz + # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6171571857af6..05f20e12f4653d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,7 +260,7 @@ jobs: free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} - build-ubuntu-ssltests: + build-ubuntu-ssltests-openssl: name: 'Ubuntu SSL tests with OpenSSL' runs-on: ${{ matrix.os }} timeout-minutes: 60 @@ -322,6 +322,81 @@ jobs: - name: SSL tests run: ./python Lib/test/ssltests.py + build-ubuntu-ssltests-awslc: + name: 'Ubuntu SSL tests with AWS-LC' + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + needs: build-context + if: needs.build-context.outputs.run-tests == 'true' + strategy: + fail-fast: false + matrix: + os: [ubuntu-24.04] + awslc_ver: [1.55.0] + env: + AWSLC_VER: ${{ matrix.awslc_ver}} + MULTISSL_DIR: ${{ github.workspace }}/multissl + OPENSSL_DIR: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }} + LD_LIBRARY_PATH: ${{ github.workspace }}/multissl/aws-lc/${{ matrix.awslc_ver }}/lib + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Runner image version + run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" + - name: Restore config.cache + uses: actions/cache@v4 + with: + path: config.cache + key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} + - name: Register gcc problem matcher + run: echo "::add-matcher::.github/problem-matchers/gcc.json" + - name: Install dependencies + run: sudo 
./.github/workflows/posix-deps-apt.sh + - name: Configure SSL lib env vars + run: | + echo "MULTISSL_DIR=${GITHUB_WORKSPACE}/multissl" >> "$GITHUB_ENV" + echo "OPENSSL_DIR=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}" >> "$GITHUB_ENV" + echo "LD_LIBRARY_PATH=${GITHUB_WORKSPACE}/multissl/aws-lc/${AWSLC_VER}/lib" >> "$GITHUB_ENV" + - name: 'Restore AWS-LC build' + id: cache-aws-lc + uses: actions/cache@v4 + with: + path: ./multissl/aws-lc/${{ matrix.awslc_ver }} + key: ${{ matrix.os }}-multissl-aws-lc-${{ matrix.awslc_ver }} + - name: Install AWS-LC + if: steps.cache-aws-lc.outputs.cache-hit != 'true' + run: | + python3 Tools/ssl/multissltests.py \ + --steps=library \ + --base-directory "$MULTISSL_DIR" \ + --awslc ${{ matrix.awslc_ver }} \ + --system Linux + - name: Add ccache to PATH + run: | + echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" + - name: Configure ccache action + uses: hendrikmuhs/ccache-action@v1.2 + with: + save: false + - name: Configure CPython + run: | + ./configure CFLAGS="-fdiagnostics-format=json" \ + --config-cache \ + --enable-slower-safety \ + --with-pydebug \ + --with-openssl="$OPENSSL_DIR" \ + --with-builtin-hashlib-hashes=blake2 \ + --with-ssl-default-suites=openssl + - name: Build CPython + run: make -j + - name: Display build info + run: make pythoninfo + - name: Verify python is linked to AWS-LC + run: ./python -c 'import ssl; print(ssl.OPENSSL_VERSION)' | grep AWS-LC + - name: SSL tests + run: ./python Lib/test/ssltests.py + build-wasi: name: 'WASI' needs: build-context @@ -628,7 +703,8 @@ jobs: - build-windows-msi - build-macos - build-ubuntu - - build-ubuntu-ssltests + - build-ubuntu-ssltests-awslc + - build-ubuntu-ssltests-openssl - build-wasi - test-hypothesis - build-asan @@ -643,7 +719,8 @@ jobs: with: allowed-failures: >- build-windows-msi, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, test-hypothesis, cifuzz, allowed-skips: >- @@ -661,7 +738,8 @@ jobs: check-generated-files, 
build-macos, build-ubuntu, - build-ubuntu-ssltests, + build-ubuntu-ssltests-awslc, + build-ubuntu-ssltests-openssl, build-wasi, test-hypothesis, build-asan, diff --git a/Include/internal/pycore_cpuinfo.h b/Include/internal/pycore_cpuinfo.h new file mode 100644 index 00000000000000..c837724c59fc27 --- /dev/null +++ b/Include/internal/pycore_cpuinfo.h @@ -0,0 +1,168 @@ +/* + * @author Bénédikt Tran + * + * Interface for detecting the different CPUID flags in an opaque manner. + * See https://en.wikipedia.org/wiki/CPUID for details on the bit values. + * + * If a module requires to support SIMD instructions, it should determine + * the compiler flags and the instruction sets required for the intrinsics + * to work. + * + * For the headers and expected CPUID bits needed by Intel intrinsics, see + * https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html. + */ + +#ifndef Py_INTERNAL_CPUINFO_H +#define Py_INTERNAL_CPUINFO_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" +#include "pycore_cpuinfo_cpuid_features.h" + +typedef struct _Py_cpuid_features_s { + uint32_t maxleaf; + /* + * Macro to declare a member flag of '_Py_cpuid_features' as a uint8_t. + * Whenever this macro is used, do not forget to update the number of + * fields and the bitsize of the 'ready' member (see structure end). 
+ */ +#define _Py_CPUID_DECL_FLAG(MEMBER_NAME) uint8_t MEMBER_NAME:1 + // --- Streaming SIMD Extensions ------------------------------------------ + _Py_CPUID_DECL_FLAG(sse); + _Py_CPUID_DECL_FLAG(sse2); + _Py_CPUID_DECL_FLAG(sse3); + _Py_CPUID_DECL_FLAG(ssse3); // Supplemental SSE3 instructions + _Py_CPUID_DECL_FLAG(sse41); // SSE4.1 + _Py_CPUID_DECL_FLAG(sse42); // SSE4.2 + + // --- Advanced Vector Extensions ----------------------------------------- + _Py_CPUID_DECL_FLAG(avx); + _Py_CPUID_DECL_FLAG(avx_ifma); + _Py_CPUID_DECL_FLAG(avx_ne_convert); + + _Py_CPUID_DECL_FLAG(avx_vnni); + _Py_CPUID_DECL_FLAG(avx_vnni_int8); + _Py_CPUID_DECL_FLAG(avx_vnni_int16); + + // --- Advanced Vector Extensions 2 --------------------------------------- + _Py_CPUID_DECL_FLAG(avx2); + + // --- Advanced Vector Extensions (512-bit) ------------------------------- + /* + * AVX-512 instruction set are grouped by the processor generation + * that implements them (see https://en.wikipedia.org/wiki/AVX-512). + * + * We do not include GFNI, VPCLMULQDQ and VAES instructions since + * they are not exactly AVX-512 per se, nor do we include BF16 or + * FP16 since they operate on bfloat16 and binary16 (half-float). + * + * See https://en.wikipedia.org/wiki/AVX-512#Instruction_set for + * the suffix meanings (for instance 'f' stands for 'Foundation'). 
+ */ + _Py_CPUID_DECL_FLAG(avx512_f); + _Py_CPUID_DECL_FLAG(avx512_cd); + + _Py_CPUID_DECL_FLAG(avx512_er); + _Py_CPUID_DECL_FLAG(avx512_pf); + + _Py_CPUID_DECL_FLAG(avx512_4fmaps); + _Py_CPUID_DECL_FLAG(avx512_4vnniw); + + _Py_CPUID_DECL_FLAG(avx512_vpopcntdq); + + _Py_CPUID_DECL_FLAG(avx512_vl); + _Py_CPUID_DECL_FLAG(avx512_dq); + _Py_CPUID_DECL_FLAG(avx512_bw); + + _Py_CPUID_DECL_FLAG(avx512_ifma); + _Py_CPUID_DECL_FLAG(avx512_vbmi); + + _Py_CPUID_DECL_FLAG(avx512_vnni); + + _Py_CPUID_DECL_FLAG(avx512_vbmi2); + _Py_CPUID_DECL_FLAG(avx512_bitalg); + + _Py_CPUID_DECL_FLAG(avx512_vp2intersect); + + // --- Instructions ------------------------------------------------------- + _Py_CPUID_DECL_FLAG(cmov); + _Py_CPUID_DECL_FLAG(fma); + _Py_CPUID_DECL_FLAG(popcnt); + _Py_CPUID_DECL_FLAG(pclmulqdq); + + _Py_CPUID_DECL_FLAG(xsave); // XSAVE/XRSTOR/XSETBV/XGETBV + _Py_CPUID_DECL_FLAG(osxsave); // XSAVE is enabled by the OS + +#undef _Py_CPUID_DECL_FLAG + // Whenever a field is added or removed above, update the + // number of fields (35) and adjust the bitsize of 'ready' + // so that the size of this structure is a multiple of 8. + uint8_t ready: 5; // set if the structure is ready for usage +} _Py_cpuid_features; + +/* + * Explicitly set all members to zero to guarantee that + * we never have a non-initialized attribute at runtime + * which could lead to an illegal instruction error. + * + * This readiness state of 'flags' is ignored and left untouched. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(void) +_Py_cpuid_disable_features(_Py_cpuid_features *flags); + +/* + * Check whether the structure is ready and flags are inter-compatible, + * returning 1 on success and 0 otherwise. + * + * The caller should disable all CPUID detected features if the check + * fails to avoid encountering runtime illegal instruction errors. + * + * Note: This function does not set any exception and thus never fails. 
+ */ +PyAPI_FUNC(int) +_Py_cpuid_check_features(const _Py_cpuid_features *flags); + +/* + * Return 1 if all expected flags are set in 'actual', 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_has_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); + +/* + * Return 1 if 'actual' and 'expect' are identical, 0 otherwise. + * + * If 'actual' or 'expect' are not ready yet, this also returns 0. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(int) +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect); + +/* + * Detect the available host features, storing the result in 'flags'. + * + * Note: This function does not set any exception and thus never fails. + */ +PyAPI_FUNC(void) +_Py_cpuid_detect_features(_Py_cpuid_features *flags); + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_INTERNAL_CPUINFO_H */ diff --git a/Include/internal/pycore_cpuinfo_cpuid_features.h b/Include/internal/pycore_cpuinfo_cpuid_features.h new file mode 100644 index 00000000000000..8db54e7af37fb1 --- /dev/null +++ b/Include/internal/pycore_cpuinfo_cpuid_features.h @@ -0,0 +1,101 @@ +/** + * @author Bénédikt Tran + * @seealso @file Tools/cpuinfo/libcpuinfo/features/cpuid.py + * + * The enumeration describes masks to apply on CPUID output registers. + * + * Member names are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, + * where <> (resp. []) denotes a required (resp. optional) group and: + * + * - REGISTER is EAX, EBX, ECX or EDX, + * - LEAF is the initial value of the EAX register (1 or 7), + * - SUBLEAF is the initial value of the ECX register (omitted if 0), and + * - FEATURE is a SIMD feature (with one or more specialized instructions). + * + * For maintainability, the flags are ordered by registers, leafs, subleafs, + * and bits.
See https://en.wikipedia.org/wiki/CPUID for the values. + * + * Note 1: The LEAF is also called the 'page' or the 'level'. + * Note 2: The SUBLEAF is also referred to as the 'count'. + * + * The LEAF value should only be 1 or 7 as other values may have different + * meanings depending on the underlying architecture. + */ + +#ifndef Py_INTERNAL_CPUINFO_CPUID_FEATURES_H +#define Py_INTERNAL_CPUINFO_CPUID_FEATURES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +/*[python input] +import os, sys +sys.path.insert(0, os.path.realpath(os.path.join(os.getcwd(), "Tools/cpuinfo"))) +from libcpuinfo.features.cpuid import make_cpuid_features_constants +print(make_cpuid_features_constants()) +[python start generated code]*/ +// clang-format off +/** Constants for CPUID features */ +/* CPUID (LEAF=1, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L1_SSE3 0x00000001 // bit = 0 +#define _Py_CPUID_MASK_ECX_L1_PCLMULQDQ 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L1_SSSE3 0x00000200 // bit = 9 +#define _Py_CPUID_MASK_ECX_L1_FMA 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L1_SSE4_1 0x00080000 // bit = 19 +#define _Py_CPUID_MASK_ECX_L1_SSE4_2 0x00100000 // bit = 20 +#define _Py_CPUID_MASK_ECX_L1_POPCNT 0x00800000 // bit = 23 +#define _Py_CPUID_MASK_ECX_L1_XSAVE 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_ECX_L1_OSXSAVE 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_ECX_L1_AVX 0x10000000 // bit = 28 + +/* CPUID (LEAF=1, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L1_CMOV 0x00008000 // bit = 15 +#define _Py_CPUID_MASK_EDX_L1_SSE 0x02000000 // bit = 25 +#define _Py_CPUID_MASK_EDX_L1_SSE2 0x04000000 // bit = 26 + +/* CPUID (LEAF=7, SUBLEAF=0) [EBX] */ +#define _Py_CPUID_MASK_EBX_L7_AVX2 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EBX_L7_AVX512_F 0x00010000 // bit = 16 +#define _Py_CPUID_MASK_EBX_L7_AVX512_DQ 0x00020000 // bit = 17 +#define
_Py_CPUID_MASK_EBX_L7_AVX512_IFMA 0x00200000 // bit = 21 +#define _Py_CPUID_MASK_EBX_L7_AVX512_PF 0x04000000 // bit = 26 +#define _Py_CPUID_MASK_EBX_L7_AVX512_ER 0x08000000 // bit = 27 +#define _Py_CPUID_MASK_EBX_L7_AVX512_CD 0x10000000 // bit = 28 +#define _Py_CPUID_MASK_EBX_L7_AVX512_BW 0x40000000 // bit = 30 +#define _Py_CPUID_MASK_EBX_L7_AVX512_VL 0x80000000 // bit = 31 + +/* CPUID (LEAF=7, SUBLEAF=0) [ECX] */ +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI 0x00000002 // bit = 1 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 0x00000040 // bit = 6 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VNNI 0x00000800 // bit = 11 +#define _Py_CPUID_MASK_ECX_L7_AVX512_BITALG 0x00001000 // bit = 12 +#define _Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ 0x00004000 // bit = 14 + +/* CPUID (LEAF=7, SUBLEAF=0) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW 0x00000004 // bit = 2 +#define _Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS 0x00000008 // bit = 3 +#define _Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT 0x00000100 // bit = 8 + +/* CPUID (LEAF=7, SUBLEAF=1) [EAX] */ +#define _Py_CPUID_MASK_EAX_L7S1_AVX_VNNI 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EAX_L7S1_AVX_IFMA 0x00800000 // bit = 23 + +/* CPUID (LEAF=7, SUBLEAF=1) [EDX] */ +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 0x00000010 // bit = 4 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT 0x00000020 // bit = 5 +#define _Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 0x00000400 // bit = 10 +// clang-format on +/*[python end generated code: output=e9112f064e2effec input=71ec6b4356052ec3]*/ + +#ifdef __cplusplus +} +#endif + +#endif // !Py_INTERNAL_CPUINFO_CPUID_FEATURES_H diff --git a/Makefile.pre.in b/Makefile.pre.in index 66b34b779f27cb..87fa7d06405aac 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -435,6 +435,7 @@ PYTHON_OBJS= \ Python/codegen.o \ Python/compile.o \ Python/context.o \ + Python/cpuinfo.o \ Python/critical_section.o \ Python/crossinterp.o \ Python/dynamic_annotations.o \ @@ -1294,6 +1295,8 @@ PYTHON_HEADERS= \ 
$(srcdir)/Include/internal/pycore_complexobject.h \ $(srcdir)/Include/internal/pycore_condvar.h \ $(srcdir)/Include/internal/pycore_context.h \ + $(srcdir)/Include/internal/pycore_cpuinfo.h \ + $(srcdir)/Include/internal/pycore_cpuinfo_cpuid_features.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 163f238a4268d0..9797c98d2c3091 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -16,29 +16,11 @@ #include "Python.h" #include "hashlib.h" -#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_cpuinfo.h" // _Py_cpuid_features +#include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" #include "pycore_moduleobject.h" -// QUICK CPU AUTODETECTION -// -// See https://github.com/python/cpython/pull/119316 -- we only enable -// vectorized versions for Intel CPUs, even though HACL*'s "vec128" modules also -// run on ARM NEON. (We could enable them on POWER -- but I don't have access to -// a test machine to see if that speeds anything up.) -// -// Note that configure.ac and the rest of the build are written in such a way -// that if the configure script finds suitable flags to compile HACL's SIMD128 -// (resp. SIMD256) files, then Hacl_Hash_Blake2b_Simd128.c (resp. ...) will be -// pulled into the build automatically, and then only the CPU autodetection will -// need to be updated here. 
- -#if defined(__x86_64__) && defined(__GNUC__) -#include -#elif defined(_M_X64) -#include -#endif - #include // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't @@ -129,75 +111,20 @@ _blake2_free(void *module) static void blake2module_init_cpu_features(Blake2State *state) { - /* This must be kept in sync with hmacmodule_init_cpu_features() - * in hmacmodule.c */ - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & ECX_AVX) != 0; - bool avx2 = (ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + _Py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + 
state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if _Py_HACL_CAN_COMPILE_VEC256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index 95e400231bb65c..cfbccaab136bdf 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -17,6 +17,7 @@ #endif #include "Python.h" +#include "pycore_cpuinfo.h" // _Py_cpuid_features #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() @@ -1552,73 +1553,20 @@ hmacmodule_init_globals(PyObject *module, hmacmodule_state *state) static void hmacmodule_init_cpu_features(hmacmodule_state *state) { - int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; - int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; -#if defined(__x86_64__) && defined(__GNUC__) - __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); - __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); -#elif defined(_M_X64) - int info1[4] = {0}; - __cpuidex(info1, 1, 0); - eax1 = info1[0], ebx1 = info1[1], ecx1 = info1[2], edx1 = info1[3]; - - int info7[4] = {0}; - __cpuidex(info7, 7, 0); - eax7 = info7[0], ebx7 = info7[1], ecx7 = info7[2], edx7 = info7[3]; -#endif - // fmt: off - (void)eax1; (void)ebx1; (void)ecx1; (void)edx1; - (void)eax7; (void)ebx7; (void)ecx7; (void)edx7; - // fmt: on - -#define EBX_AVX2 (1 << 5) -#define ECX_SSE3 (1 << 0) -#define ECX_SSSE3 (1 << 9) -#define ECX_SSE4_1 (1 << 19) -#define ECX_SSE4_2 (1 << 20) -#define ECX_AVX (1 << 28) -#define EDX_SSE (1 << 25) -#define EDX_SSE2 (1 << 26) -#define EDX_CMOV (1 << 15) - - bool avx = (ecx1 & 
ECX_AVX) != 0; - bool avx2 = (ebx7 & EBX_AVX2) != 0; - - bool sse = (edx1 & EDX_SSE) != 0; - bool sse2 = (edx1 & EDX_SSE2) != 0; - bool cmov = (edx1 & EDX_CMOV) != 0; - - bool sse3 = (ecx1 & ECX_SSE3) != 0; - bool sse41 = (ecx1 & ECX_SSE4_1) != 0; - bool sse42 = (ecx1 & ECX_SSE4_2) != 0; - -#undef EDX_CMOV -#undef EDX_SSE2 -#undef EDX_SSE -#undef ECX_AVX -#undef ECX_SSE4_2 -#undef ECX_SSE4_1 -#undef ECX_SSSE3 -#undef ECX_SSE3 -#undef EBX_AVX2 - + _Py_cpuid_features flags; + _Py_cpuid_detect_features(&flags); #if _Py_HACL_CAN_COMPILE_VEC128 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; + state->can_run_simd128 = flags.sse && flags.sse2 && flags.sse3 + && flags.sse41 && flags.sse42 + && flags.cmov; #else - // fmt: off - (void)sse; (void)sse2; (void)sse3; (void)sse41; (void)sse42; (void)cmov; - // fmt: on state->can_run_simd128 = false; #endif #if _Py_HACL_CAN_COMPILE_VEC256 - // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection - state->can_run_simd256 = state->can_run_simd128 && avx && avx2; + state->can_run_simd256 = state->can_run_simd128 + && flags.avx && flags.avx2; #else - // fmt: off - (void)avx; (void)avx2; - // fmt: on state->can_run_simd256 = false; #endif } diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index b911c9385634d7..fce0bd72173f6d 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -229,6 +229,8 @@ + + @@ -595,6 +597,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 0e6d42cc959ba5..6dbcb8c70d88e9 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -600,6 +600,12 @@ Include\internal + + Include\internal + + + Include\internal + Include\internal @@ -1349,6 +1355,9 @@ Python + + Source Files + Python diff --git a/Python/cpuinfo.c b/Python/cpuinfo.c new file mode 100644 index 
00000000000000..3e3feb55b8c9b6 --- /dev/null +++ b/Python/cpuinfo.c @@ -0,0 +1,533 @@ +#include "pycore_cpuinfo.h" + +/* Return 1 if all MASK bits are set in REG, 0 otherwise. */ +#define CHECK_REG(REG, MASK) ((((REG) & (MASK)) == (MASK)) ? 1 : 0) +#define CPUID_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_CPUID_MASK_ ## FEAT)) +#define XSAVE_CHECK_REG(REG, FEAT) CHECK_REG(REG, (_Py_XSAVE_MASK_ ## FEAT)) + +// For now, we only try to enable SIMD instructions for x86-64 Intel CPUs. +// In the future, we should carefully enable support for ARM NEON and POWER +// as well as AMD. See https://sourceforge.net/p/predef/wiki/Architectures. +#define HAS_CPUID_SUPPORT +#if defined(__x86_64__) && defined(__GNUC__) +# include <cpuid.h> // __cpuid_count() +#elif defined(_M_X64) || defined(__amd64__) || defined(_M_AMD64) +# include <intrin.h> // __cpuidex() +#else +# undef HAS_CPUID_SUPPORT +#endif + +// Below, we declare macros for guarding the detection of SSE, AVX/AVX2 +// and AVX-512 instructions. If the compiler does not even recognize the +// corresponding flags or if we are not on a 64-bit platform we do not +// even try to inspect the output of CPUID for those specific features.
+#ifdef HAS_CPUID_SUPPORT +#if defined(_Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS) +# define SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#endif + +#if defined(_Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS) \ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS) 
\ + || defined(_Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS) \ + // macros above should be sorted in alphabetical order +# define SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#endif +#endif // HAS_CPUID_SUPPORT + +// On macOS, checking the XCR0 register is NOT a guaranteed way +// to ensure the usability of AVX-512. As such, we disable the +// entire set of AVX-512 instructions. +// +// See https://stackoverflow.com/a/72523150/9579194. +#if defined(__APPLE__) +# undef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD + // Additionally, AVX2 cannot be compiled on macOS ARM64 (yet it can be + // compiled on x86_64). However, since autoconf incorrectly assumes so + // when compiling a universal2 binary, we disable SIMD on such builds. +# if defined(__aarch64__) || defined(__arm64__) +# undef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +# undef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +# endif +#endif + +// Below, we declare macros indicating how CPUID can be called at runtime, +// so that we only call CPUID with specific inputs when needed. + +#if defined(SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=1 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L1 +#endif + +#if defined(SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD) \ + || defined(SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=0. */ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S0 +#endif + +#if defined(SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD) +/* Indicate that cpuid should be called once with EAX=7 and ECX=1. 
*/ +# ifndef HAS_CPUID_SUPPORT +# error "HAS_CPUID_SUPPORT must be set" +# endif +# define SHOULD_PARSE_CPUID_L7 +# define SHOULD_PARSE_CPUID_L7S1 +#endif + +#if defined(SHOULD_PARSE_CPUID_L7S0) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S0 requires SHOULD_PARSE_CPUID_L7" +#endif +#if defined(SHOULD_PARSE_CPUID_L7S1) && !defined(SHOULD_PARSE_CPUID_L7) +#error "SHOULD_PARSE_CPUID_L7S1 requires SHOULD_PARSE_CPUID_L7" +#endif + +/* + * Call __cpuid_count() or equivalent and get + * its EAX, EBX, ECX and EDX output registers. + * + * If CPUID is not supported, registers are set to 0. + */ +static void +get_cpuid_info(uint32_t level /* input eax */, + uint32_t count /* input ecx */, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + *eax = *ebx = *ecx = *edx = 0; // ensure the output to be initialized +#if defined(HAS_CPUID_SUPPORT) && defined(__x86_64__) && defined(__GNUC__) + __cpuid_count(level, count, *eax, *ebx, *ecx, *edx); +#elif defined(HAS_CPUID_SUPPORT) && defined(_M_X64) + int info[4] = {0}; // __cpuidex() is declared with 'int' parameters + __cpuidex(info, (int)level, (int)count); + *eax = info[0], *ebx = info[1], *ecx = info[2], *edx = info[3]; +#else + (void)level, (void)count; +#endif +} + +/* Highest Function Parameter and Manufacturer ID (LEAF=0, SUBLEAF=0). */ +static uint32_t +detect_cpuid_maxleaf(void) +{ + uint32_t maxleaf = 0, _ebx = 0, _ecx = 0, _edx = 0; + get_cpuid_info(0, 0, &maxleaf, &_ebx, &_ecx, &_edx); + return maxleaf; +} + +/* Processor Info and Feature Bits (LEAF=1, SUBLEAF=0). */ +#ifdef SHOULD_PARSE_CPUID_L1 +static void /* should only be used after calling cpuid(1, 0, ...) */ +detect_cpuid_features(_Py_cpuid_features *flags, uint32_t ecx, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 1); + (void)flags, (void)ecx, (void)edx; // silence -Wunused-parameter + // Keep the ordering and newlines as they are declared in the structure.
+#ifdef SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + flags->sse = CPUID_CHECK_REG(edx, EDX_L1_SSE); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + flags->sse2 = CPUID_CHECK_REG(edx, EDX_L1_SSE2); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + flags->sse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSE3); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + flags->ssse3 = CPUID_CHECK_REG(ecx, ECX_L1_SSSE3); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + flags->sse41 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_1); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + flags->sse42 = CPUID_CHECK_REG(ecx, ECX_L1_SSE4_2); +#endif +#endif // SIMD_SSE_INSTRUCTIONS_DETECTION_GUARD + +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + flags->avx = CPUID_CHECK_REG(ecx, ECX_L1_AVX); +#endif +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD + +#ifdef HAS_CPUID_SUPPORT + flags->cmov = CPUID_CHECK_REG(edx, EDX_L1_CMOV); + flags->fma = CPUID_CHECK_REG(ecx, ECX_L1_FMA); + flags->popcnt = CPUID_CHECK_REG(ecx, ECX_L1_POPCNT); + flags->pclmulqdq = CPUID_CHECK_REG(ecx, ECX_L1_PCLMULQDQ); + + flags->xsave = CPUID_CHECK_REG(ecx, ECX_L1_XSAVE); + flags->osxsave = CPUID_CHECK_REG(ecx, ECX_L1_OSXSAVE); +#endif +} +#endif + +/* Extended Feature Bits (LEAF=7, SUBLEAF=0). */ +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void /* should only be used after calling cpuid(7, 0, ...) */ +detect_cpuid_extended_features_L7S0(_Py_cpuid_features *flags, + uint32_t ebx, uint32_t ecx, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + (void)flags, (void)ebx, (void)ecx, (void)edx; + // Keep the ordering and newlines as they are declared in the structure. 
+#ifdef SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + flags->avx2 = CPUID_CHECK_REG(ebx, EBX_L7_AVX2); +#endif +#endif // SIMD_AVX2_INSTRUCTIONS_DETECTION_GUARD + +#ifdef SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + flags->avx512_f = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_F); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + flags->avx512_cd = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_CD); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + flags->avx512_er = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_ER); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + flags->avx512_pf = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_PF); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + flags->avx512_4fmaps = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4FMAPS); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + flags->avx512_4vnniw = CPUID_CHECK_REG(edx, EDX_L7_AVX512_4VNNIW); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + flags->avx512_vpopcntdq = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VPOPCNTDQ); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + flags->avx512_vl = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_VL); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + flags->avx512_dq = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_DQ); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + flags->avx512_bw = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_BW); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + flags->avx512_ifma = CPUID_CHECK_REG(ebx, EBX_L7_AVX512_IFMA); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + flags->avx512_vbmi = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + flags->avx512_vnni = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VNNI); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + flags->avx512_vbmi2 = 
CPUID_CHECK_REG(ecx, ECX_L7_AVX512_VBMI2); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + flags->avx512_bitalg = CPUID_CHECK_REG(ecx, ECX_L7_AVX512_BITALG); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + flags->avx512_vp2intersect = CPUID_CHECK_REG(edx, EDX_L7_AVX512_VP2INTERSECT); +#endif +#endif // SIMD_AVX512_INSTRUCTIONS_DETECTION_GUARD +} +#endif + +/* Extended Feature Bits (LEAF=7, SUBLEAF=1). */ +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void /* should only be used after calling cpuid(7, 1, ...) */ +detect_cpuid_extended_features_L7S1(_Py_cpuid_features *flags, + uint32_t eax, uint32_t edx) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + // Reference every parameter: all assignments below may be compiled out. + (void)flags, (void)eax, (void)edx; + // Keep the ordering and newlines as they are declared in the structure. +#ifdef SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +#ifdef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + flags->avx_ne_convert = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_NE_CONVERT); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + flags->avx_ifma = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_IFMA); +#endif + +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + flags->avx_vnni = CPUID_CHECK_REG(eax, EAX_L7S1_AVX_VNNI); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + flags->avx_vnni_int8 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT8); +#endif +#ifdef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + flags->avx_vnni_int16 = CPUID_CHECK_REG(edx, EDX_L7S1_AVX_VNNI_INT16); +#endif +#endif // SIMD_AVX_INSTRUCTIONS_DETECTION_GUARD +} +#endif + +/* Mark 'flags' as ready; the object must not be ready yet. */ +static void +cpuid_features_finalize(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + + // Here, any flag that may depend on others should be correctly set + // at runtime to avoid illegal instruction errors.
+ + flags->ready = 1; +} + +/* + * Return 1 if the detected flags are self-consistent and 0 otherwise. + * + * Flags that are not ready are rejected, as is any AVX-512 extension + * reported without the AVX-512/F foundation. + */ +int +_Py_cpuid_check_features(const _Py_cpuid_features *flags) +{ + if (flags->ready != 1) { + return 0; + } + + // AVX-512/F is required to support any other AVX-512 instruction set + uint8_t avx512_require_f = ( + // newlines are placed according to processor generations + flags->avx512_cd || + flags->avx512_er || flags->avx512_pf || + flags->avx512_4fmaps || flags->avx512_4vnniw || + flags->avx512_vpopcntdq || + flags->avx512_vl || flags->avx512_dq || flags->avx512_bw || + flags->avx512_ifma || flags->avx512_vbmi || + flags->avx512_vnni || + flags->avx512_vbmi2 || flags->avx512_bitalg || + flags->avx512_vp2intersect + ); + + // Inconsistent only when an extension is set while AVX-512/F is not; + // a CPU without any AVX-512 support is perfectly valid. + if (!flags->avx512_f && avx512_require_f) { + return 0; + } + + return 1; +} + +/* + * Apply a 1-parameter macro MACRO(FLAG) on all members + * of a '_Py_cpuid_features' object ('ready' is omitted). + */ +#define CPUID_APPLY_MACRO(MACRO) \ + do { \ + MACRO(sse); \ + MACRO(sse2); \ + MACRO(sse3); \ + MACRO(ssse3); \ + MACRO(sse41); \ + MACRO(sse42); \ + \ + MACRO(avx); \ + MACRO(avx_ifma); \ + MACRO(avx_ne_convert); \ + \ + MACRO(avx_vnni); \ + MACRO(avx_vnni_int8); \ + MACRO(avx_vnni_int16); \ + \ + MACRO(avx2); \ + \ + MACRO(avx512_f); \ + MACRO(avx512_cd); \ + \ + MACRO(avx512_er); \ + MACRO(avx512_pf); \ + \ + MACRO(avx512_4fmaps); \ + MACRO(avx512_4vnniw); \ + \ + MACRO(avx512_vpopcntdq); \ + \ + MACRO(avx512_vl); \ + MACRO(avx512_dq); \ + MACRO(avx512_bw); \ + \ + MACRO(avx512_ifma); \ + MACRO(avx512_vbmi); \ + \ + MACRO(avx512_vnni); \ + \ + MACRO(avx512_vbmi2); \ + MACRO(avx512_bitalg); \ + \ + MACRO(avx512_vp2intersect); \ + \ + MACRO(cmov); \ + MACRO(fma); \ + MACRO(popcnt); \ + MACRO(pclmulqdq); \ + \ + MACRO(xsave); \ + MACRO(osxsave); \ + } while (0) + +/* Reset 'maxleaf' and clear every feature flag; 'ready' is left untouched. */ +void +_Py_cpuid_disable_features(_Py_cpuid_features *flags) +{ + flags->maxleaf = 0; +#define CPUID_DISABLE(FLAG) flags->FLAG = 0 + CPUID_APPLY_MACRO(CPUID_DISABLE); +#undef CPUID_DISABLE +} + +/* + * Return 1 if both objects are ready, 'actual' has at least the 'maxleaf' + * of 'expect' and every feature set in 'expect' is also set in 'actual'; + * return 0 otherwise. + */ +int +_Py_cpuid_has_features(const _Py_cpuid_features
*actual, + const _Py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf < expect->maxleaf) { + return 0; + } +#define CPUID_CHECK_FEATURE(FLAG) \ + do { \ + if (expect->FLAG && !actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_CHECK_FEATURE); +#undef CPUID_CHECK_FEATURE + return 1; +} + +/* + * Return 1 if both objects are ready and have the same 'maxleaf' and the + * same value for every feature flag; return 0 otherwise. + */ +int +_Py_cpuid_match_features(const _Py_cpuid_features *actual, + const _Py_cpuid_features *expect) +{ + if (!actual->ready || !expect->ready) { + return 0; + } + if (actual->maxleaf != expect->maxleaf) { + return 0; + } +#define CPUID_MATCH_FEATURE(FLAG) \ + do { \ + if (expect->FLAG != actual->FLAG) { \ + return 0; \ + } \ + } while (0) + CPUID_APPLY_MACRO(CPUID_MATCH_FEATURE); +#undef CPUID_MATCH_FEATURE + return 1; +} + +#undef CPUID_APPLY_MACRO + +/* Query CPUID leaf 1 when 'maxleaf' allows it; only ECX and EDX are forwarded. */ +#ifdef SHOULD_PARSE_CPUID_L1 +static void +cpuid_detect_l1_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + if (flags->maxleaf >= 1) { + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(1, 0, &eax, &ebx, &ecx, &edx); + detect_cpuid_features(flags, ecx, edx); + } +} +#else +#define cpuid_detect_l1_features(FLAGS) +#endif + +/* Query CPUID (leaf 7, sub-leaf 0); the EAX output is not used. */ +#ifdef SHOULD_PARSE_CPUID_L7S0 +static void +cpuid_detect_l7s0_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + uint32_t _eax = 0, ebx = 0, ecx = 0, edx = 0; + get_cpuid_info(7, 0, &_eax, &ebx, &ecx, &edx); + detect_cpuid_extended_features_L7S0(flags, ebx, ecx, edx); +} +#else +#define cpuid_detect_l7s0_features(FLAGS) +#endif + +/* Query CPUID (leaf 7, sub-leaf 1); only EAX and EDX are forwarded. */ +/* NOTE(review): sub-leaf 1 is queried without looking at the EAX output of cpuid(7, 0) - confirm this is intended. */ +#ifdef SHOULD_PARSE_CPUID_L7S1 +static void +cpuid_detect_l7s1_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + assert(flags->maxleaf >= 7); + uint32_t eax = 0, _ebx = 0, _ecx = 0, edx = 0; + get_cpuid_info(7, 1, &eax, &_ebx, &_ecx, &edx); + detect_cpuid_extended_features_L7S1(flags, eax, edx); +} +#else +#define cpuid_detect_l7s1_features(FLAGS) +#endif + +#ifdef SHOULD_PARSE_CPUID_L7 +static void
+cpuid_detect_l7_features(_Py_cpuid_features *flags) +{ + assert(flags->ready == 0); + if (flags->maxleaf >= 7) { + cpuid_detect_l7s0_features(flags); + cpuid_detect_l7s1_features(flags); + } +} +#else +#define cpuid_detect_l7_features(FLAGS) +#endif + +/* + * Detect the CPU features and store them in 'flags'. + * + * An object that is already ready is left unchanged. If the detected + * flags fail the consistency check, every feature is disabled. + */ +void +_Py_cpuid_detect_features(_Py_cpuid_features *flags) +{ + if (flags->ready) { + return; + } + _Py_cpuid_disable_features(flags); + flags->maxleaf = detect_cpuid_maxleaf(); + cpuid_detect_l1_features(flags); + cpuid_detect_l7_features(flags); + cpuid_features_finalize(flags); + if (!_Py_cpuid_check_features(flags)) { + _Py_cpuid_disable_features(flags); + } +} diff --git a/Tools/cpuinfo/.ruff.toml b/Tools/cpuinfo/.ruff.toml new file mode 100644 index 00000000000000..e49d04c2d4e863 --- /dev/null +++ b/Tools/cpuinfo/.ruff.toml @@ -0,0 +1,16 @@ +# Python 3.12 is required for 'type' statements +target-version = "py312" +line-length = 79 + +[format] +skip-magic-trailing-comma = false + +[lint] +select = [ + "I", # isort + "F841", # unused variable + "RUF100", # Ban unused `# noqa` comments + "PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes) +] + + diff --git a/Tools/cpuinfo/libcpuinfo/__init__.py b/Tools/cpuinfo/libcpuinfo/__init__.py new file mode 100644 index 00000000000000..a935debd4f4bbc --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/__init__.py @@ -0,0 +1,20 @@ +""" +This package provides functions to generate flags for CPUID and XSAVE. + +The constants are macros generated by Argument Clinic as follows: + + #define <NAME> 0x<MASK> // bit = BIT + ^ ^ + +where ^ indicates a column that is a multiple of 4, <MASK> has +exactly 8 characters and BIT has at most 2 characters. + +A C enumeration is NOT generated as the largest member may not fit +on an 'int', which is forbidden as ISO C restricts enumerator values +to that range. + +.. note:: + + This package must not be used directly and should only be + invoked from an Argument Clinic "[python input]" directive.
+""" diff --git a/Tools/cpuinfo/libcpuinfo/features/__init__.py b/Tools/cpuinfo/libcpuinfo/features/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Tools/cpuinfo/libcpuinfo/features/cpuid.py b/Tools/cpuinfo/libcpuinfo/features/cpuid.py new file mode 100644 index 00000000000000..65a37860a2778d --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/features/cpuid.py @@ -0,0 +1,131 @@ +""" +Generate macro constants describing masks to apply on CPUID output registers. + +Constants are _Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>, +where <> (resp. []) denotes a required (resp. optional) group and: + +- REGISTER is EAX, EBX, ECX or EDX, +- LEAF is the initial value of the EAX register (1 or 7), +- SUBLEAF is the initial value of the ECX register (omitted if 0), and +- FEATURE is a SIMD feature (with one or more specialized instructions). + +For maintainability, the flags are ordered by leafs, subleafs, registers, +and bits. See https://en.wikipedia.org/wiki/CPUID for the values. + +Note 1: The LEAF is also called the 'page' or the 'level'. +Note 2: The SUBLEAF is also referred to as the 'count'. + +The LEAF value should only be 1 or 7 as other values may have different +meanings depending on the underlying architecture. + +.. seealso:: :file:`Include/internal/pycore_cpuinfo_cpuid_features.h` +""" + +from __future__ import annotations + +__all__ = ["make_cpuid_features_constants"] + +from typing import TYPE_CHECKING + +import libcpuinfo.util as util +from libcpuinfo.util import DOXYGEN_STYLE + +if TYPE_CHECKING: + from typing import Final + + type Leaf = int + type SubLeaf = int + type Registry = str + type FeatureFamily = tuple[Leaf, SubLeaf, Registry] + + type Feature = str + type BitIndex = int + +CPUID_FEATURES: Final[dict[FeatureFamily, dict[Feature, BitIndex]]] = { + # See https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits.
+ (1, 0, "ECX"): { + "SSE3": 0, + "PCLMULQDQ": 1, + "SSSE3": 9, + "FMA": 12, + "SSE4_1": 19, + "SSE4_2": 20, + "POPCNT": 23, + "XSAVE": 26, + "OSXSAVE": 27, + "AVX": 28, + }, + (1, 0, "EDX"): { + "CMOV": 15, + "SSE": 25, + "SSE2": 26, + }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features. + (7, 0, "EBX"): { + "AVX2": 5, + "AVX512_F": 16, + "AVX512_DQ": 17, + "AVX512_IFMA": 21, + "AVX512_PF": 26, + "AVX512_ER": 27, + "AVX512_CD": 28, + "AVX512_BW": 30, + "AVX512_VL": 31, + }, + (7, 0, "ECX"): { + "AVX512_VBMI": 1, + "AVX512_VBMI2": 6, + "AVX512_VNNI": 11, + "AVX512_BITALG": 12, + "AVX512_VPOPCNTDQ": 14, + }, + (7, 0, "EDX"): { + "AVX512_4VNNIW": 2, + "AVX512_4FMAPS": 3, + "AVX512_VP2INTERSECT": 8, + }, + # See https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=1:_Extended_Features. + (7, 1, "EAX"): { + "AVX_VNNI": 4, + "AVX_IFMA": 23, + }, + (7, 1, "EDX"): { + "AVX_VNNI_INT8": 4, + "AVX_NE_CONVERT": 5, + "AVX_VNNI_INT16": 10, + }, +} + + +def get_constant_name( + leaf: Leaf, subleaf: SubLeaf, registry: Registry, name: Feature +) -> str: + """Return the macro name of a feature; the sub-leaf part is omitted when it is 0.""" + node = f"L{leaf}S{subleaf}" if subleaf else f"L{leaf}" + return f"_Py_CPUID_MASK_{registry}_{node}_{name}" + + +# Width of the name column: the longest macro name, rounded up by util.next_block(). +_NAME_MAXSIZE: Final[int] = util.next_block( + max( + len(get_constant_name(*family, name)) + for family, values in CPUID_FEATURES.items() + for name in values + ) +) + + +def make_cpuid_features_constants() -> str: + """Used by :file:`Include/internal/pycore_cpuinfo_cpuid_features.h`.""" + writer = util.CWriter() + writer.comment("Constants for CPUID features", style=DOXYGEN_STYLE) + for family, values in CPUID_FEATURES.items(): + leaf, subleaf, registry = family + writer.comment(f"CPUID (LEAF={leaf}, SUBLEAF={subleaf}) [{registry}]") + for feature_name, bit in values.items(): + # reject empty names and bits that do not fit in a 32-bit register + if not feature_name: + raise ValueError(f"invalid entry for {family}") + if not 0 <= bit < 32: + raise ValueError(f"invalid bit value for {feature_name!r}") + key = get_constant_name(leaf, subleaf, registry, feature_name) +
writer.write(util.make_constant(key, bit, _NAME_MAXSIZE)) + writer.write_blankline() + return writer.build() diff --git a/Tools/cpuinfo/libcpuinfo/util.py b/Tools/cpuinfo/libcpuinfo/util.py new file mode 100644 index 00000000000000..9d478ca686f65b --- /dev/null +++ b/Tools/cpuinfo/libcpuinfo/util.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +__all__ = [ + "next_block", "make_constant", + "Style", "C99_STYLE", "C11_STYLE", "DOXYGEN_STYLE", + "CWriter", +] # fmt: skip + +import contextlib +import enum +from io import StringIO +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator + from typing import Any, Final, Literal + + +def next_block(w: int) -> int: + """Compute the smallest multiple of 4 strictly larger than *w*.""" + return ((w + 3) & ~0x03) if (w % 4) else (w + 4) + + +# Width of the mask column: the 10-character "0x" + 8 hex digits literal, padded. +_MASKSIZE: Final[int] = next_block(len("0x00000000")) + + +def make_constant(key: str, bit: int, name_maxsize: int) -> str: + """Return the '#define' line for *key* whose value is the mask '1 << bit'. + + The name is left-justified on *name_maxsize* columns and the mask is + written with exactly 8 hexadecimal digits. + """ + # bit 32 would need 9 hexadecimal digits: only bits 0..31 fit an uint32_t + assert bit < 32, f"{key}: mask does not fit on an uint32_t" + member_name = key.ljust(name_maxsize) + member_mask = format(1 << bit, "008x") + member_mask = f"0x{member_mask}".ljust(_MASKSIZE) + return f"#define {member_name}{member_mask}// bit = {bit}" + + +class Style(enum.IntEnum): + C99 = enum.auto() + C11 = enum.auto() + DOXYGEN = enum.auto() + + +C99_STYLE: Final[Literal[Style.C99]] = Style.C99 +C11_STYLE: Final[Literal[Style.C11]] = Style.C11 +DOXYGEN_STYLE: Final[Literal[Style.DOXYGEN]] = Style.DOXYGEN + +# Each entry is (prolog, epilog, line prefix). +_COMMENT_INLINE_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/* ", " */", ""), + DOXYGEN_STYLE: ("/** ", " */", ""), +} + +# NOTE(review): the C99 entry has an empty line prefix, so the text lines of a multi-line C99 comment look like they would be written without '//' - confirm. +_COMMENT_BLOCK_STYLE: Final[dict[Style, tuple[str, str, str]]] = { + C99_STYLE: ("// ", "", ""), + C11_STYLE: ("/*", " */", " * "), + DOXYGEN_STYLE: ("/**", " */", " * "), +} + + +class CWriter: + def __init__(self, *, indentsize: int = 4) -> None: + self._stream = StringIO() + self._indent = " " * indentsize + self._prefix = "" + + def
comment( + self, text: str, *, level: int = 0, style: Style = C11_STYLE + ) -> None: + """Add a C comment, possibly using doxygen style.""" + # short single-line texts are written inline, anything else as a block + if len(text) < 72 and "\n" not in text: + prolog, epilog, _ = _COMMENT_INLINE_STYLE[style] + self.write(prolog, text, epilog, sep="", level=level) + else: + prolog, epilog, prefix = _COMMENT_BLOCK_STYLE[style] + self.write(prolog, level=level) + with self.prefixed(prefix): + for line in text.splitlines(): + self.write(line, level=level) + self.write(epilog, level=level) + + @contextlib.contextmanager + def prefixed(self, prefix: str) -> Iterator[None]: + """Use *prefix* as the line prefix inside the 'with' block, then restore the previous one.""" + old_prefix = self._prefix + self._prefix = prefix + try: + yield + finally: + self._prefix = old_prefix + + def _prefix_at(self, level: int) -> str: + return "".join((self._indent * level, self._prefix)) + + def write( + self, *args: Any, sep: str = " ", end: str = "\n", level: int = 0 + ) -> None: + """Write *args* preceded by the indentation for *level* and the current prefix.""" + if prefix := self._prefix_at(level): + self._write(prefix, sep="", end="") + self._write(*args, sep=sep, end=end) + + def write_blankline(self) -> None: + self._write() + + def _write(self, *args: Any, sep: str = " ", end: str = "\n") -> None: + print(*args, sep=sep, end=end, file=self._stream) + + def build(self) -> str: + """Return the written text, right-stripped, between clang-format off/on markers.""" + # inject directives to temporarily disable external C formatters + return "\n".join( + ( + "// clang-format off", + self._stream.getvalue().rstrip(), + "// clang-format on", + ) + ) diff --git a/Tools/cpuinfo/mypy.ini b/Tools/cpuinfo/mypy.ini new file mode 100644 index 00000000000000..914ca082b72189 --- /dev/null +++ b/Tools/cpuinfo/mypy.ini @@ -0,0 +1,9 @@ +[mypy] +files = Tools/cpuinfo/ +pretty = True + +python_version = 3.12 +strict = True +extra_checks = True +enable_error_code = ignore-without-code,redundant-expr,truthy-bool +warn_unreachable = True diff --git a/configure b/configure index 75ae1699a8e451..a033cb515286da 100755 --- a/configure +++ b/configure @@ -32538,6 +32538,1633 @@ then : fi + + +# Detection of supported SIMD instruction sets for
CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. +# +# Although AVX support is not guaranteed on Android [1], this is safe +# because we do a runtime CPUID check. +# +# [1]: https://developer.android.com/ndk/guides/abis#86-64 +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse" >&5 +printf %s "checking whether C compiler accepts -msse... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse" = xyes +then : + ac_cv_can_compile_simd_sse=yes +else case e in #( + e) ac_cv_can_compile_simd_sse=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse2" >&5 +printf %s "checking whether C compiler accepts -msse2... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse2" = xyes +then : + ac_cv_can_compile_simd_sse2=yes +else case e in #( + e) ac_cv_can_compile_simd_sse2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse3" >&5 +printf %s "checking whether C compiler accepts -msse3... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse3=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse3=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse3" = xyes +then : + ac_cv_can_compile_simd_sse3=yes +else case e in #( + e) ac_cv_can_compile_simd_sse3=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse3" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mssse3" >&5 +printf %s "checking whether C compiler accepts -mssse3... " >&6; } +if test ${ax_cv_check_cflags__Werror__mssse3+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mssse3" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mssse3=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mssse3=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mssse3" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mssse3" >&6; } +if test "x$ax_cv_check_cflags__Werror__mssse3" = xyes +then : + ac_cv_can_compile_simd_ssse3=yes +else case e in #( + e) ac_cv_can_compile_simd_ssse3=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_ssse3" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.1" >&5 +printf %s "checking whether C compiler accepts -msse4.1... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse4_1+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse4.1" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse4_1=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse4_1=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_1" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_1" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_1" = xyes +then : + ac_cv_can_compile_simd_sse4_1=yes +else case e in #( + e) ac_cv_can_compile_simd_sse4_1=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse4_1" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse4.2" >&5 +printf %s "checking whether C compiler accepts -msse4.2... " >&6; } +if test ${ax_cv_check_cflags__Werror__msse4_2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -msse4.2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__msse4_2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__msse4_2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse4_2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__msse4_2" >&6; } +if test "x$ax_cv_check_cflags__Werror__msse4_2" = xyes +then : + ac_cv_can_compile_simd_sse4_2=yes +else case e in #( + e) ac_cv_can_compile_simd_sse4_2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_sse4_2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx" >&5 +printf %s "checking whether C compiler accepts -mavx... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx" = xyes +then : + ac_cv_can_compile_simd_avx=yes +else case e in #( + e) ac_cv_can_compile_simd_avx=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxifma" >&5 +printf %s "checking whether C compiler accepts -mavxifma... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxifma+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxifma=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxifma=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxifma" = xyes +then : + ac_cv_can_compile_simd_avx_ifma=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_ifma=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_ifma" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxneconvert" >&5 +printf %s "checking whether C compiler accepts -mavxneconvert... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxneconvert+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxneconvert" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxneconvert=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxneconvert=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxneconvert" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxneconvert" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxneconvert" = xyes +then : + ac_cv_can_compile_simd_avx_ne_convert=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_ne_convert=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_ne_convert" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnni" >&5 +printf %s "checking whether C compiler accepts -mavxvnni... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnni+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnni=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnni=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnni" = xyes +then : + ac_cv_can_compile_simd_avx_vnni=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint8" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint8... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnniint8+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnniint8" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnniint8=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnniint8=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint8" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint8" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint8" = xyes +then : + ac_cv_can_compile_simd_avx_vnni_int8=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni_int8=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni_int8" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavxvnniint16" >&5 +printf %s "checking whether C compiler accepts -mavxvnniint16... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavxvnniint16+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavxvnniint16" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavxvnniint16=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavxvnniint16=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavxvnniint16" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavxvnniint16" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavxvnniint16" = xyes +then : + ac_cv_can_compile_simd_avx_vnni_int16=yes +else case e in #( + e) ac_cv_can_compile_simd_avx_vnni_int16=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx_vnni_int16" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX-2 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 +printf %s "checking whether C compiler accepts -mavx2... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes +then : + ac_cv_can_compile_simd_avx2=yes +else case e in #( + e) ac_cv_can_compile_simd_avx2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # AVX-512 + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512f" >&5 +printf %s "checking whether C compiler accepts -mavx512f... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512f+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512f" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512f=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512f=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512f" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512f" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512f" = xyes +then : + ac_cv_can_compile_simd_avx512_f=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_f=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_f" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512cd" >&5 +printf %s "checking whether C compiler accepts -mavx512cd... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512cd+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512cd" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512cd=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512cd=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512cd" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512cd" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512cd" = xyes +then : + ac_cv_can_compile_simd_avx512_cd=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_cd=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_cd" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512er" >&5 +printf %s "checking whether C compiler accepts -mavx512er... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512er+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512er" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512er=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512er=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512er" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512er" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512er" = xyes +then : + ac_cv_can_compile_simd_avx512_er=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_er=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_er" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512pf" >&5 +printf %s "checking whether C compiler accepts -mavx512pf... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512pf+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512pf" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512pf=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512pf=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512pf" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512pf" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512pf" = xyes +then : + ac_cv_can_compile_simd_avx512_pf=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_pf=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_pf" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124fmaps" >&5 +printf %s "checking whether C compiler accepts -mavx5124fmaps... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx5124fmaps+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx5124fmaps" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx5124fmaps=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx5124fmaps=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124fmaps" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124fmaps" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124fmaps" = xyes +then : + ac_cv_can_compile_simd_avx512_4fmaps=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_4fmaps=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_4fmaps" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx5124vnniw" >&5 +printf %s "checking whether C compiler accepts -mavx5124vnniw... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx5124vnniw+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx5124vnniw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx5124vnniw=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx5124vnniw=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx5124vnniw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx5124vnniw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx5124vnniw" = xyes +then : + ac_cv_can_compile_simd_avx512_4vnniw=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_4vnniw=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_4vnniw" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vpopcntdq" >&5 +printf %s "checking whether C compiler accepts -mavx512vpopcntdq... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vpopcntdq+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vpopcntdq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vpopcntdq=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vpopcntdq=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vpopcntdq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vpopcntdq" = xyes +then : + ac_cv_can_compile_simd_avx512_vpopcntdq=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vpopcntdq=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vpopcntdq" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vl" >&5 +printf %s "checking whether C compiler accepts -mavx512vl... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vl+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vl" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vl=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vl=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vl" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vl" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vl" = xyes +then : + ac_cv_can_compile_simd_avx512_vl=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vl=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vl" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512dq" >&5 +printf %s "checking whether C compiler accepts -mavx512dq... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512dq+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512dq" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512dq=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512dq=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512dq" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512dq" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512dq" = xyes +then : + ac_cv_can_compile_simd_avx512_dq=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_dq=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_dq" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bw" >&5 +printf %s "checking whether C compiler accepts -mavx512bw... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512bw+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512bw" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512bw=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512bw=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bw" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bw" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bw" = xyes +then : + ac_cv_can_compile_simd_avx512_bw=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_bw=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_bw" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512ifma" >&5 +printf %s "checking whether C compiler accepts -mavx512ifma... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512ifma+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512ifma" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512ifma=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512ifma=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512ifma" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512ifma" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512ifma" = xyes +then : + ac_cv_can_compile_simd_avx512_ifma=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_ifma=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_ifma" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vbmi+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vbmi" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vbmi=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vbmi=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi" = xyes +then : + ac_cv_can_compile_simd_avx512_vbmi=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vbmi=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vbmi" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vnni" >&5 +printf %s "checking whether C compiler accepts -mavx512vnni... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vnni+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vnni" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vnni=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vnni=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vnni" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vnni" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vnni" = xyes +then : + ac_cv_can_compile_simd_avx512_vnni=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vnni=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vnni" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vbmi2" >&5 +printf %s "checking whether C compiler accepts -mavx512vbmi2... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vbmi2+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vbmi2" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vbmi2=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vbmi2=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vbmi2" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vbmi2" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vbmi2" = xyes +then : + ac_cv_can_compile_simd_avx512_vbmi2=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vbmi2=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vbmi2" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512bitalg" >&5 +printf %s "checking whether C compiler accepts -mavx512bitalg... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512bitalg+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512bitalg" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512bitalg=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512bitalg=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512bitalg" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512bitalg" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512bitalg" = xyes +then : + ac_cv_can_compile_simd_avx512_bitalg=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_bitalg=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_bitalg" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + + # + + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx512vp2intersect" >&5 +printf %s "checking whether C compiler accepts -mavx512vp2intersect... " >&6; } +if test ${ax_cv_check_cflags__Werror__mavx512vp2intersect+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS -Werror -mavx512vp2intersect" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + ax_cv_check_cflags__Werror__mavx512vp2intersect=yes +else case e in #( + e) ax_cv_check_cflags__Werror__mavx512vp2intersect=no ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ax_check_save_flags ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx512vp2intersect" >&5 +printf "%s\n" "$ax_cv_check_cflags__Werror__mavx512vp2intersect" >&6; } +if test "x$ax_cv_check_cflags__Werror__mavx512vp2intersect" = xyes +then : + ac_cv_can_compile_simd_avx512_vp2intersect=yes +else case e in #( + e) ac_cv_can_compile_simd_avx512_vp2intersect=no ;; +esac +fi + + if test "x$ac_cv_can_compile_simd_avx512_vp2intersect" = xyes +then : + + +printf "%s\n" "#define _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS 1" >>confdefs.h + + +fi + + + +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # @@ -32588,48 +34215,19 @@ else use_hacl_universal2_impl=no fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +# The HACL* SIMD-128 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 -printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... 
" >&6; } -if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -msse -msse2 -msse3 -msse4.1 -msse4.2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" >&6; } -if test "x$ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2" = xyes -then : - + # SIMD-128 + if test "$ac_cv_can_compile_simd_sse" = "yes" \ + -a "$ac_cv_can_compile_simd_sse2" = "yes" \ + -a "$ac_cv_can_compile_simd_sse3" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" + then LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" @@ -32650,65 +34248,22 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - - -else case e in #( - e) : ;; -esac -fi - + fi fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. -# -# Although AVX support is not guaranteed on Android -# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a -# runtime CPUID check. +# The HACL* SIMD-256 files use aligned_alloc, which is not available +# on older versions of Android. 
In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 -printf %s "checking whether C compiler accepts -mavx2... " >&6; } -if test ${ax_cv_check_cflags__Werror__mavx2+y} -then : - printf %s "(cached) " >&6 -else case e in #( - e) - ax_check_save_flags=$CFLAGS - CFLAGS="$CFLAGS -Werror -mavx2" - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -int -main (void) -{ - - ; - return 0; -} -_ACEOF -if ac_fn_c_try_compile "$LINENO" -then : - ax_cv_check_cflags__Werror__mavx2=yes -else case e in #( - e) ax_cv_check_cflags__Werror__mavx2=no ;; -esac -fi -rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext - CFLAGS=$ax_check_save_flags ;; -esac -fi -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_check_cflags__Werror__mavx2" >&5 -printf "%s\n" "$ax_cv_check_cflags__Werror__mavx2" >&6; } -if test "x$ax_cv_check_cflags__Werror__mavx2" = xyes -then : - + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then LIBHACL_SIMD256_FLAGS="-mavx2" + printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h @@ -32727,12 +34282,7 @@ printf "%s\n" "universal2" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: standard" >&5 printf "%s\n" "standard" >&6; } fi - -else case e in #( - e) : ;; -esac -fi - + fi fi diff --git a/configure.ac b/configure.ac index 4da1ba78b54b0d..00d57c8a0ae20b 100644 --- a/configure.ac +++ b/configure.ac @@ -7965,6 +7965,99 @@ PY_STDLIB_MOD_SIMPLE([_codecs_tw]) PY_STDLIB_MOD_SIMPLE([_multibytecodec]) PY_STDLIB_MOD_SIMPLE([unicodedata]) +dnl PY_SIMD_DETECT(INSTRUCTION-SET-NAME, COMPILER-FLAG, [NORMALIZED-NAME]) +dnl ---------------------------------------------------------------------- +dnl +dnl Check if 
the compiler supports a given COMPILER-FLAG and define: +dnl +dnl ac_cv_can_compile_simd_<name> = yes +dnl #define _Py_CAN_COMPILE_SIMD_<NAME>_INSTRUCTIONS 1 +dnl +dnl or +dnl +dnl ac_cv_can_compile_simd_<name> = no +dnl #undef _Py_CAN_COMPILE_SIMD_<NAME>_INSTRUCTIONS +dnl +dnl where <name> and <NAME> are the lowercased and uppercased versions +dnl of NORMALIZED-NAME; by default, the latter is INSTRUCTION-SET-NAME. +dnl +AC_DEFUN([PY_SIMD_DETECT], [ + AS_VAR_PUSHDEF([py_var], [m4_ifblank([$3], + [[ac_cv_can_compile_simd_]m4_tolower([$1])], + [[ac_cv_can_compile_simd_]m4_tolower([$3])])]) + AS_VAR_PUSHDEF([py_define], [m4_ifblank([$3], + [[_Py_CAN_COMPILE_SIMD_]m4_toupper([$1])[_INSTRUCTIONS]], + [[_Py_CAN_COMPILE_SIMD_]m4_toupper([$3])[_INSTRUCTIONS]])]) + AX_CHECK_COMPILE_FLAG([$2], + [AS_VAR_SET([py_var], [yes])], + [AS_VAR_SET([py_var], [no])], + [-Werror]) + AS_VAR_IF([py_var], [yes], [ + AC_DEFINE([py_define], [1], [Define if '$2' is a valid compiler flag.]) + ]) + AS_VAR_POPDEF([py_var]) + AS_VAR_POPDEF([py_define]) +]) + +# Detection of supported SIMD instruction sets for CPython. Since +# we do not necessarily know which instruction sets will be used, +# we disable SIMD support on some older Android platforms. +# +# See _Py_cpuid_features in pycore_cpuinfo.h for how to order fields +# and where to put blank lines to separate processor generations for +# AVX-512 instructions. +# +# Although AVX support is not guaranteed on Android [1], this is safe +# because we do a runtime CPUID check. 
+# +# [1]: https://developer.android.com/ndk/guides/abis#86-64 +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ + { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } +then + # SSE + PY_SIMD_DETECT([SSE], [-msse]) + PY_SIMD_DETECT([SSE2], [-msse2]) + PY_SIMD_DETECT([SSE3], [-msse3]) + PY_SIMD_DETECT([SSSE3], [-mssse3]) + PY_SIMD_DETECT([SSE4.1], [-msse4.1], [SSE4_1]) + PY_SIMD_DETECT([SSE4.2], [-msse4.2], [SSE4_2]) + # AVX + PY_SIMD_DETECT([AVX], [-mavx]) + PY_SIMD_DETECT([AVX_IFMA], [-mavxifma]) + PY_SIMD_DETECT([AVX_NE_CONVERT], [-mavxneconvert]) + # + PY_SIMD_DETECT([AVX_VNNI], [-mavxvnni]) + PY_SIMD_DETECT([AVX_VNNI_INT8], [-mavxvnniint8]) + PY_SIMD_DETECT([AVX_VNNI_INT16], [-mavxvnniint16]) + # AVX-2 + PY_SIMD_DETECT([AVX2], [-mavx2]) + # AVX-512 + PY_SIMD_DETECT([AVX512_F], [-mavx512f]) + PY_SIMD_DETECT([AVX512_CD], [-mavx512cd]) + # + PY_SIMD_DETECT([AVX512_ER], [-mavx512er]) + PY_SIMD_DETECT([AVX512_PF], [-mavx512pf]) + # + PY_SIMD_DETECT([AVX512_4FMAPS], [-mavx5124fmaps]) + PY_SIMD_DETECT([AVX512_4VNNIW], [-mavx5124vnniw]) + # + PY_SIMD_DETECT([AVX512_VPOPCNTDQ], [-mavx512vpopcntdq]) + # + PY_SIMD_DETECT([AVX512_VL], [-mavx512vl]) + PY_SIMD_DETECT([AVX512_DQ], [-mavx512dq]) + PY_SIMD_DETECT([AVX512_BW], [-mavx512bw]) + # + PY_SIMD_DETECT([AVX512_IFMA], [-mavx512ifma]) + PY_SIMD_DETECT([AVX512_VBMI], [-mavx512vbmi]) + # + PY_SIMD_DETECT([AVX512_VNNI], [-mavx512vnni]) + # + PY_SIMD_DETECT([AVX512_VBMI2], [-mavx512vbmi2]) + PY_SIMD_DETECT([AVX512_BITALG], [-mavx512bitalg]) + # + PY_SIMD_DETECT([AVX512_VP2INTERSECT], [-mavx512vp2intersect]) +fi + ############################################################################### # HACL* compilation and linking configuration (contact: @picnixz) # @@ -8016,14 +8109,19 @@ else use_hacl_universal2_impl=no fi -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. 
-# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +# The HACL* SIMD-128 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - dnl This can be extended here to detect e.g. Power8, which HACL* should also support. - AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ + # SIMD-128 + if test "$ac_cv_can_compile_simd_sse" = "yes" \ + -a "$ac_cv_can_compile_simd_sse2" = "yes" \ + -a "$ac_cv_can_compile_simd_sse3" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_1" = "yes" \ + -a "$ac_cv_can_compile_simd_sse4_2" = "yes" + then [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ @@ -8040,24 +8138,21 @@ then [LIBHACL_BLAKE2_SIMD128_OBJS="Modules/_hacl/Hacl_Hash_Blake2s_Simd128.o"] AC_MSG_RESULT([standard]) fi - - ], [], [-Werror]) + fi fi AC_SUBST([LIBHACL_SIMD128_FLAGS]) AC_SUBST([LIBHACL_BLAKE2_SIMD128_OBJS]) -# The SIMD files use aligned_alloc, which is not available on older versions of -# Android. -# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. -# -# Although AVX support is not guaranteed on Android -# (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a -# runtime CPUID check. +# The HACL* SIMD-256 files use aligned_alloc, which is not available +# on older versions of Android. In addition, since the *mmintrin.h +# headers are x86-family-specific, they cannot be used on WASI. 
if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ { test -n "$ANDROID_API_LEVEL" && test "$ANDROID_API_LEVEL" -ge 28; } then - AX_CHECK_COMPILE_FLAG([-mavx2],[ + if test "$ac_cv_can_compile_simd_avx2" = "yes" + then [LIBHACL_SIMD256_FLAGS="-mavx2"] + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ HACL* library can compile SIMD256 implementations]) @@ -8073,7 +8168,7 @@ then [LIBHACL_BLAKE2_SIMD256_OBJS="Modules/_hacl/Hacl_Hash_Blake2b_Simd256.o"] AC_MSG_RESULT([standard]) fi - ], [], [-Werror]) + fi fi AC_SUBST([LIBHACL_SIMD256_FLAGS]) AC_SUBST([LIBHACL_BLAKE2_SIMD256_OBJS]) diff --git a/pyconfig.h.in b/pyconfig.h.in index d7c496fccc682c..478855c7022c3a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -2017,6 +2017,93 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Define if '-mavx2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS + +/* Define if '-mavx5124fmaps' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4FMAPS_INSTRUCTIONS + +/* Define if '-mavx5124vnniw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_4VNNIW_INSTRUCTIONS + +/* Define if '-mavx512bitalg' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BITALG_INSTRUCTIONS + +/* Define if '-mavx512bw' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_BW_INSTRUCTIONS + +/* Define if '-mavx512cd' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_CD_INSTRUCTIONS + +/* Define if '-mavx512dq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_DQ_INSTRUCTIONS + +/* Define if '-mavx512er' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_ER_INSTRUCTIONS + +/* Define if '-mavx512f' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_F_INSTRUCTIONS + +/* Define if '-mavx512ifma' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_AVX512_IFMA_INSTRUCTIONS + +/* Define if '-mavx512pf' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_PF_INSTRUCTIONS + +/* Define if '-mavx512vbmi2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI2_INSTRUCTIONS + +/* Define if '-mavx512vbmi' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS + +/* Define if '-mavx512vl' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VL_INSTRUCTIONS + +/* Define if '-mavx512vnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VNNI_INSTRUCTIONS + +/* Define if '-mavx512vp2intersect' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VP2INTERSECT_INSTRUCTIONS + +/* Define if '-mavx512vpopcntdq' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX512_VPOPCNTDQ_INSTRUCTIONS + +/* Define if '-mavxifma' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_IFMA_INSTRUCTIONS + +/* Define if '-mavx' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_INSTRUCTIONS + +/* Define if '-mavxneconvert' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_NE_CONVERT_INSTRUCTIONS + +/* Define if '-mavxvnni' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INSTRUCTIONS + +/* Define if '-mavxvnniint16' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT16_INSTRUCTIONS + +/* Define if '-mavxvnniint8' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_AVX_VNNI_INT8_INSTRUCTIONS + +/* Define if '-msse2' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS + +/* Define if '-msse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS + +/* Define if '-msse4.1' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS + +/* Define if '-msse4.2' is a valid compiler flag. 
*/ +#undef _Py_CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS + +/* Define if '-msse' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSE_INSTRUCTIONS + +/* Define if '-mssse3' is a valid compiler flag. */ +#undef _Py_CAN_COMPILE_SIMD_SSSE3_INSTRUCTIONS + /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy