diff --git a/ntl-loadtime-cpu.patch b/ntl-loadtime-cpu.patch index 3bb4d9b..78be8ea 100644 --- a/ntl-loadtime-cpu.patch +++ b/ntl-loadtime-cpu.patch @@ -1,5 +1,5 @@ ---- doc/config.txt.orig 2021-06-20 15:05:49.000000000 -0600 -+++ doc/config.txt 2021-06-23 19:59:29.902142132 -0600 +--- doc/config.txt.orig 2021-06-23 14:39:47.000000000 -0600 ++++ doc/config.txt 2023-08-10 15:35:41.240822372 -0600 @@ -420,6 +420,7 @@ NTL_AVOID_BRANCHING=off NTL_GF2X_NOINLINE=off NTL_GF2X_ALTCODE=off @@ -19,8 +19,8 @@ ########## More GMP Options: ---- include/NTL/config.h.orig 2021-06-20 15:05:49.000000000 -0600 -+++ include/NTL/config.h 2021-06-23 19:59:29.903142133 -0600 +--- include/NTL/config.h.orig 2021-06-23 14:39:46.000000000 -0600 ++++ include/NTL/config.h 2023-08-10 15:35:41.241822358 -0600 @@ -549,6 +549,19 @@ to be defined. Of course, to unset a f #error "NTL_SAFE_VECTORS defined but not NTL_STD_CXX11 or NTL_STD_CXX14" #endif @@ -41,9 +41,9 @@ ---- include/NTL/ctools.h.orig 2021-06-20 15:05:49.000000000 -0600 -+++ include/NTL/ctools.h 2021-06-23 19:59:29.904142134 -0600 -@@ -518,6 +518,155 @@ char *_ntl_make_aligned(char *p, long al +--- include/NTL/ctools.h.orig 2021-06-23 14:39:46.000000000 -0600 ++++ include/NTL/ctools.h 2023-08-10 15:57:28.389567821 -0600 +@@ -518,6 +518,92 @@ char *_ntl_make_aligned(char *p, long al // this should be big enough to satisfy any SIMD instructions, // and it should also be as big as a cache line @@ -59,23 +59,6 @@ +#error Runtime CPU support is only available with GCC 4.6 or later. +#endif + -+#include -+#ifndef bit_SSSE3 -+#define bit_SSSE3 (1 << 9) -+#endif -+#ifndef bit_PCLMUL -+#define bit_PCLMUL (1 << 1) -+#endif -+#ifndef bit_AVX -+#define bit_AVX (1 << 28) -+#endif -+#ifndef bit_FMA -+#define bit_FMA (1 << 12) -+#endif -+#ifndef bit_AVX2 -+#define bit_AVX2 (1 << 5) -+#endif -+ +#define BASE_FUNC(type,name) static type name##_base +#define TARGET_FUNC(arch,suffix,type,name) \ + static type __attribute__((target (arch))) name##_##suffix @@ -87,22 +70,13 @@ +#define SSSE3_RESOLVER(st,type,name,params) \ + extern "C" { \ + static type (*resolve_##name(void)) params { \ -+ if (__builtin_expect(have_avx2, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) { \ -+ have_avx2 = ((ebx & bit_AVX2) != 0); \ -+ } else { \ -+ have_avx2 = 0; \ -+ } \ -+ } \ -+ if (__builtin_expect(have_ssse3, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { \ -+ have_ssse3 = ((ecx & bit_SSSE3) != 0); \ -+ } else { \ -+ have_ssse3 = 0; \ -+ } \ -+ } \ ++ if (__builtin_expect(have_avx2, 0) < 0 || \ ++ __builtin_expect(have_ssse3, 0) < 0) \ ++ __builtin_cpu_init(); \ ++ if (__builtin_expect(have_avx2, 0) < 0) \ ++ have_avx2 = __builtin_cpu_supports("avx2"); \ ++ if (__builtin_expect(have_ssse3, 0) < 0) \ ++ have_ssse3 = __builtin_cpu_supports("ssse3"); \ + if (have_avx2) return &name##_avx2; \ + if (have_ssse3) return &name##_ssse3; \ + return &name##_base; \ @@ -112,18 +86,13 @@ +#define PCLMUL_RESOLVER(st,type,name,params) \ + extern "C" { \ + static type (*resolve_##name(void)) params { \ -+ if (__builtin_expect(have_pclmul, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { \ -+ have_pclmul = ((ecx & bit_PCLMUL) != 0); \ -+ have_avx = ((ecx & bit_AVX) != 0); \ -+ have_fma = ((ecx & bit_FMA) != 0); \ -+ } else { \ -+ have_pclmul = 0; \ -+ have_avx = 0; \ -+ have_fma = 0; \ -+ } \ -+ } \ ++ if (__builtin_expect(have_pclmul, 0) < 0 || \ ++ __builtin_expect(have_avx, 0) < 0) \ ++ __builtin_cpu_init(); \ ++ if (__builtin_expect(have_pclmul, 0) < 0) \ ++ have_pclmul = __builtin_cpu_supports("pclmul"); \ ++ if (__builtin_expect(have_avx, 0) < 0) \ ++ have_avx = __builtin_cpu_supports("avx"); \ + if (have_avx) return &name##_avx; \ + if (have_pclmul) return &name##_pclmul; \ + return &name##_base; \ @@ -133,17 +102,9 @@ +#define AVX_RESOLVER(st,type,name,params) \ + extern "C" { \ + static type (*resolve_##name(void)) params { \ -+ if (__builtin_expect(have_pclmul, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { \ -+ have_pclmul = ((ecx & bit_PCLMUL) != 0); \ -+ have_avx = ((ecx & bit_AVX) != 0); \ -+ have_fma = ((ecx & bit_FMA) != 0); \ -+ } else { \ -+ have_pclmul = 0; \ -+ have_avx = 0; \ -+ have_fma = 0; \ -+ } \ ++ if (__builtin_expect(have_avx, 0) < 0) { \ ++ __builtin_cpu_init(); \ ++ have_avx = __builtin_cpu_supports("avx"); \ + } \ + return have_avx ? &name##_avx : &name##_base; \ + } \ @@ -152,17 +113,9 @@ +#define FMA_RESOLVER(st,type,name,params) \ + extern "C" { \ + static type (*resolve_##name(void)) params { \ -+ if (__builtin_expect(have_pclmul, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { \ -+ have_pclmul = ((ecx & bit_PCLMUL) != 0); \ -+ have_avx = ((ecx & bit_AVX) != 0); \ -+ have_fma = ((ecx & bit_FMA) != 0); \ -+ } else { \ -+ have_pclmul = 0; \ -+ have_avx = 0; \ -+ have_fma = 0; \ -+ } \ ++ if (__builtin_expect(have_fma, 0) < 0) { \ ++ __builtin_cpu_init(); \ ++ have_fma = __builtin_cpu_supports("fma"); \ + } \ + return have_fma ? &name##_fma : &name##_avx; \ + } \ @@ -172,24 +125,8 @@ + extern "C" { \ + static type (*resolve_##name(void)) params { \ + if (__builtin_expect(have_avx2, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) { \ -+ have_avx2 = ((ebx & bit_AVX2) != 0); \ -+ } else { \ -+ have_avx2 = 0; \ -+ } \ -+ } \ -+ if (__builtin_expect(have_pclmul, 0) < 0) { \ -+ unsigned int eax, ebx, ecx, edx; \ -+ if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { \ -+ have_pclmul = ((ecx & bit_PCLMUL) != 0); \ -+ have_avx = ((ecx & bit_AVX) != 0); \ -+ have_fma = ((ecx & bit_FMA) != 0); \ -+ } else { \ -+ have_pclmul = 0; \ -+ have_avx = 0; \ -+ have_fma = 0; \ -+ } \ ++ __builtin_cpu_init(); \ ++ have_avx2 = __builtin_cpu_supports("avx2"); \ + } \ + return have_avx2 ? &name##_avx2 : &name##_fma; \ + } \ @@ -199,8 +136,8 @@ #ifdef NTL_HAVE_BUILTIN_CLZL ---- include/NTL/MatPrime.h.orig 2021-06-20 15:05:49.000000000 -0600 -+++ include/NTL/MatPrime.h 2021-06-23 19:59:29.904142134 -0600 +--- include/NTL/MatPrime.h.orig 2021-06-23 14:39:46.000000000 -0600 ++++ include/NTL/MatPrime.h 2023-08-10 15:35:41.241822358 -0600 @@ -20,7 +20,7 @@ NTL_OPEN_NNS @@ -210,8 +147,8 @@ #define NTL_MatPrime_NBITS (23) #else #define NTL_MatPrime_NBITS NTL_SP_NBITS ---- include/NTL/REPORT_ALL_FEATURES.h.orig 2021-06-20 15:05:49.000000000 -0600 -+++ include/NTL/REPORT_ALL_FEATURES.h 2021-06-23 19:59:29.905142135 -0600 +--- include/NTL/REPORT_ALL_FEATURES.h.orig 2021-06-23 14:39:47.000000000 -0600 ++++ include/NTL/REPORT_ALL_FEATURES.h 2023-08-10 15:35:41.241822358 -0600 @@ -63,3 +63,6 @@ std::cerr << "NTL_HAVE_KMA\n"; #endif @@ -219,8 +156,8 @@ +#ifdef NTL_LOADTIME_CPU + std::cerr << "NTL_LOADTIME_CPU\n"; +#endif ---- src/cfile.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/cfile 2021-06-23 19:59:29.906142136 -0600 +--- src/cfile.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/cfile 2023-08-10 15:35:41.241822358 -0600 @@ -449,6 +449,19 @@ to be defined. Of course, to unset a f #endif @@ -241,8 +178,8 @@ #if @{NTL_CRT_ALTCODE} ---- src/DispSettings.cpp.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/DispSettings.cpp 2021-06-23 19:59:29.906142136 -0600 +--- src/DispSettings.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/DispSettings.cpp 2023-08-10 15:35:41.242822344 -0600 @@ -192,6 +192,9 @@ cout << "Performance Options:\n"; cout << "NTL_RANDOM_AES256CTR\n"; #endif @@ -253,8 +190,8 @@ cout << "***************************/\n"; cout << "\n\n"; ---- src/DoConfig.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/DoConfig 2021-06-23 19:59:29.907142137 -0600 +--- src/DoConfig.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/DoConfig 2023-08-10 15:35:41.242822344 -0600 @@ -1,6 +1,7 @@ # This is a perl script, invoked from a shell @@ -286,8 +223,8 @@ # some special MakeVal values that are determined by SHARED ---- src/GF2EX.cpp.orig 2021-06-20 15:05:48.000000000 -0600 -+++ src/GF2EX.cpp 2021-06-23 19:59:29.908142138 -0600 +--- src/GF2EX.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/GF2EX.cpp 2023-08-10 15:35:41.242822344 -0600 @@ -801,7 +801,7 @@ void mul(GF2EX& c, const GF2EX& a, const if (GF2E::WordLength() <= 1) use_kron_mul = true; @@ -297,8 +234,8 @@ // With gf2x library and pclmul, KronMul is better in a larger range, but // it is very hard to characterize that range. The following is very // conservative. ---- src/GF2X1.cpp.orig 2021-06-20 15:05:48.000000000 -0600 -+++ src/GF2X1.cpp 2021-06-23 19:59:29.910142141 -0600 +--- src/GF2X1.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/GF2X1.cpp 2023-08-10 15:35:41.243822329 -0600 @@ -18,7 +18,7 @@ // simple scaling factor for some crossover points: // we use a lower crossover of the underlying multiplication @@ -308,8 +245,8 @@ #define XOVER_SCALE (1L) #else #define XOVER_SCALE (2L) ---- src/GF2X.cpp.orig 2021-06-20 15:05:48.000000000 -0600 -+++ src/GF2X.cpp 2021-06-23 19:59:29.911142142 -0600 +--- src/GF2X.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/GF2X.cpp 2023-08-10 15:35:41.243822329 -0600 @@ -27,6 +27,22 @@ pclmul_mul1 (unsigned long *c, unsigned _mm_storeu_si128((__m128i*)c, _mm_clmulepi64_si128(aa, bb, 0)); } @@ -652,8 +589,8 @@ void LeftShift(GF2X& c, const GF2X& a, long n) ---- src/InitSettings.cpp.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/InitSettings.cpp 2021-06-23 19:59:29.912142143 -0600 +--- src/InitSettings.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/InitSettings.cpp 2023-08-10 15:35:41.243822329 -0600 @@ -190,6 +190,11 @@ int main() cout << "NTL_RANGE_CHECK=0\n"; #endif @@ -666,8 +603,8 @@ // the following are not actual config flags, but help ---- src/mat_lzz_p.cpp.orig 2021-06-20 15:05:48.000000000 -0600 -+++ src/mat_lzz_p.cpp 2021-06-23 19:59:29.915142146 -0600 +--- src/mat_lzz_p.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/mat_lzz_p.cpp 2023-08-10 15:35:41.244822315 -0600 @@ -9,6 +9,15 @@ #ifdef NTL_HAVE_AVX @@ -1740,8 +1677,8 @@ V <= (MAX_DBL_INT-(p-1))/(p-1) && V*(p-1) <= (MAX_DBL_INT-(p-1))/(p-1)) { ---- src/QuickTest.cpp.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/QuickTest.cpp 2021-06-23 19:59:29.916142147 -0600 +--- src/QuickTest.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/QuickTest.cpp 2023-08-10 15:35:41.244822315 -0600 @@ -326,6 +326,9 @@ cerr << "Performance Options:\n"; cerr << "NTL_GF2X_NOINLINE\n"; #endif @@ -1752,8 +1689,8 @@ cerr << "\n\n"; ---- src/WizardAux.orig 2021-06-20 15:05:49.000000000 -0600 -+++ src/WizardAux 2021-06-23 19:59:29.916142147 -0600 +--- src/WizardAux.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/WizardAux 2023-08-10 15:35:41.244822315 -0600 @@ -89,6 +89,7 @@ system("$ARGV[0] InitSettings"); 'NTL_GF2X_NOINLINE' => 0, 'NTL_FFT_BIGTAB' => 0, @@ -1762,8 +1699,8 @@ 'WIZARD_HACK' => '#define NTL_WIZARD_HACK', ---- src/ZZ.cpp.orig 2021-06-20 15:05:48.000000000 -0600 -+++ src/ZZ.cpp 2021-06-23 19:59:29.918142149 -0600 +--- src/ZZ.cpp.orig 2021-06-23 14:39:46.000000000 -0600 ++++ src/ZZ.cpp 2023-08-10 15:35:41.245822302 -0600 @@ -14,6 +14,13 @@ #elif defined(NTL_HAVE_SSSE3) #include diff --git a/ntl.spec b/ntl.spec index e7e6aae..1a63069 100644 --- a/ntl.spec +++ b/ntl.spec @@ -3,7 +3,7 @@ Summary: High-performance algorithms for vectors, matrices, and polynomials Name: ntl Version: 11.5.1 -Release: 6%{?dist} +Release: 7%{?dist} # LGPL-2.1-or-later: the project as a whole # BSD-2-Clause: src/FFT.cpp @@ -128,6 +128,9 @@ done %changelog +* Thu Aug 10 2023 Jerry James - 11.5.1-7 +- Use a more reliable way of detecting CPU features + * Thu Jul 20 2023 Fedora Release Engineering - 11.5.1-6 - Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild