diff --git a/.atlas.metadata b/.atlas.metadata index b2c9319..9eaf6d0 100644 --- a/.atlas.metadata +++ b/.atlas.metadata @@ -1,6 +1,9 @@ 0e11ec19a521973eaa551954debd112c21479e9c SOURCES/ARMa732.tar.bz2 9398518fe55b4a544278237bc639656e04543c50 SOURCES/ARMv732NEON.tar.bz2 d2f7a62aacdc5091aaa673a311a23f521e5c6486 SOURCES/IBMz1264.tar.bz2 +042c0b9df85a9a469e20cf0801f83b03ec40425d SOURCES/IBMz1364VXZ.tar.bz2 +352e057319fa7503cd74a0ab81055dc286cc1c45 SOURCES/IBMz1464VXZ2.tar.bz2 +0abb8f638b8ffdc13994d533d8a4febcab364f2f SOURCES/IBMz1564VXZ2.tar.bz2 b3ee9bca1510b11c6aa671ba5ba7dff8918ce0cf SOURCES/IBMz932.tar.bz2 43f8d8eaf8cc62bc4665df3550b77e95f3dced22 SOURCES/IBMz964.tar.bz2 c47ac6f00d7bf4ab882e71fa1ab894cc551c77b7 SOURCES/POWER332.tar.bz2 diff --git a/.gitignore b/.gitignore index 977b861..b749620 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ SOURCES/ARMa732.tar.bz2 SOURCES/ARMv732NEON.tar.bz2 SOURCES/IBMz1264.tar.bz2 +SOURCES/IBMz1364VXZ.tar.bz2 +SOURCES/IBMz1464VXZ2.tar.bz2 +SOURCES/IBMz1564VXZ2.tar.bz2 SOURCES/IBMz932.tar.bz2 SOURCES/IBMz964.tar.bz2 SOURCES/POWER332.tar.bz2 diff --git a/SOURCES/0001-Avoid-c99-standard-compiler.patch b/SOURCES/0001-Avoid-c99-standard-compiler.patch new file mode 100644 index 0000000..c9c0c0e --- /dev/null +++ b/SOURCES/0001-Avoid-c99-standard-compiler.patch @@ -0,0 +1,30 @@ +From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 19 Feb 2019 19:03:52 +0100 +Subject: [PATCH 1/8] Avoid c99 standard compiler + +When probing for a usable GCC, the existing code already dropped path +names that contained "c89" or "c90", because these compilers don't have +the GCC extensions enabled. This patch also drops names with "c99" in +them. 
+--- + CONFIG/src/atlconf_misc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c +index 63cb1ef..fb62214 100644 +--- a/CONFIG/src/atlconf_misc.c ++++ b/CONFIG/src/atlconf_misc.c +@@ -824,7 +824,8 @@ int CompIsGcc(char *comp) + int i; + + cmpname = NameWithoutPath(comp); +- if (strstr(cmpname, "c89") || strstr(cmpname, "c90")) ++ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") || ++ strstr(cmpname, "c99")) + { + free(cmpname); + return(0); +-- +2.23.0 + diff --git a/SOURCES/0002-Fix-rpath-link-command-line-options.patch b/SOURCES/0002-Fix-rpath-link-command-line-options.patch new file mode 100644 index 0000000..25c5e4c --- /dev/null +++ b/SOURCES/0002-Fix-rpath-link-command-line-options.patch @@ -0,0 +1,38 @@ +From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 19 Feb 2019 19:20:19 +0100 +Subject: [PATCH 2/8] Fix -rpath-link command line options + +The "-rpath-link" command line options were written in the wrong syntax, +causing errors in the build. This is fixed. 
+--- + makes/Make.lib | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/makes/Make.lib b/makes/Make.lib +index 4ceff02..b322a32 100644 +--- a/makes/Make.lib ++++ b/makes/Make.lib +@@ -47,11 +47,11 @@ cshared : fat_cshared + # + LDTRY_WIN: + $(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \ +- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \ ++ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \ + --whole-archive $(libas) --no-whole-archive $(LIBS) + GCCTRY_WIN: + $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ +- -Wl,"-rpath-link $(LIBINSTdir)" \ ++ -Wl,"-rpath-link=$(LIBINSTdir)" \ + -Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS) + GCCTRY_norp_WIN: + $(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \ +@@ -113,7 +113,7 @@ TRYALL_WIN : + # + LDTRY: + $(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \ +- -rpath-link $(LIBINSTdir) \ ++ -rpath-link=$(LIBINSTdir) \ + --whole-archive $(libas) --no-whole-archive $(LIBS) + GCCTRY: + $(GOODGCC) -shared -o $(outso).$(so_ver) \ diff --git a/SOURCES/0003-Fix-SIMD-support-on-IBM-z13.patch b/SOURCES/0003-Fix-SIMD-support-on-IBM-z13.patch new file mode 100644 index 0000000..0330ef3 --- /dev/null +++ b/SOURCES/0003-Fix-SIMD-support-on-IBM-z13.patch @@ -0,0 +1,55 @@ +From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 5 Dec 2018 18:59:15 +0100 +Subject: [PATCH 3/8] Fix SIMD support on IBM z13 + +The header file atlas_simd.h contained a syntax error and a few functional +errors that affected IBM z13. It prevented any SIMD kernels from being +compiled successfully for that platform. This is fixed. The macro +vec_madd is avoided, because some GCC versions don't implement it +correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used +instead. 
+--- + include/atlas_simd.h | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/include/atlas_simd.h b/include/atlas_simd.h +index baee6b1..68daf79 100644 +--- a/include/atlas_simd.h ++++ b/include/atlas_simd.h +@@ -69,7 +69,7 @@ + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_VXZ) +- #if ATL_VLEN != 2; ++ #if ATL_VLEN != 2 + #define ATL_FRCGNUVEC + #endif + #elif defined(ATL_NEON) +@@ -390,19 +390,19 @@ + #define ATL_vld(v_, p_) v_ = vec_ld2f(p_); + #define ATL_vst(p_, v_) vec_st2f(v_, p_); + #endif +- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) ++ #define ATL_vzero(v_) v_ = vec_splats((double)0.0) + #define ATL_vcopy(d_, s_) d_ = s_ +- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) ++ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_))) + #define ATL_vuld(v_, p_) ATL_vld(v_, p_) + #define ATL_vust(p_, v_) ATL_vst(p_, v_) + #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ + #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ +- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_) ++ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) + #define ATL_vvrsum1(s0_) \ + { ATL_VTYPE t_;\ + t_ = vec_splat(s0_, 1); \ +- s0 += t_; \ ++ s0_ += t_; \ + } + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) +-- +2.23.0 + diff --git a/SOURCES/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch b/SOURCES/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch new file mode 100644 index 0000000..1d6ffb3 --- /dev/null +++ b/SOURCES/0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch @@ -0,0 +1,46 @@ +From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 12 Dec 2018 19:44:32 +0100 +Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible + +The probing of the L1 data cache size is sometimes not reliable. 
This can +cause the tuning to yield varying, sub-optimal results. But on Linux the +L1 data cache size can usually be retrieved with sysconf instead, which is +faster and more reliable. Do this whenever possible. +--- + tune/sysinfo/L1CacheSize.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c +index e62a273..dffa76e 100644 +--- a/tune/sysinfo/L1CacheSize.c ++++ b/tune/sysinfo/L1CacheSize.c +@@ -30,6 +30,7 @@ + + #include + #include ++#include + + #define REPS 4096 + +@@ -276,7 +277,16 @@ int main(int nargs, char *args[]) + exit(-1); + } + if (nargs > 1) MaxSize = atoi(args[1]); +- L1Size = GetL1Size(MaxSize, 1.08); ++ ++#ifdef _SC_LEVEL1_DCACHE_SIZE ++ { ++ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE); ++ L1Size = res > 0 ? (int) (res / 1024) : 0; ++ } ++#endif ++ ++ if (!L1Size) ++ L1Size = GetL1Size(MaxSize, 1.08); + if (!L1Size) + L1Size = GetL1Size(MaxSize, 1.08); + if (!L1Size) +-- +2.23.0 + diff --git a/SOURCES/0005-Optimizations-for-IBM-z13.patch b/SOURCES/0005-Optimizations-for-IBM-z13.patch new file mode 100644 index 0000000..e7fef5a --- /dev/null +++ b/SOURCES/0005-Optimizations-for-IBM-z13.patch @@ -0,0 +1,68 @@ +From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 12 Dec 2018 20:06:27 +0100 +Subject: [PATCH 5/8] Optimizations for IBM z13 + +Perform some optimizations for IBM z13: +- Compile with -O2 instead of -O. +- Streamline vector loads/stores. +- Define the vvrsum2 macro. + +Also, use the compile option -march=z13 instead of -march=native. 
+--- + CONFIG/src/atlcomp.txt | 8 +++----- + include/atlas_simd.h | 11 +++++------ + 2 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt +index aa31604..2ac71cf 100644 +--- a/CONFIG/src/atlcomp.txt ++++ b/CONFIG/src/atlcomp.txt +@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77 + 'gfortran' '-O3 -funroll-loops' + MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc + 'gcc' '-O3 -funroll-loops' +-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc +- 'gcc' '-march=native -O -mvx -mzvector' +-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc +- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2' ++MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc ++ 'gcc' '-march=z13 -mtune=z13 -O2' + MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 +- 'gfortran' '-march=native -O -mvx -mzvector' ++ 'gfortran' '-march=z13 -mtune=z13 -O2' + # + # Windows defaults ; need to make SSE/SSE2 arch dep. 
+ # +diff --git a/include/atlas_simd.h b/include/atlas_simd.h +index 68daf79..f171933 100644 +--- a/include/atlas_simd.h ++++ b/include/atlas_simd.h +@@ -384,8 +384,8 @@ + #endif + #define ATL_VTYPE vector double + #if (defined(DREAL) || defined(DCPLX)) +- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; } +- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];} ++ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_) ++ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_ + #else + #define ATL_vld(v_, p_) v_ = vec_ld2f(p_); + #define ATL_vst(p_, v_) vec_st2f(v_, p_); +@@ -400,10 +400,9 @@ + #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ + #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) + #define ATL_vvrsum1(s0_) \ +- { ATL_VTYPE t_;\ +- t_ = vec_splat(s0_, 1); \ +- s0_ += t_; \ +- } ++ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); } ++ #define ATL_vvrsum2(s0_, s1_) \ ++ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); } + #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) + #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX)) +-- +2.23.0 + diff --git a/SOURCES/0006-Add-IBM-z14-support.patch b/SOURCES/0006-Add-IBM-z14-support.patch new file mode 100644 index 0000000..b00761b --- /dev/null +++ b/SOURCES/0006-Add-IBM-z14-support.patch @@ -0,0 +1,276 @@ +From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 25 Mar 2020 20:11:19 +0100 +Subject: [PATCH 6/8] Add IBM z14 support + +Add general support for IBM z14. Also detect and handle the vector +enhancements facility 1, which specifically adds single-precision FP +arithmetic for vectors. 
+--- + CONFIG/include/atlconf.h | 14 ++++---- + CONFIG/src/Makefile | 6 ++++ + CONFIG/src/atlcomp.txt | 4 +++ + CONFIG/src/backend/Make.ext | 4 ++- + CONFIG/src/backend/archinfo_linux.c | 3 +- + CONFIG/src/backend/probe_vxz2.c | 12 +++++++ + CONFIG/src/probe_comp.c | 3 +- + include/atlas_prefetch.h | 3 +- + include/atlas_simd.h | 53 +++++++++++++++++++++++++++++ + 9 files changed, 91 insertions(+), 11 deletions(-) + create mode 100644 CONFIG/src/backend/probe_vxz2.c + +diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h +index e51d56d..3828fdb 100644 +--- a/CONFIG/include/atlconf.h ++++ b/CONFIG/include/atlconf.h +@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS, + * Corei3EP: v3 Haswell, E5-26XX + * Corei4: skylake + */ +-#define NMACH 62 ++#define NMACH 63 + static char *machnam[NMACH] = + {"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5", + "POWER6", "POWER7", "POWER8", "POWERe6500", +- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", ++ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", + "x86x87", "x86SSE1", "x86SSE2", "x86SSE3", + "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo", + "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3", +@@ -42,7 +42,7 @@ static char *machnam[NMACH] = + "ARM64xgene1", "ARM64a53", "ARM64a57"}; + enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, +- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */ ++ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */ + x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */ + IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS, + IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2, +@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + #define MachIsARM64(mach_) \ + ( (mach_) >= ARM64xg && || (mach_) <= ARM64a57) + #define MachIsS390(mach_) \ +- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 
) ++ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 ) + + + static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; +@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase}; + enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort}; + enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr}; + +-#define NISA 15 ++#define NISA 16 + static char *ISAXNAM[NISA] = +- {"", "VSX", "VXZ", "AltiVec", ++ {"", "VSX", "VXZ2", "VXZ", "AltiVec", + "AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow", + "FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"}; + enum ISAEXT +- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV, ++ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV, + ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow, + ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC}; + +diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile +index 212b9d7..782a4cf 100644 +--- a/CONFIG/src/Makefile ++++ b/CONFIG/src/Makefile +@@ -158,6 +158,12 @@ IRun_NEON : + $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \ + redir=config0.out + - cat config0.out ++IRun_VXZ2 : ++ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \ ++ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c ++ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \ ++ redir=config0.out ++ - cat config0.out + IRun_VXZ : + $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \ + $(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c +diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt +index 2ac71cf..2cfacc2 100644 +--- a/CONFIG/src/atlcomp.txt ++++ b/CONFIG/src/atlcomp.txt +@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc + 'gcc' '-march=z13 -mtune=z13 -O2' + MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-march=z13 -mtune=z13 -O2' ++MACH=IBMz14 OS=ALL LVL=1000 
COMPS=smc,dmc,skc,dkc,icc,xcc,gcc ++ 'gcc' '-march=z14 -mtune=z14 -O2' ++MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77 ++ 'gfortran' '-march=z14 -mtune=z14 -O2' + # + # Windows defaults ; need to make SSE/SSE2 arch dep. + # +diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext +index 4743353..794babf 100644 +--- a/CONFIG/src/backend/Make.ext ++++ b/CONFIG/src/backend/Make.ext +@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \ + probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \ + probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \ + probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \ +- probe_vxz.c ++ probe_vxz2.c probe_vxz.c + + all : $(files) + +@@ -107,6 +107,8 @@ flibchkF.f : $(basf) + $(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f + probe_arm32_FPABI.c : $(basf) + $(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI ++probe_vxz2.c : $(basf) ++ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2 + probe_vxz.c : $(basf) + $(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz + probe_aff_SETAFFNP.c : $(basf) +diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c +index cdcee92..ed6f476 100644 +--- a/CONFIG/src/backend/archinfo_linux.c ++++ b/CONFIG/src/backend/archinfo_linux.c +@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch() + else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196; + else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12; + else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13; +- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */ ++ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14; ++ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */ + free(res); + } + break; +diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c +new file mode 100644 +index 0000000..a69d92d +--- /dev/null ++++ 
b/CONFIG/src/backend/probe_vxz2.c +@@ -0,0 +1,12 @@ ++#include ++void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y ++{ ++ vector float vx, vy; ++ vx = (vector float) {x[0], x[1], x[2], x[3]}; ++ vy = (vector float) {y[0], y[1], y[2], y[3]}; ++ vy += vx; ++ z[0] = vy[0]; ++ z[1] = vy[1]; ++ z[2] = vy[2]; ++ z[3] = vy[3]; ++} +diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c +index 1652e24..857ea82 100644 +--- a/CONFIG/src/probe_comp.c ++++ b/CONFIG/src/probe_comp.c +@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb, + vp = "-mavx2 -mfma"; + else if (vecexts & (1< ++ ++ #define ATL_VPERMI(s_, t_, i_) \ ++ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_)) ++ ++ #if defined(SREAL) || defined(SCPLX) ++ #define ATL_VTYPE vector float ++ #if ATL_VLEN != 4 ++ #error "VSXZ2 supports only VLEN = 4 for floats!" ++ #endif ++ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \ ++ { ATL_VTYPE t0_, t1_; \ ++ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \ ++ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \ ++ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \ ++ } ++ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2) ++ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3) ++ #else /* double precision */ ++ #define ATL_VTYPE vector double ++ #if ATL_VLEN != 2 ++ #error "VSXZ2 supports only VLEN = 2 for doubles!" 
++ #endif ++ #define ATL_vvrsum1(s0_) \ ++ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); } ++ #define ATL_vvrsum2(s0_, s1_) \ ++ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); } ++ #endif ++ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_) ++ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_ ++ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0) ++ #define ATL_vcopy(d_, s_) d_ = s_ ++ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_))) ++ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_)) ++ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_)) ++ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_ ++ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_ ++ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_ ++ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_) ++ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0) ++ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1) + #elif defined(ATL_VXZ) + #include + +-- +2.23.0 + diff --git a/SOURCES/0007-Enable-cross-compile.patch b/SOURCES/0007-Enable-cross-compile.patch new file mode 100644 index 0000000..9ab7d29 --- /dev/null +++ b/SOURCES/0007-Enable-cross-compile.patch @@ -0,0 +1,265 @@ +From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 29 May 2019 17:51:34 +0200 +Subject: [PATCH 7/8] Enable "cross-compile" + +This adds support for building ATLAS without running any target code. In +order for this to work, the archdefs must contain some additional files +that would otherwise be built during various tuning steps; see the new +targets extra_get and extra_put in "CONFIG/ARCHS/Makefile". + +Even if the archdefs contain these additional files, cross compilation +is *not* automatically enabled. To activate it and disable tuning at +build time, add the option "-Si archdef 2" when running "configure". 
+--- + CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++ + bin/atlas_install.c | 2 ++ + makes/Make.aux | 10 +++++----- + makes/Make.bin | 22 ++++++++++++++++++++++ + makes/Make.l3tune | 6 ++++++ + makes/Make.sysinfo | 8 +++++++- + 6 files changed, 66 insertions(+), 6 deletions(-) + +diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile +index 321e05c..e61b5a0 100644 +--- a/CONFIG/ARCHS/Makefile ++++ b/CONFIG/ARCHS/Makefile +@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt + - cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/. + rm -f xnegflt + archput : sys_put kern_put gemm_put la_put ++ ++ifdef ATL_NOTUNE ++ ++# To avoid tuning, some extra files are needed. ++ ++extra_get : ++ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/ ++ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/ ++ ++extra_put : ++ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/. ++ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/. 
++ ++ArchNew : extra_get ++ ++archput : extra_put ++ ++endif +diff --git a/bin/atlas_install.c b/bin/atlas_install.c +index de3eb3a..3c811e6 100644 +--- a/bin/atlas_install.c ++++ b/bin/atlas_install.c +@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA) + ATL_Cassert(system("make IBozoL1.grd\n")==0, + "USING BOZO L1 DEFAULTS", NULL); + } ++ if (ARCHDEF >= 2) ++ setenv("ATL_NOTUNE", "1", 1); + if (ARCHDEF) + DefInstall = !system("make IArchDef.grd\n"); + +diff --git a/makes/Make.aux b/makes/Make.aux +index 1f769c8..c793028 100644 +--- a/makes/Make.aux ++++ b/makes/Make.aux +@@ -113,23 +113,23 @@ clean : + + $(ATLFWAIT) : + cd $(BINdir) ; $(MAKE) xatlas_waitfile +-$(INCAdir)/atlas_type.h : $(ATLFWAIT) ++$(INCAdir)/atlas_type.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h + $(ATLFWAIT) -f $(INCAdir)/atlas_type.h + sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h + dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h + cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h + zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h +-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT) ++$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT) + cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h + $(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h + +diff --git a/makes/Make.bin b/makes/Make.bin +index 1035cb9..acad578 100644 +--- a/makes/Make.bin ++++ b/makes/Make.bin +@@ 
-163,7 +163,9 @@ IRunMADef : + cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre) + + IRunMMDef : ++ifndef ATL_NOTUNE + cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre) ++endif + cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2 + cd $(MMTdir) ; $(MAKE) install pre=$(pre) + IKillL1 : force_build +@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \ + cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/. + + $(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build ++ifdef ATL_NOTUNE ++ cd $(R1Tdir) ; $(MAKE) $(pre)r2install ++else + cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre) ++endif + $(R1Tdir)/res/$(pre)R1K.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(R1Tdir) ; $(MAKE) $(pre)r1install ++else + cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum + cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/. + INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \ + $(R1Tdir)/res/$(pre)R2K.sum + cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/. ++ifndef ATL_NOTUNE + cd $(R1Tdir) ; $(MAKE) $(pre)nxtune ++else ++ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib ++endif + + $(MVTdir)/res/$(pre)MVNK.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall ++else + cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum + cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/. + $(MVTdir)/res/$(pre)MVTK.sum : force_build ++ifdef ATL_NOTUNE ++ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall ++else + cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre) ++endif + INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum + cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/. + +diff --git a/makes/Make.l3tune b/makes/Make.l3tune +index eaf7d7d..cd7f5f1 100644 +--- a/makes/Make.l3tune ++++ b/makes/Make.l3tune +@@ -118,6 +118,7 @@ res/atlas_strsmXover.h : + cp $(strsmXover) res/. 
+ + stsmfc : ++ifndef ATL_NOTUNE + rm -f $(strsmXover) + cd $(L3Bdir) ; $(MAKE) slib + $(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \ +@@ -128,6 +129,7 @@ stsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) slib + dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h + +@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h : + cp $(dtrsmXover) res/. + + dtsmfc : ++ifndef ATL_NOTUNE + rm -f $(dtrsmXover) + cd $(L3Bdir) ; $(MAKE) dlib + $(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \ +@@ -148,6 +151,7 @@ dtsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) dlib + qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h + +@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h : + cp $(qtrsmXover) res/. + + qtsmfc : ++ifndef ATL_NOTUNE + rm -f $(qtrsmXover) + cd $(L3Bdir) ; $(MAKE) qlib + $(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \ +@@ -168,6 +173,7 @@ qtsmfc : + tran=NoTranspose_ diag=$(diag) + $(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \ + tran=Transpose_ diag=$(diag) ++endif + cd $(L3Bdir) ; $(MAKE) qlib + + $(pre)tsmfc.o : force_build +diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo +index 2b7dfdc..8e5dab2 100644 +--- a/makes/Make.sysinfo ++++ b/makes/Make.sysinfo +@@ -5,6 +5,7 @@ maxlat=6 + mflop=200 + flags= + ++ifndef ATL_NOTUNE + sTestFlags : force_build + $(MAKE) srbob `cat res/sBEST` pre='s' type=float + +@@ -85,12 +86,14 @@ RunLamch : xemit_lamch + cp res/atlas_?lamch.h $(INCAdir)/. 
+ RunTyp: xemit_typ + $(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h ++endif + + xemit_buildinfo : emit_buildinfo.o + $(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o + xsyssum : GetSysSum.o + $(XCC) $(XCCFLAGS) -o $@ GetSysSum.o + ++ifndef ATL_NOTUNE + xL1 : time.o L1CacheSize.o + $(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o + +@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c + $(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c + xmasrch : $(mySRCdir)/masrch.c + $(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c ++endif + + ATL_cputime.c : + cp $(mySRCdir)/ATL_cputime.c . +@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c + $(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c + GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c + $(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c ++ ++ifndef ATL_NOTUNE + time.o : $(mySRCdir)/time.c + $(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c + emit_lamch.o : $(mySRCdir)/emit_lamch.c +@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c + $(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c + tlb.o : $(mySRCdir)/tlb.c + $(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c +- ++endif + + + force_build : +-- +2.23.0 + diff --git a/SOURCES/0008-Add-IBM-z15-support.patch b/SOURCES/0008-Add-IBM-z15-support.patch new file mode 100644 index 0000000..526a039 --- /dev/null +++ b/SOURCES/0008-Add-IBM-z15-support.patch @@ -0,0 +1,105 @@ +From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 26 Mar 2020 17:14:49 +0100 +Subject: [PATCH 8/8] Add IBM z15 support + +Add support for specifying "IBMz15" as target architecture. 
+--- + CONFIG/include/atlconf.h | 8 ++++---- + CONFIG/src/atlcomp.txt | 4 ++++ + CONFIG/src/backend/archinfo_linux.c | 1 + + CONFIG/src/probe_comp.c | 1 + + include/atlas_prefetch.h | 2 +- + 5 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h +index 3828fdb..382601f 100644 +--- a/CONFIG/include/atlconf.h ++++ b/CONFIG/include/atlconf.h +@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS, + * Corei3EP: v3 Haswell, E5-26XX + * Corei4: skylake + */ +-#define NMACH 63 ++#define NMACH 64 + static char *machnam[NMACH] = + {"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5", + "POWER6", "POWER7", "POWER8", "POWERe6500", +- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", ++ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15", + "x86x87", "x86SSE1", "x86SSE2", "x86SSE3", + "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo", + "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3", +@@ -42,7 +42,7 @@ static char *machnam[NMACH] = + "ARM64xgene1", "ARM64a53", "ARM64a57"}; + enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500, +- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */ ++ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15, + x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */ + IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS, + IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2, +@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5, + #define MachIsARM64(mach_) \ + ( (mach_) >= ARM64xg && || (mach_) <= ARM64a57) + #define MachIsS390(mach_) \ +- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 ) ++ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 ) + + + static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"}; +diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt +index 
2cfacc2..acb2c83 100644 +--- a/CONFIG/src/atlcomp.txt ++++ b/CONFIG/src/atlcomp.txt +@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc + 'gcc' '-march=z14 -mtune=z14 -O2' + MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77 + 'gfortran' '-march=z14 -mtune=z14 -O2' ++MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc ++ 'gcc' '-march=arch13 -mtune=arch13 -O2' ++MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77 ++ 'gfortran' '-march=arch13 -mtune=arch13 -O2' + # + # Windows defaults ; need to make SSE/SSE2 arch dep. + # +diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c +index ed6f476..934a005 100644 +--- a/CONFIG/src/backend/archinfo_linux.c ++++ b/CONFIG/src/backend/archinfo_linux.c +@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch() + else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12; + else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13; + else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14; ++ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15; + else mach = IbmZ14; /* looks risky to me, but IBM folks did it */ + free(res); + } +diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c +index 857ea82..88bb25e 100644 +--- a/CONFIG/src/probe_comp.c ++++ b/CONFIG/src/probe_comp.c +@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch, + case IbmZ12: + case IbmZ13: + case IbmZ14: ++ case IbmZ15: + case IntCorei3: + case IntCorei4: + case IntCorei2: +diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h +index fa426ac..583f19d 100644 +--- a/include/atlas_prefetch.h ++++ b/include/atlas_prefetch.h +@@ -156,7 +156,7 @@ + #define ATL_L2LS 64 + #elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \ + defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \ +- defined(ATL_ARCH_IbmZ14) ++ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15) + #define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3) + #define 
ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3) + #define ATL_GOT_L1PREFETCH +-- +2.23.0 + diff --git a/SPECS/atlas.spec b/SPECS/atlas.spec index 91d6f0c..409a0f6 100644 --- a/SPECS/atlas.spec +++ b/SPECS/atlas.spec @@ -5,7 +5,7 @@ Version: 3.10.3 %if "%{?enable_native_atlas}" != "0" %define dist .native %endif -Release: 7%{?dist} +Release: 8%{?dist} Summary: Automatically Tuned Linear Algebra Software Group: System Environment/Libraries @@ -31,6 +31,11 @@ Source14: ARMv732NEON.tar.bz2 Source15: IBMz1264.tar.bz2 Source16: ARMa732.tar.bz2 +#provided by IBM (3.10.3-8) +Source17: IBMz1364VXZ.tar.bz2 +Source18: IBMz1464VXZ2.tar.bz2 +Source19: IBMz1564VXZ2.tar.bz2 + # Properly pass -melf_* to the linker with -Wl, fixes FTBFS bug 817552 # https://sourceforge.net/tracker/?func=detail&atid=379484&aid=3555789&group_id=23725 Patch3: atlas-melf.patch @@ -47,7 +52,18 @@ Patch9: atlas.3.10.1-unbundle.patch # Atlas getri patch (covscan) Patch10: atlas-getri.patch -BuildRequires: gcc-gfortran, lapack-static, gcc +# Atlas patches dealing with z{13,14,15} support and crosscompilation provided by IBM +Patch11: 0001-Avoid-c99-standard-compiler.patch +Patch12: 0002-Fix-rpath-link-command-line-options.patch +Patch13: 0003-Fix-SIMD-support-on-IBM-z13.patch +Patch14: 0004-Read-L1-data-cache-size-from-sysconf-if-possible.patch +Patch15: 0005-Optimizations-for-IBM-z13.patch +Patch16: 0006-Add-IBM-z14-support.patch +Patch17: 0007-Enable-cross-compile.patch +Patch18: 0008-Add-IBM-z15-support.patch + + +BuildRequires: gcc-gfortran, lapack-static, gcc, lapack-devel %ifarch x86_64 Obsoletes: atlas-sse3 < 3.10.3-1 @@ -61,8 +77,8 @@ Obsoletes: atlas-sse3 < 3.10.3-1 %endif %ifarch s390 s390x -#Obsoletes: atlas-z10 < 3.10 -#Obsoletes: atlas-z196 < 3.10 +Obsoletes: atlas-z10 < 3.10.3-8 +Obsoletes: atlas-z196 < 3.10.3-8 %endif @@ -100,6 +116,13 @@ Obsoletes: atlas-sse-devel < 3.10.3-1 Obsoletes: atlas-sse2-devel < 3.10.3-1 Obsoletes: atlas-sse3-devel < 3.10.3-1 %endif + + +%ifarch s390 s390x +Obsoletes: 
atlas-z10-devel < 3.10.3-8 +Obsoletes: atlas-z196-devel < 3.10.3-8 +%endif + %description devel This package contains headers for development with ATLAS (Automatically Tuned Linear Algebra Software). @@ -121,6 +144,11 @@ Obsoletes: atlas-sse-static < 3.10.3-1 Obsoletes: atlas-sse2-static < 3.10.3-1 Obsoletes: atlas-sse3-static < 3.10.3-1 %endif + +%ifarch s390 s390x +Obsoletes: atlas-z10-static < 3.10.3-8 +Obsoletes: atlas-z196-static < 3.10.3-8 +%endif %description static This package contains static version of ATLAS (Automatically Tuned Linear Algebra Software). @@ -133,8 +161,6 @@ Linear Algebra Software). # %ifarch x86_64 %define types base corei2 -#corei4 -# sse3 %package corei2-static Summary: ATLAS libraries for Corei2 (Ivy/Sandy bridge) CPUs @@ -169,114 +195,49 @@ optimizations for the corei2 (Ivy/Sandy bridge) CPUs. %endif %ifarch %{ix86} -%define types base -#corei1 - -#%package corei1 -#Summary: ATLAS libraries for Corei1 (Nehalem/Westmere) CPUs -#Group: System Environment/Libraries - -#%description corei1 -#This package contains ATLAS (Automatically Tuned Linear Algebra Software) -#shared libraries compiled with optimizations for the Corei1 (Nehalem/Westmere) CPUs. -#The base ATLAS builds for the ix86 architecture are made for PIII CPUs. - -#%package corei1-devel -#Summary: Development libraries for ATLAS for Corei1 (Nehalem/Westmere) CPUs -#Group: Development/Libraries -#Requires: %{name}-corei1 = %{version}-%{release} -#Obsoletes: %name-header <= %version-%release -#Requires(posttrans): chkconfig -#Requires(postun): chkconfig - -#%description corei1-devel -#This package contains shared and static versions of the ATLAS -#(Automatically Tuned Linear Algebra Software) libraries compiled with -#optimizations for the corei1 (Nehalem/Westmere) CPUs. 
- -#%package corei1-static -#Summary: Static libraries for ATLAS for Corei1 (/Nehalem/Westmere) CPUs -#Group: Development/Libraries -#Requires: %{name}-corei1-devel = %{version}-%{release} -#Requires(posttrans): chkconfig -#Requires(postun): chkconfig - -#%description corei1-static -#This package contains the ATLAS (Automatically Tuned Linear Algebra -#Software) static libraries compiled with optimizations for the Corei1 (Nehalem/Westemere) -#CPUs. The base ATLAS builds for the ix86 architecture are made for the PIII CPUs. +%define types base %endif %ifarch s390 s390x -%define types base z196 z10 - -%package z196 -Summary: ATLAS libraries for z196 -Group: System Environment/Libraries - -%description z196 -This package contains the ATLAS (Automatically Tuned Linear Algebra -Software) libraries compiled with optimizations for the z196. +%define types base z14 -%package z196-devel -Summary: Development libraries for ATLAS for z196 -Group: Development/Libraries -Requires: %{name}-z196 = %{version}-%{release} -Obsoletes: %name-z196-header <= %version-%release -Requires(posttrans): chkconfig -Requires(postun): chkconfig - -%description z196-devel -This package contains headers and shared versions of the ATLAS -(Automatically Tuned Linear Algebra Software) libraries compiled with -optimizations for the z196 architecture. - -%package z196-static -Summary: Static libraries for ATLAS -Group: Development/Libraries -Requires: %{name}-z196-devel = %{version}-%{release} -Requires(posttrans): chkconfig -Requires(postun): chkconfig - -%description z196-static -This package contains static version of ATLAS (Automatically Tuned -Linear Algebra Software) for the z196 architecture. +#z14 - -%package z10 -Summary: ATLAS libraries for z10 +%package z14 +Summary: ATLAS libraries for z14 Group: System Environment/Libraries -%description z10 -This package contains the ATLAS (Automatically Tuned Linear Algebra -Software) libraries compiled with optimizations for the z10. 
+%description z14 +This package contains ATLAS (Automatically Tuned Linear Algebra Software) +shared libraries compiled with optimizations for the z14 CPUs. -%package z10-devel -Summary: Development libraries for ATLAS for z10 +%package z14-devel +Summary: Development libraries for ATLAS for z14 Group: Development/Libraries -Requires: %{name}-z10 = %{version}-%{release} +Requires: %{name}-z14 = %{version}-%{release} Obsoletes: %name-header <= %version-%release Requires(posttrans): chkconfig Requires(postun): chkconfig -%description z10-devel -This package contains headers and shared versions of the ATLAS +%description z14-devel +This package contains shared and static versions of the ATLAS (Automatically Tuned Linear Algebra Software) libraries compiled with -optimizations for the z10 architecture. +optimizations for the z14 CPUs. -%package z10-static -Summary: Static libraries for ATLAS +%package z14-static +Summary: Static libraries for ATLAS for z14 Group: Development/Libraries -Requires: %{name}-devel = %{version}-%{release} +Requires: %{name}-z14-devel = %{version}-%{release} Requires(posttrans): chkconfig Requires(postun): chkconfig -%description z10-static -This package contains static version of ATLAS (Automatically Tuned -Linear Algebra Software) for the z10 architecture. - +%description z14-static +This package contains the ATLAS (Automatically Tuned Linear Algebra +Software) static libraries compiled with optimizations for the z14 +CPUs. +#z15 %endif @@ -356,7 +317,6 @@ CPUs. The base ATLAS builds for the ppc64 architecture are made for the Power 5 %endif %prep -#cat /proc/cpuinfo %setup -q -n ATLAS #patch0 -p0 -b .shared #arm patch not applicable, probably not needed @@ -373,6 +333,19 @@ CPUs. 
The base ATLAS builds for the ppc64 architecture are made for the Power 5 %patch9 -p1 -b .unbundle %patch10 -p1 -b .getri +#IBM patches +%ifarch s390x s390 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +#As of rhel 8.3, z15 is not supported by the gcc +#%patch18 -p1 +%endif + cp %{SOURCE1} CONFIG/ARCHS/ #cp %{SOURCE2} CONFIG/ARCHS/ cp %{SOURCE3} doc @@ -382,6 +355,13 @@ cp %{SOURCE13} CONFIG/ARCHS/ cp %{SOURCE14} CONFIG/ARCHS/ cp %{SOURCE15} CONFIG/ARCHS/ cp %{SOURCE16} CONFIG/ARCHS/ +#z13 +cp %{SOURCE17} CONFIG/ARCHS/ +#z14 +cp %{SOURCE18} CONFIG/ARCHS/ +#z15 +cp %{SOURCE19} CONFIG/ARCHS/ + #cp %{SOURCE8} CONFIG/ARCHS/ #cp %{SOURCE9} CONFIG/ARCHS/ @@ -400,7 +380,7 @@ mkdir lapacklib cd lapacklib ar x %{_libdir}/liblapack_pic.a # Remove functions that have ATLAS implementations -rm -f cgelqf.o cgels.o cgeqlf.o cgeqrf.o cgerqf.o cgesv.o cgetrf.o cgetri.o cgetrs.o clarfb.o clarft.o clauum.o cposv.o cpotrf.o cpotri.o cpotrs.o ctrtri.o dgelqf.o dgels.o dgeqlf.o dgeqrf.o dgerqf.o dgesv.o dgetrf.o dgetri.o dgetrs.o dlamch.o dlarfb.o dlarft.o dlauum.o dposv.o dpotrf.o dpotri.o dpotrs.o dtrtri.o ieeeck.o ilaenv.o lsame.o sgelqf.o sgels.o sgeqlf.o sgeqrf.o sgerqf.o sgesv.o sgetrf.o sgetri.o sgetrs.o slamch.o slarfb.o slarft.o slauum.o sposv.o spotrf.o spotri.o spotrs.o strtri.o xerbla.o zgelqf.o zgels.o zgeqlf.o zgeqrf.o zgerqf.o zgesv.o zgetrf.o zgetri.o zgetrs.o zlarfb.o zlarft.o zlauum.o zposv.o zpotrf.o zpotri.o zpotrs.o ztrtri.o +rm -f cgelqf.o cgels.o cgeqlf.o cgeqrf.o cgerqf.o cgesv.o cgetrf.o cgetri.o cgetrs.o clarfb.o clarft.o clauum.o cposv.o cpotrf.o cpotri.o cpotrs.o ctrtri.o dgelqf.o dgels.o dgeqlf.o dgeqrf.o dgerqf.o dgesv.o dgetrf.o dgetri.o dgetrs.o dlamch.o dlarfb.o dlarft.o dlauum.o dposv.o dpotrf.o dpotri.o dpotrs.o dtrtri.o ieeeck.o ilaenv.o lsame.o sgelqf.o sgels.o sgeqlf.o sgeqrf.o sgerqf.o sgesv.o sgetrf.o sgetri.o sgetrs.o slamch.o slarfb.o slarft.o slauum.o sposv.o spotrf.o spotri.o spotrs.o 
strtri.o xerbla.o zgelqf.o zgels.o zgeqlf.o zgeqrf.o zgerqf.o zgesv.o zgetrf.o zgetri.o zgetrs.o zlarfb.o zlarft.o zlauum.o zposv.o zpotrf.o zpotri.o zpotrs.o ztrtri.o # Create new library ar rcs ../liblapack_pic_pruned.a *.o cd .. @@ -420,9 +400,9 @@ p=$(pwd) %define threads_option "-t 2" #Target architectures for the 'base' versions -%ifarch s390x +%ifarch s390x %define flags %{nil} -%define base_options "-A IBMz9 -V 1" +%define base_options "-A IBMz13 -V 8 -Si archdef 2" %endif %ifarch x86_64 @@ -467,10 +447,11 @@ p=$(pwd) %endif for type in %{types}; do + if [ "$type" = "base" ]; then libname=atlas arg_options=%{base_options} - thread_options=%{threads_option} + thread_options=%{threads_option} %define pr_base %(echo $((%{__isa_bits}+0))) else libname=atlas-${type} @@ -481,12 +462,15 @@ for type in %{types}; do elif [ "$type" = "corei1" ]; then arg_options="-A Corei1 -V 896" %define pr_corei1 %(echo $((%{__isa_bits}+2))) - elif [ "$type" = "z10" ]; then - arg_options="-A IBMz10 -V 1" - %define pr_z10 %(echo $((%{__isa_bits}+2))) - elif [ "$type" = "z196" ]; then - arg_options="-A IBMz196 -V 1" - %define pr_z196 %(echo $((%{__isa_bits}+4))) + elif [ "$type" = "z14" ]; then + thread_options="-t 4" + arg_options="-A IBMz14 -V 4 -Si archdef 2" + %define pr_z14 %(echo $((%{__isa_bits}+2))) + #gcc in rhel 8.3 does not support z15, z15 subpackage is thus not being built/shipped + elif [ "$type" = "z15" ]; then + thread_options="-t 0" + arg_options="-A IBMz15 -V 4 -Si archdef 2" + %define pr_z15 %(echo $((%{__isa_bits}+4))) elif [ "$type" = "power7" ]; then thread_options="-t 4" arg_options="-A POWER7 -V 1" @@ -497,12 +481,13 @@ for type in %{types}; do %define pr_power8 %(echo $((%{__isa_bits}+4))) fi fi + mkdir -p %{_arch}_${type} pushd %{_arch}_${type} - ../configure %{mode} $thread_options $arg_options -D c -DWALL -Fa alg '%{flags} -D_FORTIFY_SOURCE=2 -g -Wa,--noexecstack,--generate-missing-build-notes=yes -fstack-protector-strong -fstack-clash-protection -fPIC 
-fplugin=annobin -Wl,-z,now'\ + ../configure %{mode} $thread_options $arg_options -D c -DWALL -F xc ' ' -Fa alg '%{flags} -D_FORTIFY_SOURCE=2 -g -Wa,--noexecstack,--generate-missing-build-notes=yes -fstack-protector-strong -fstack-clash-protection -fPIC -fplugin=annobin -Wl,-z,now'\ --prefix=%{buildroot}%{_prefix} \ --incdir=%{buildroot}%{_includedir} \ - --libdir=%{buildroot}%{_libdir}/${libname} + --libdir=%{buildroot}%{_libdir}/${libname} #--with-netlib-lapack-tarfile=%{SOURCE10} #matches both SLAPACK and SSLAPACK @@ -526,7 +511,7 @@ for type in %{types}; do popd done -%install +%install for type in %{types}; do pushd %{_arch}_${type} make DESTDIR=%{buildroot} install @@ -567,20 +552,18 @@ mkdir -p %{buildroot}%{_includedir}/atlas %check -# Run make check but don't fail the build on these arches -#%ifarch s390 aarch64 ppc64 -#for type in %{types}; do -# pushd %{_arch}_${type} -# make check ptcheck -# popd -#done -#%else +# Run the check only for the z13. z14/z15 may fail due to illegal instructions... for type in %{types}; do - pushd %{_arch}_${type} - make check ptcheck - popd + if [ "$type" = "z14" ] || [ "$type" = "z15" ]; then + # skip the tests (may fail due to illegal instructions). 
+ echo "Skipping tests for the $type subpackage" + else + pushd %{_arch}_${type} + make check ptcheck + popd + fi + done -#%endif %post -p /sbin/ldconfig @@ -632,31 +615,19 @@ fi %ifarch s390 s390x - %post -n atlas-z10 -p /sbin/ldconfig + %post -n atlas-z14 -p /sbin/ldconfig - %postun -n atlas-z10 -p /sbin/ldconfig + %postun -n atlas-z14 -p /sbin/ldconfig - %posttrans z10-devel + %posttrans z14-devel /usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \ - %{_includedir}/atlas-%{_arch}-z10 %{pr_z10} + %{_includedir}/atlas-%{_arch}-z14 %{pr_z14} - %postun z10-devel + %postun z14-devel if [ $1 -ge 0 ] ; then - /usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z10 + /usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z14 fi - - %post -n atlas-z196 -p /sbin/ldconfig - - %postun -n atlas-z196 -p /sbin/ldconfig - - %posttrans z196-devel - /usr/sbin/alternatives --install %{_includedir}/atlas atlas-inc \ - %{_includedir}/atlas-%{_arch}-z196 %{pr_z196} - %postun z196-devel - if [ $1 -ge 0 ] ; then - /usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-z196 - fi %endif @@ -674,7 +645,7 @@ fi if [ $1 -ge 0 ] ; then /usr/sbin/alternatives --remove atlas-inc %{_includedir}/atlas-%{_arch}-power7 fi - + %post -n atlas-ppc8 -p /sbin/ldconfig %postun -n atlas-ppc8 -p /sbin/ldconfig @@ -767,61 +738,45 @@ fi %ifarch %{ix86} -#%files corei1 -#%doc doc/README.dist -#%dir %{_libdir}/atlas-corei1 -#%{_libdir}/atlas-corei1/*.so.* -#%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-corei1.conf - -#%files corei1-devel -#%doc doc -#%{_libdir}/atlas-corei1/*.so -#%{_includedir}/atlas-%{_arch}-corei1/ -#%{_includedir}/*.h -#%ghost %{_includedir}/atlas - -#%files corei1-static -#%{_libdir}/atlas-corei1/*.a %endif %ifarch s390 s390x -%files z10 -%doc doc/README.dist -%dir %{_libdir}/atlas-z10 -%{_libdir}/atlas-z10/*.so.* -%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z10.conf - -%files z10-devel -%doc doc 
-%{_libdir}/atlas-z10/*.so -%{_includedir}/atlas-%{_arch}-z10/ -%{_includedir}/*.h -%ghost %{_includedir}/atlas -%files z10-static -%{_libdir}/atlas-z10/*.a -%files z196 +%files z14 %doc doc/README.dist -%dir %{_libdir}/atlas-z196 -%{_libdir}/atlas-z196/*.so.* -%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z196.conf +%dir %{_libdir}/atlas-z14 +%{_libdir}/atlas-z14/*.so.* +%config(noreplace) /etc/ld.so.conf.d/atlas-%{_arch}-z14.conf -%files z196-devel +%files z14-devel %doc doc -%{_libdir}/atlas-z196/*.so -%{_includedir}/atlas-%{_arch}-z196/ +%{_libdir}/atlas-z14/*.so +%{_includedir}/atlas-%{_arch}-z14/ %{_includedir}/*.h %ghost %{_includedir}/atlas -%files z196-static -%{_libdir}/atlas-z196/*.a +%files z14-static +%{_libdir}/atlas-z14/*.a %endif + #enable_native_atlas if %endif %changelog +* Tue Jun 2 2020 Jakub Martisko - 3.10.3-8 +* Update the s390 subpackages: +- The base package is now optimized to z13 +- New subpackage is introduced: z14 +- All remaining subpackages for s390 are being deprecated +- Clean up of the spec file +- Some commented out subpackages were removed from the spec +- These subpackages were not built/shipped -> no change from the user's point of view +Resolves: #1782560 +Resolves: #1780286 +Resolves: #1782561 + * Mon Jun 10 2019 Jakub Martisko - 3.10.3-7 * Fix covscan related issues (getri function) Related: #1602445