import atlas-3.10.3-8.el8

c8 imports/c8/atlas-3.10.3-8.el8
CentOS Sources 4 years ago committed by MSVSphere Packaging Team
commit 0b797e2cc4

@ -0,0 +1,11 @@
0e11ec19a521973eaa551954debd112c21479e9c SOURCES/ARMa732.tar.bz2
9398518fe55b4a544278237bc639656e04543c50 SOURCES/ARMv732NEON.tar.bz2
d2f7a62aacdc5091aaa673a311a23f521e5c6486 SOURCES/IBMz1264.tar.bz2
042c0b9df85a9a469e20cf0801f83b03ec40425d SOURCES/IBMz1364VXZ.tar.bz2
352e057319fa7503cd74a0ab81055dc286cc1c45 SOURCES/IBMz1464VXZ2.tar.bz2
0abb8f638b8ffdc13994d533d8a4febcab364f2f SOURCES/IBMz1564VXZ2.tar.bz2
b3ee9bca1510b11c6aa671ba5ba7dff8918ce0cf SOURCES/IBMz932.tar.bz2
43f8d8eaf8cc62bc4665df3550b77e95f3dced22 SOURCES/IBMz964.tar.bz2
c47ac6f00d7bf4ab882e71fa1ab894cc551c77b7 SOURCES/POWER332.tar.bz2
85c00d3190abbe250d46472824b17d9164e3dfc2 SOURCES/PPRO32.tgz
337eef1167030a9440ea645ce0037abfd4b0be4e SOURCES/atlas3.10.3.tar.bz2

11
.gitignore vendored

@ -0,0 +1,11 @@
SOURCES/ARMa732.tar.bz2
SOURCES/ARMv732NEON.tar.bz2
SOURCES/IBMz1264.tar.bz2
SOURCES/IBMz1364VXZ.tar.bz2
SOURCES/IBMz1464VXZ2.tar.bz2
SOURCES/IBMz1564VXZ2.tar.bz2
SOURCES/IBMz932.tar.bz2
SOURCES/IBMz964.tar.bz2
SOURCES/POWER332.tar.bz2
SOURCES/PPRO32.tgz
SOURCES/atlas3.10.3.tar.bz2

@ -0,0 +1,30 @@
From 036562b66fa607152c6c54f0d6d030cd19bfcb7f Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:03:52 +0100
Subject: [PATCH 1/8] Avoid c99 standard compiler
When probing for a usable GCC, the existing code already dropped path
names that contained "c89" or "c90", because these compilers don't have
the GCC extensions enabled. This patch also drops names with "c99" in
them.
---
CONFIG/src/atlconf_misc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/CONFIG/src/atlconf_misc.c b/CONFIG/src/atlconf_misc.c
index 63cb1ef..fb62214 100644
--- a/CONFIG/src/atlconf_misc.c
+++ b/CONFIG/src/atlconf_misc.c
@@ -824,7 +824,8 @@ int CompIsGcc(char *comp)
int i;
cmpname = NameWithoutPath(comp);
- if (strstr(cmpname, "c89") || strstr(cmpname, "c90"))
+ if (strstr(cmpname, "c89") || strstr(cmpname, "c90") ||
+ strstr(cmpname, "c99"))
{
free(cmpname);
return(0);
--
2.23.0

@ -0,0 +1,38 @@
From a8611f5dc19e2c31b810fd2baa31b9cb5fd30d2a Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Tue, 19 Feb 2019 19:20:19 +0100
Subject: [PATCH 2/8] Fix -rpath-link command line options
The "-rpath-link" command line options were written in the wrong syntax,
causing errors in the build. This is fixed.
---
makes/Make.lib | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/makes/Make.lib b/makes/Make.lib
index 4ceff02..b322a32 100644
--- a/makes/Make.lib
+++ b/makes/Make.lib
@@ -47,11 +47,11 @@ cshared : fat_cshared
#
LDTRY_WIN:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) --output-def=$(outdef) \
+ -rpath-link=$(LIBINSTdir) --output-def=$(outdef) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ -Wl,"-rpath-link=$(LIBINSTdir)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
GCCTRY_norp_WIN:
$(GOODGCC) -shared -o $(outso) -Wl,--output-def=$(outdef) \
@@ -113,7 +113,7 @@ TRYALL_WIN :
#
LDTRY:
$(LD) $(LDFLAGS) -shared -soname $(LIBINSTdir)/$(outso) -o $(outso) \
- -rpath-link $(LIBINSTdir) \
+ -rpath-link=$(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
$(GOODGCC) -shared -o $(outso).$(so_ver) \

@ -0,0 +1,55 @@
From 999efd5370b33e8b02d9370eda3d454e08fc9d15 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 5 Dec 2018 18:59:15 +0100
Subject: [PATCH 3/8] Fix SIMD support on IBM z13
The header file atlas_simd.h contained a syntax error and a few functional
errors that affected IBM z13. It prevented any SIMD kernels from being
compiled successfully for that platform. This is fixed. The macro
vec_madd is avoided, because some GCC versions don't implement it
correctly; the equivalent GCC builtin __builtin_s390_vec_madd is used
instead.
---
include/atlas_simd.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index baee6b1..68daf79 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -69,7 +69,7 @@
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_VXZ)
- #if ATL_VLEN != 2;
+ #if ATL_VLEN != 2
#define ATL_FRCGNUVEC
#endif
#elif defined(ATL_NEON)
@@ -390,19 +390,19 @@
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
#endif
- #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vzero(v_) v_ = vec_splats((double)0.0)
#define ATL_vcopy(d_, s_) d_ = s_
- #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vbcast(v_, p_) v_ = vec_splats((double)*((TYPE*)(p_)))
#define ATL_vuld(v_, p_) ATL_vld(v_, p_)
#define ATL_vust(p_, v_) ATL_vst(p_, v_)
#define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
#define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
- #define ATL_vmac(d_, s1_, s2_) d_ = vec_madd(s1_, s2_, d_)
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
{ ATL_VTYPE t_;\
t_ = vec_splat(s0_, 1); \
- s0 += t_; \
+ s0_ += t_; \
}
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
--
2.23.0

@ -0,0 +1,46 @@
From a45cebf11522b3112fba3d682224a232ae5e2e98 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 19:44:32 +0100
Subject: [PATCH 4/8] Read L1 data cache size from sysconf if possible
The probing of the L1 data cache size is sometimes not reliable. This can
cause the tuning to yield varying, sub-obtimal results. But on Linux the
L1 data cache size can usually be retrieved with sysconf instead, which is
faster and more reliable. Do this whenever possible.
---
tune/sysinfo/L1CacheSize.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/tune/sysinfo/L1CacheSize.c b/tune/sysinfo/L1CacheSize.c
index e62a273..dffa76e 100644
--- a/tune/sysinfo/L1CacheSize.c
+++ b/tune/sysinfo/L1CacheSize.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
#define REPS 4096
@@ -276,7 +277,16 @@ int main(int nargs, char *args[])
exit(-1);
}
if (nargs > 1) MaxSize = atoi(args[1]);
- L1Size = GetL1Size(MaxSize, 1.08);
+
+#ifdef _SC_LEVEL1_DCACHE_SIZE
+ {
+ long res = sysconf(_SC_LEVEL1_DCACHE_SIZE);
+ L1Size = res > 0 ? (int) (res / 1024) : 0;
+ }
+#endif
+
+ if (!L1Size)
+ L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
L1Size = GetL1Size(MaxSize, 1.08);
if (!L1Size)
--
2.23.0

@ -0,0 +1,68 @@
From ad278554860b0da7d5848262a7bf35e058266cb1 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 12 Dec 2018 20:06:27 +0100
Subject: [PATCH 5/8] Optimizations for IBM z13
Perform some optimizations for IBM z13:
- Compile with -O2 instead of -O.
- Streamline vector loads/stores.
- Define the vvrsum2 macro.
Also, use the compile option -march=z13 instead of -march=native.
---
CONFIG/src/atlcomp.txt | 8 +++-----
include/atlas_simd.h | 11 +++++------
2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index aa31604..2ac71cf 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -246,12 +246,10 @@ MACH=IBMz9,IBMz10,IBMz196 OS=ALL LVL=500 COMPS=f77
'gfortran' '-O3 -funroll-loops'
MACH=IBMz9,IBMz10,IBMz196,IBMz12 OS=ALL LVL=500 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-O3 -funroll-loops'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=dmc,skc,dkc,icc,xcc,gcc
- 'gcc' '-march=native -O -mvx -mzvector'
-MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc
- 'gcc' '-march=native -O -mvx -mzvector -fno-peephole -fno-peephole2'
+MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
- 'gfortran' '-march=native -O -mvx -mzvector'
+ 'gfortran' '-march=z13 -mtune=z13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index 68daf79..f171933 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -384,8 +384,8 @@
#endif
#define ATL_VTYPE vector double
#if (defined(DREAL) || defined(DCPLX))
- #define ATL_vld(v_, p_) {v_[0] = *(p_); v_[1] = (p_)[1]; }
- #define ATL_vst(p_, v_) {*(p_) = v_[0]; (p_)[1] = v_[1];}
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
#else
#define ATL_vld(v_, p_) v_ = vec_ld2f(p_);
#define ATL_vst(p_, v_) vec_st2f(v_, p_);
@@ -400,10 +400,9 @@
#define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
#define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
#define ATL_vvrsum1(s0_) \
- { ATL_VTYPE t_;\
- t_ = vec_splat(s0_, 1); \
- s0_ += t_; \
- }
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_NEON) && (defined(SREAL) || defined(SCPLX))
--
2.23.0

@ -0,0 +1,276 @@
From dce732e9fe47b44d1a985d10a0eb97aac6afa28e Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 25 Mar 2020 20:11:19 +0100
Subject: [PATCH 6/8] Add IBM z14 support
Add general support for IBM z14. Also detect and handle the vector
enhancements facility 1, which specifically adds single-precision FP
arithmetic for vectors.
---
CONFIG/include/atlconf.h | 14 ++++----
CONFIG/src/Makefile | 6 ++++
CONFIG/src/atlcomp.txt | 4 +++
CONFIG/src/backend/Make.ext | 4 ++-
CONFIG/src/backend/archinfo_linux.c | 3 +-
CONFIG/src/backend/probe_vxz2.c | 12 +++++++
CONFIG/src/probe_comp.c | 3 +-
include/atlas_prefetch.h | 3 +-
include/atlas_simd.h | 53 +++++++++++++++++++++++++++++
9 files changed, 91 insertions(+), 11 deletions(-)
create mode 100644 CONFIG/src/backend/probe_vxz2.c
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index e51d56d..3828fdb 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 62
+#define NMACH 63
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, /* s390(x) in Linux */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ13 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
@@ -96,13 +96,13 @@ enum F2CNAME {f2c_NamErr=0, f2c_Add_, f2c_Add__, f2c_NoChange, f2c_UpCase};
enum F2CINT {f2c_IntErr=0, FintCint, FintClong, FintClonglong, FintCshort};
enum F2CSTRING {f2c_StrErr=0, fstrSun, fstrCray, fstrStructVal, fstrStructPtr};
-#define NISA 15
+#define NISA 16
static char *ISAXNAM[NISA] =
- {"", "VSX", "VXZ", "AltiVec",
+ {"", "VSX", "VXZ2", "VXZ", "AltiVec",
"AVXMAC", "AVXFMA4", "AVX", "SSE3", "SSE2", "SSE1", "3DNow",
"FPV3D2MACNEON", "FPV3D16MACNEON", "FPV3D32MAC", "FPV3D16MAC"};
enum ISAEXT
- {ISA_None=0, ISA_VSX, ISA_VXZ, ISA_AV,
+ {ISA_None=0, ISA_VSX, ISA_VXZ2, ISA_VXZ, ISA_AV,
ISA_AVXMAC, ISA_AVXFMA4, ISA_AVX, ISA_SSE3, ISA_SSE2, ISA_SSE1, ISA_3DNow,
ISA_NEON, ISA_NEON16, ISA_VFP3D32MAC, ISA_VFP3D16MAC};
diff --git a/CONFIG/src/Makefile b/CONFIG/src/Makefile
index 212b9d7..782a4cf 100644
--- a/CONFIG/src/Makefile
+++ b/CONFIG/src/Makefile
@@ -158,6 +158,12 @@ IRun_NEON :
$(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_neon args="$(args)" \
redir=config0.out
- cat config0.out
+IRun_VXZ2 :
+ $(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz2 \
+ $(SRCdir)/backend/probe_svec.c $(SRCdir)/backend/probe_vxz2.c
+ $(MAKE) $(atlrun) atldir=$(mydir) exe=xprobe_vxz2 args="$(args)" \
+ redir=config0.out
+ - cat config0.out
IRun_VXZ :
$(CC) $(CCFLAGS) -march=native -mvx -mzvector -o xprobe_vxz \
$(SRCdir)/backend/probe_dvec.c $(SRCdir)/backend/probe_vxz.c
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2ac71cf..2cfacc2 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -250,6 +250,10 @@ MACH=IBMz13 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z13 -mtune=z13 -O2'
MACH=IBMz13 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z13 -mtune=z13 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=z14 -mtune=z14 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/Make.ext b/CONFIG/src/backend/Make.ext
index 4743353..794babf 100644
--- a/CONFIG/src/backend/Make.ext
+++ b/CONFIG/src/backend/Make.ext
@@ -39,7 +39,7 @@ files = archinfo_aix.c archinfo_freebsd.c archinfo_irix.c archinfo_linux.c \
probe_gas_mips.S probe_gas_parisc.S probe_gas_ppc.S probe_gas_s390.S \
probe_gas_sparc.S probe_gas_wow64.S probe_gas_x8632.S \
probe_gas_x8664.S probe_smac.c probe_svec.c probe_this_asm.c \
- probe_vxz.c
+ probe_vxz2.c probe_vxz.c
all : $(files)
@@ -107,6 +107,8 @@ flibchkF.f : $(basf)
$(extF) -b $(basf) -o flibchkF.f rout=flibchkF.f
probe_arm32_FPABI.c : $(basf)
$(extC) -b $(basf) -o probe_arm32_FPABI.c rout=probe_arm32_FPABI
+probe_vxz2.c : $(basf)
+ $(extC) -b $(basf) -o probe_vxz2.c rout=probe_vxz2
probe_vxz.c : $(basf)
$(extC) -b $(basf) -o probe_vxz.c rout=probe_vxz
probe_aff_SETAFFNP.c : $(basf)
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index cdcee92..ed6f476 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -336,7 +336,8 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2817") || strstr(res, "2818")) mach = IbmZ196;
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
- else mach = IbmZ13; /* looks risky to me, but IBM folks did it */
+ else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
break;
diff --git a/CONFIG/src/backend/probe_vxz2.c b/CONFIG/src/backend/probe_vxz2.c
new file mode 100644
index 0000000..a69d92d
--- /dev/null
+++ b/CONFIG/src/backend/probe_vxz2.c
@@ -0,0 +1,12 @@
+#include <vecintrin.h>
+void do_vsum(float *z, float *x, float *y) // RETURNS: z = x + y
+{
+ vector float vx, vy;
+ vx = (vector float) {x[0], x[1], x[2], x[3]};
+ vy = (vector float) {y[0], y[1], y[2], y[3]};
+ vy += vx;
+ z[0] = vy[0];
+ z[1] = vy[1];
+ z[2] = vy[2];
+ z[3] = vy[3];
+}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 1652e24..857ea82 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -452,7 +452,7 @@ COMPNODE **GetDefaultComps(enum OSTYPE OS, enum MACHTYPE arch, int verb,
vp = "-mavx2 -mfma";
else if (vecexts & (1<<ISA_VSX))
vp = "-mvsx";
- else if (vecexts & (1<<ISA_VXZ))
+ else if ((vecexts & (1<<ISA_VXZ)) || (vecexts & (1<<ISA_VXZ2)))
vp = "-mvx -mzvector";
else if (vecexts & (1<<ISA_AV))
vp = "-maltivec";
@@ -1207,6 +1207,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
{
case IbmZ12:
case IbmZ13:
+ case IbmZ14:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index e7988a7..fa426ac 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -155,7 +155,8 @@
#define ATL_L1LS 32
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
- defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13)
+ defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
+ defined(ATL_ARCH_IbmZ14)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
diff --git a/include/atlas_simd.h b/include/atlas_simd.h
index f171933..eb75577 100644
--- a/include/atlas_simd.h
+++ b/include/atlas_simd.h
@@ -68,6 +68,11 @@
((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
#define ATL_FRCGNUVEC
#endif
+ #elif defined(ATL_VXZ2)
+ #if ((defined(SREAL) || defined(SCPLX)) && ATL_VLEN != 4) || \
+ ((defined(DREAL) || defined(DCPLX)) && ATL_VLEN != 2)
+ #define ATL_FRCGNUVEC
+ #endif
#elif defined(ATL_VXZ)
#if ATL_VLEN != 2
#define ATL_FRCGNUVEC
@@ -113,6 +118,12 @@
#else
#define ATL_VLEN 2
#endif
+ #elif defined(ATL_VXZ2)
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VLEN 4
+ #else
+ #define ATL_VLEN 2
+ #endif
#elif defined(ATL_VXZ)
#define ATL_VLEN 2
#elif defined(ATL_NEON)
@@ -376,6 +387,48 @@
#define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
#define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#endif
+#elif defined(ATL_VXZ2)
+ #include <vecintrin.h>
+
+ #define ATL_VPERMI(s_, t_, i_) \
+ ((ATL_VTYPE) vec_permi((vector double) s_, (vector double) t_, i_))
+
+ #if defined(SREAL) || defined(SCPLX)
+ #define ATL_VTYPE vector float
+ #if ATL_VLEN != 4
+ #error "VSXZ2 supports only VLEN = 4 for floats!"
+ #endif
+ #define ATL_vvrsum4(s0_, s1_, s2_, s3_) \
+ { ATL_VTYPE t0_, t1_; \
+ t0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); \
+ t1_ = vec_mergeh(s2_, s3_) + vec_mergel(s2_, s3_); \
+ s0_ = ATL_VPERMI(t0_, t1_, 0) + ATL_VPERMI(t0_, t1_, 3); \
+ }
+ #define ATL_vsplat2(d_, s_) d_ = vec_splat(s_, 2)
+ #define ATL_vsplat3(d_, s_) d_ = vec_splat(s_, 3)
+ #else /* double precision */
+ #define ATL_VTYPE vector double
+ #if ATL_VLEN != 2
+ #error "VSXZ2 supports only VLEN = 2 for doubles!"
+ #endif
+ #define ATL_vvrsum1(s0_) \
+ { s0_ = vec_mergeh(s0_, s0_) + vec_mergel(s0_, s0_); }
+ #define ATL_vvrsum2(s0_, s1_) \
+ { s0_ = vec_mergeh(s0_, s1_) + vec_mergel(s0_, s1_); }
+ #endif
+ #define ATL_vld(v_, p_) v_ = *(ATL_VTYPE *)(p_)
+ #define ATL_vst(p_, v_) *(ATL_VTYPE *)(p_) = v_
+ #define ATL_vzero(v_) v_ = vec_splats((TYPE)0.0)
+ #define ATL_vcopy(d_, s_) d_ = s_
+ #define ATL_vbcast(v_, p_) v_ = vec_splats(*((TYPE*)(p_)))
+ #define ATL_vuld(v_, p_) v_ = vec_xl(0, (TYPE *)(p_))
+ #define ATL_vust(p_, v_) vec_xst(v_, 0, (TYPE *)(p_))
+ #define ATL_vadd(d_, s1_, s2_) d_ = s1_ + s2_
+ #define ATL_vsub(d_, s1_, s2_) d_ = s1_ - s2_
+ #define ATL_vmul(d_, s1_, s2_) d_ = s1_ * s2_
+ #define ATL_vmac(d_, s1_, s2_) d_ = __builtin_s390_vec_madd(s1_, s2_, d_)
+ #define ATL_vsplat0(d_, s_) d_ = vec_splat(s_, 0)
+ #define ATL_vsplat1(d_, s_) d_ = vec_splat(s_, 1)
#elif defined(ATL_VXZ)
#include <vecintrin.h>
--
2.23.0

@ -0,0 +1,265 @@
From 14e717c4367c04570863220c3faf5ce41dabbf05 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Wed, 29 May 2019 17:51:34 +0200
Subject: [PATCH 7/8] Enable "cross-compile"
This adds support for building ATLAS without running any target code. In
order for this to work, the archdefs must contain some additional files
that would otherwise be built during various tuning steps; see the new
targets extra_get and extra_put in "CONFIG/ARCHS/Makefile".
Even if the archdefs contain these additional files, cross compilation
is *not* automatically enabled. To activate it and disable tuning at
build time, add the option "-Si archdef 2" when running "configure".
---
CONFIG/ARCHS/Makefile | 24 ++++++++++++++++++++++++
bin/atlas_install.c | 2 ++
makes/Make.aux | 10 +++++-----
makes/Make.bin | 22 ++++++++++++++++++++++
makes/Make.l3tune | 6 ++++++
makes/Make.sysinfo | 8 +++++++-
6 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/CONFIG/ARCHS/Makefile b/CONFIG/ARCHS/Makefile
index 321e05c..e61b5a0 100644
--- a/CONFIG/ARCHS/Makefile
+++ b/CONFIG/ARCHS/Makefile
@@ -211,3 +211,27 @@ ArchNew : $(mach) xnegflt
- cp $(BLDdir)/bin/INSTALL_LOG/?PerfSumm.txt $(adefd)/.
rm -f xnegflt
archput : sys_put kern_put gemm_put la_put
+
+ifdef ATL_NOTUNE
+
+# To avoid tuning, some extra files are needed.
+
+extra_get :
+ - cp $(INCAdir)/atlas_type.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]sysinfo.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sd]lamch.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]trsmXover.h $(adefd)/kern/
+ - cp $(INCAdir)/atlas_[sdcz]syr*NX.h $(adefd)/kern/
+
+extra_put :
+ - cp $(adefd)/kern/atlas_type.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]sysinfo.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sd]lamch.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]trsmXover.h $(INCAdir)/.
+ - cp $(adefd)/kern/atlas_[sdcz]syr*NX.h $(INCAdir)/.
+
+ArchNew : extra_get
+
+archput : extra_put
+
+endif
diff --git a/bin/atlas_install.c b/bin/atlas_install.c
index de3eb3a..3c811e6 100644
--- a/bin/atlas_install.c
+++ b/bin/atlas_install.c
@@ -697,6 +697,8 @@ void GoToTown(int ARCHDEF, int L1DEF, int TuneLA)
ATL_Cassert(system("make IBozoL1.grd\n")==0,
"USING BOZO L1 DEFAULTS", NULL);
}
+ if (ARCHDEF >= 2)
+ setenv("ATL_NOTUNE", "1", 1);
if (ARCHDEF)
DefInstall = !system("make IArchDef.grd\n");
diff --git a/makes/Make.aux b/makes/Make.aux
index 1f769c8..c793028 100644
--- a/makes/Make.aux
+++ b/makes/Make.aux
@@ -113,23 +113,23 @@ clean :
$(ATLFWAIT) :
cd $(BINdir) ; $(MAKE) xatlas_waitfile
-$(INCAdir)/atlas_type.h : $(ATLFWAIT)
+$(INCAdir)/atlas_type.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_type.h
$(ATLFWAIT) -f $(INCAdir)/atlas_type.h
sINCdep = $(INCAdir)/atlas_ssysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_ssysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_ssysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_ssysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_ssysinfo.h
dINCdep = $(INCAdir)/atlas_dsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_dsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_dsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_dsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_dsysinfo.h
cINCdep = $(INCAdir)/atlas_csysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_csysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_csysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_csysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_csysinfo.h
zINCdep = $(INCAdir)/atlas_zsysinfo.h $(INCAdir)/atlas_type.h
-$(INCAdir)/atlas_zsysinfo.h : $(ATLFWAIT)
+$(INCAdir)/atlas_zsysinfo.h : | $(ATLFWAIT)
cd $(SYSdir) ; $(MAKE) $(INCAdir)/atlas_zsysinfo.h
$(ATLFWAIT) -f $(INCAdir)/atlas_zsysinfo.h
diff --git a/makes/Make.bin b/makes/Make.bin
index 1035cb9..acad578 100644
--- a/makes/Make.bin
+++ b/makes/Make.bin
@@ -163,7 +163,9 @@ IRunMADef :
cd $(SYSdir) ; $(MAKE) RunMADef pre=$(pre)
IRunMMDef :
+ifndef ATL_NOTUNE
cd $(MMTdir) ; $(MAKE) RunMMDef pre=$(pre)
+endif
cd $(MMTdir) ; ./xemit_mm -p $(pre) -R -2
cd $(MMTdir) ; $(MAKE) install pre=$(pre)
IKillL1 : force_build
@@ -303,22 +305,42 @@ INSTALL_LOG/$(pre)bestTT_$(nb)x$(nb)x$(nb) : \
cp $(MMTdir)/res/$(pre)bestTT_$(nb)x$(nb)x$(nb) INSTALL_LOG/.
$(R1Tdir)/res/$(pre)R2K.sum : $(R1Tdir)/res/$(pre)R1K.sum force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r2install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R2K.sum pre=$(pre)
+endif
$(R1Tdir)/res/$(pre)R1K.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(R1Tdir) ; $(MAKE) $(pre)r1install
+else
cd $(R1Tdir) ; $(MAKE) res/$(pre)R1K.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)R1K.sum : $(R1Tdir)/res/$(pre)R1K.sum
cp $(R1Tdir)/res/$(pre)R1K.sum INSTALL_LOG/.
INSTALL_LOG/$(pre)R2K.sum : INSTALL_LOG/$(pre)R1K.sum \
$(R1Tdir)/res/$(pre)R2K.sum
cp $(R1Tdir)/res/$(pre)R2K.sum INSTALL_LOG/.
+ifndef ATL_NOTUNE
cd $(R1Tdir) ; $(MAKE) $(pre)nxtune
+else
+ cd $(BLDdir)/src/blas/reference/level2 ; make $(pre)lib
+endif
$(MVTdir)/res/$(pre)MVNK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvninstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVNK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVNK.sum : $(MVTdir)/res/$(pre)MVNK.sum
cp $(MVTdir)/res/$(pre)MVNK.sum INSTALL_LOG/.
$(MVTdir)/res/$(pre)MVTK.sum : force_build
+ifdef ATL_NOTUNE
+ cd $(MVTdir) ; $(MAKE) $(pre)mvtinstall
+else
cd $(MVTdir) ; $(MAKE) res/$(pre)MVTK.sum pre=$(pre)
+endif
INSTALL_LOG/$(pre)MVTK.sum : $(MVTdir)/res/$(pre)MVTK.sum
cp $(MVTdir)/res/$(pre)MVTK.sum INSTALL_LOG/.
diff --git a/makes/Make.l3tune b/makes/Make.l3tune
index eaf7d7d..cd7f5f1 100644
--- a/makes/Make.l3tune
+++ b/makes/Make.l3tune
@@ -118,6 +118,7 @@ res/atlas_strsmXover.h :
cp $(strsmXover) res/.
stsmfc :
+ifndef ATL_NOTUNE
rm -f $(strsmXover)
cd $(L3Bdir) ; $(MAKE) slib
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Upper_ \
@@ -128,6 +129,7 @@ stsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xstsmfc2 pre=s typ=SREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) slib
dtrsmXover = $(INCAdir)/atlas_dtrsmXover.h
@@ -138,6 +140,7 @@ res/atlas_dtrsmXover.h :
cp $(dtrsmXover) res/.
dtsmfc :
+ifndef ATL_NOTUNE
rm -f $(dtrsmXover)
cd $(L3Bdir) ; $(MAKE) dlib
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Upper_ \
@@ -148,6 +151,7 @@ dtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xdtsmfc2 pre=d typ=DREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) dlib
qtrsmXover = $(INCAdir)/atlas_qtrsmXover.h
@@ -158,6 +162,7 @@ res/atlas_qtrsmXover.h :
cp $(qtrsmXover) res/.
qtsmfc :
+ifndef ATL_NOTUNE
rm -f $(qtrsmXover)
cd $(L3Bdir) ; $(MAKE) qlib
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Upper_ \
@@ -168,6 +173,7 @@ qtsmfc :
tran=NoTranspose_ diag=$(diag)
$(MAKE) xqtsmfc2 pre=q typ=QREAL side=$(side) uplo=Lower_ \
tran=Transpose_ diag=$(diag)
+endif
cd $(L3Bdir) ; $(MAKE) qlib
$(pre)tsmfc.o : force_build
diff --git a/makes/Make.sysinfo b/makes/Make.sysinfo
index 2b7dfdc..8e5dab2 100644
--- a/makes/Make.sysinfo
+++ b/makes/Make.sysinfo
@@ -5,6 +5,7 @@ maxlat=6
mflop=200
flags=
+ifndef ATL_NOTUNE
sTestFlags : force_build
$(MAKE) srbob `cat res/sBEST` pre='s' type=float
@@ -85,12 +86,14 @@ RunLamch : xemit_lamch
cp res/atlas_?lamch.h $(INCAdir)/.
RunTyp: xemit_typ
$(ATLRUN) $(SYSdir) xemit_typ > $(INCAdir)/atlas_type.h
+endif
xemit_buildinfo : emit_buildinfo.o
$(XCC) $(XCCFLAGS) -o $@ emit_buildinfo.o
xsyssum : GetSysSum.o
$(XCC) $(XCCFLAGS) -o $@ GetSysSum.o
+ifndef ATL_NOTUNE
xL1 : time.o L1CacheSize.o
$(KC) $(KCFLAGS) -o $@ L1CacheSize.o time.o
@@ -125,6 +128,7 @@ smatime.o : $(mySRCdir)/matime.c
$(KC) -c $(KCFLAGS) -DSREAL $(mySRCdir)/matime.c
xmasrch : $(mySRCdir)/masrch.c
$(XCC) $(XCCFLAGS) -o $@ $(mySRCdir)/masrch.c
+endif
ATL_cputime.c :
cp $(mySRCdir)/ATL_cputime.c .
@@ -143,6 +147,8 @@ emit_buildinfo.o : $(mySRCdir)/emit_buildinfo.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/emit_buildinfo.c
GetSysSum.o : $(INCAdir)/atlas_type.h $(mySRCdir)/GetSysSum.c
$(XCC) -c $(XCCFLAGS) $(mySRCdir)/GetSysSum.c
+
+ifndef ATL_NOTUNE
time.o : $(mySRCdir)/time.c
$(KC) -c $(KCFLAGS) -I./ $(mySRCdir)/time.c
emit_lamch.o : $(mySRCdir)/emit_lamch.c
@@ -155,7 +161,7 @@ findNT.o : $(mySRCdir)/findNT.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/findNT.c
tlb.o : $(mySRCdir)/tlb.c
$(KC) -c $(KCFLAGS) $(mySRCdir)/tlb.c
-
+endif
force_build :
--
2.23.0

@ -0,0 +1,105 @@
From d249a8128806d08285eeda00b2a35b62a22236f4 Mon Sep 17 00:00:00 2001
From: Andreas Arnez <arnez@linux.ibm.com>
Date: Thu, 26 Mar 2020 17:14:49 +0100
Subject: [PATCH 8/8] Add IBM z15 support
Add support for specifying "IBMz15" as target architecture.
---
CONFIG/include/atlconf.h | 8 ++++----
CONFIG/src/atlcomp.txt | 4 ++++
CONFIG/src/backend/archinfo_linux.c | 1 +
CONFIG/src/probe_comp.c | 1 +
include/atlas_prefetch.h | 2 +-
5 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/CONFIG/include/atlconf.h b/CONFIG/include/atlconf.h
index 3828fdb..382601f 100644
--- a/CONFIG/include/atlconf.h
+++ b/CONFIG/include/atlconf.h
@@ -25,11 +25,11 @@ enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
* Corei3EP: v3 Haswell, E5-26XX
* Corei4: skylake
*/
-#define NMACH 63
+#define NMACH 64
static char *machnam[NMACH] =
{"UNKNOWN", "PPCG4", "PPCG5", "POWER3", "POWER4", "POWER5",
"POWER6", "POWER7", "POWER8", "POWERe6500",
- "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14",
+ "IBMz9", "IBMz10", "IBMz196", "IBMz12", "IBMz13", "IBMz14", "IBMz15",
"x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
"P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
"CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Corei3",
@@ -42,7 +42,7 @@ static char *machnam[NMACH] =
"ARM64xgene1", "ARM64a53", "ARM64a57"};
enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
IbmPwr6, IbmPwr7, IbmPwr8, Pwre6500,
- IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, /* s390(x) */
+ IbmZ9, IbmZ10, IbmZ196, IbmZ12, IbmZ13, IbmZ14, IbmZ15,
x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
IntCoreDuo, IntCore2Solo, IntCore2, IntCorei1, IntCorei2,
@@ -82,7 +82,7 @@ enum MACHTYPE {MACHOther, PPCG4, PPCG5, IbmPwr3, IbmPwr4, IbmPwr5,
#define MachIsARM64(mach_) \
( (mach_) >= ARM64xg && || (mach_) <= ARM64a57)
#define MachIsS390(mach_) \
- ( (mach_) >= IbmZ9 && (mach_) <= IbmZ14 )
+ ( (mach_) >= IbmZ9 && (mach_) <= IbmZ15 )
static char *f2c_namestr[5] = {"UNKNOWN","Add_", "Add__", "NoChange", "UpCase"};
diff --git a/CONFIG/src/atlcomp.txt b/CONFIG/src/atlcomp.txt
index 2cfacc2..acb2c83 100644
--- a/CONFIG/src/atlcomp.txt
+++ b/CONFIG/src/atlcomp.txt
@@ -254,6 +254,10 @@ MACH=IBMz14 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
'gcc' '-march=z14 -mtune=z14 -O2'
MACH=IBMz14 OS=ALL LVL=1000 COMPS=f77
'gfortran' '-march=z14 -mtune=z14 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=smc,dmc,skc,dkc,icc,xcc,gcc
+ 'gcc' '-march=arch13 -mtune=arch13 -O2'
+MACH=IBMz15 OS=ALL LVL=1000 COMPS=f77
+ 'gfortran' '-march=arch13 -mtune=arch13 -O2'
#
# Windows defaults ; need to make SSE/SSE2 arch dep.
#
diff --git a/CONFIG/src/backend/archinfo_linux.c b/CONFIG/src/backend/archinfo_linux.c
index ed6f476..934a005 100644
--- a/CONFIG/src/backend/archinfo_linux.c
+++ b/CONFIG/src/backend/archinfo_linux.c
@@ -337,6 +337,7 @@ enum MACHTYPE ProbeArch()
else if (strstr(res, "2827") || strstr(res, "2828")) mach = IbmZ12;
else if (strstr(res, "2964") || strstr(res, "2965")) mach = IbmZ13;
else if (strstr(res, "3906") || strstr(res, "3907")) mach = IbmZ14;
+ else if (strstr(res, "8561") || strstr(res, "8562")) mach = IbmZ15;
else mach = IbmZ14; /* looks risky to me, but IBM folks did it */
free(res);
}
diff --git a/CONFIG/src/probe_comp.c b/CONFIG/src/probe_comp.c
index 857ea82..88bb25e 100644
--- a/CONFIG/src/probe_comp.c
+++ b/CONFIG/src/probe_comp.c
@@ -1208,6 +1208,7 @@ void GetBestGccVers(enum OSTYPE OS, enum MACHTYPE arch,
case IbmZ12:
case IbmZ13:
case IbmZ14:
+ case IbmZ15:
case IntCorei3:
case IntCorei4:
case IntCorei2:
diff --git a/include/atlas_prefetch.h b/include/atlas_prefetch.h
index fa426ac..583f19d 100644
--- a/include/atlas_prefetch.h
+++ b/include/atlas_prefetch.h
@@ -156,7 +156,7 @@
#define ATL_L2LS 64
#elif defined(ATL_ARCH_IBMz196) || defined(ATL_ARCH_IBMz10) || \
defined(ATL_ARCH_IBMzEC12) || defined(ATL_ARCH_IBMz13) || \
- defined(ATL_ARCH_IbmZ14)
+ defined(ATL_ARCH_IbmZ14) || defined(ATL_ARCH_IbmZ15)
#define ATL_pfl1R(mem) __builtin_prefetch(mem, 0, 3)
#define ATL_pfl1W(mem) __builtin_prefetch(mem, 1, 3)
#define ATL_GOT_L1PREFETCH
--
2.23.0

@ -0,0 +1,47 @@
Notes on the packaged version of ATLAS
by Quentin Spencer
updated: October 4, 2005
updated by Deji Akingunola
October 15, 2008
updated by Deji Akingunola
June 15, 2011
updated by Frantisek Kluknavsky
Nov 20, 2012
Because ATLAS relies on compile-time optimizations to obtain improved
performance over BLAS and LAPACK, the resulting binaries are closely
tied to the hardware on which they are compiled, and can likely result
in very poor performance on other hardware. For this reason,
including a package like ATLAS in Fedora requires some compromises.
Optimizing ATLAS for the most modern hardware can result in
significant performance penalties for users using the same package on
older hardware. A binary ATLAS package must perform reasonably well on the
entire range of hardware on which it could potentially be installed.
The result is a set of libraries that will not
necessarily achieve optimal performance on any given hardware but
should still offer significant performance gains over the reference
BLAS and LAPACK libraries on most hardware.
In addition to the base 32bit build, subpackages are built for SSE, SSE2,
and SSE3 ix86 extensions.
On 64bit x86 systems the default atlas package was built with SSE3
optimization.
This packaging allows multiple installation of different atlas sub-packages
at the same time. The alternatives system (read 'man alternatives' for usage)
is used in the -devel subpackages to select the appropriate location for the
architectural dependent header files.
For users who want optimal performance on
particular hardware, custom RPMs can be built from the source package
by setting the RPM macro "enable_native_atlas" to a value of 1. This
can be done from the command line as in the following example:
rpmbuild -D "enable_native_atlas 1" --rebuild atlas-3.8.3-1.src.rpm

@ -0,0 +1,14 @@
diff --git a/include/atlas_genparse.h b/include/atlas_genparse.h
index 909a38e..1e6d153 100644
--- a/include/atlas_genparse.h
+++ b/include/atlas_genparse.h
@@ -163,7 +163,8 @@ static int GetDoubleArr(char *str, int N, double *d)
if (!str)
break;
str++;
- assert(sscanf(str, "%le", d+i) == 1);
+ if (sscanf(str, "%le", d+i) != 1)
+ break;
i++;
}
return(i);

@ -0,0 +1,12 @@
diff --git a/src/testing/ATL_f77getri.c b/src/testing/ATL_f77getri.c
index 2cc576c..7ff8eba 100644
--- a/src/testing/ATL_f77getri.c
+++ b/src/testing/ATL_f77getri.c
@@ -97,7 +97,6 @@ int f77getri(const enum ATLAS_ORDER Order, const int N, TYPE *A, const int lda,
#ifdef ATL_FunkyInts
*lwork = F77lwork;
for (i=0; i < MN; i++) ipiv[i] = F77ipiv[i] + 1;
- free(F77ipiv);
#else
for (i=0; i < MN; i++) ipiv[i]++;
#endif

@ -0,0 +1,16 @@
diff --git a/CONFIG/src/SpewMakeInc.c b/CONFIG/src/SpewMakeInc.c
index eed259e..65d68a1 100644
--- a/CONFIG/src/SpewMakeInc.c
+++ b/CONFIG/src/SpewMakeInc.c
@@ -764,9 +764,9 @@ int main(int nargs, char **args)
else
{
if (ptrbits == 32)
- fprintf(fpout, " -melf_i386");
+ fprintf(fpout, " -Wl,-melf_i386");
else if (ptrbits == 64)
- fprintf(fpout, " -melf_x86_64");
+ fprintf(fpout, " -Wl,-melf_x86_64");
if (OS == OSFreeBSD)
fprintf(fpout, "_fbsd");
}

@ -0,0 +1,40 @@
From 3119c671c566761a79ac98405cb619892acde3e8 Mon Sep 17 00:00:00 2001
From: Lukas Slebodnik <lslebodn@redhat.com>
Date: Fri, 20 Sep 2013 09:26:58 +0200
Subject: [PATCH] atlas-shared_libraries
---
ATLAS/makes/Make.lib | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/ATLAS/makes/Make.lib b/ATLAS/makes/Make.lib
index ab1eb9963d36678972a0a410905169aaa563dc64..27c6e316b442e09b0f46afac7940aaa11e25e45c 100644
--- a/ATLAS/makes/Make.lib
+++ b/ATLAS/makes/Make.lib
@@ -4,6 +4,8 @@ mySRCdir = $(SRCdir)/lib
#
# override with libatlas.so only when atlas is built to one lib
#
+so_ver_major=3
+so_ver = $(so_ver_major).10
DYNlibs = liblapack.so libf77blas.so libcblas.so libatlas.so
PTDYNlibs = liblapack.so libptf77blas.so libptcblas.so libatlas.so
CDYNlibs = liblapack.so libcblas.so libatlas.so
@@ -116,9 +116,12 @@ LDTRY:
-rpath-link $(LIBINSTdir) \
--whole-archive $(libas) --no-whole-archive $(LIBS)
GCCTRY:
- $(GOODGCC) -shared -o $(outso) \
- -Wl,"-rpath-link $(LIBINSTdir)" \
+ $(GOODGCC) -shared -o $(outso).$(so_ver) \
+ \
+ -Wl,-soname,"$(outso).$(so_ver_major)" \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
+ ln -s $(outso).$(so_ver) $(outso).$(so_ver_major)
+ ln -s $(outso).$(so_ver) $(outso)
GCCTRY_norp:
$(GOODGCC) -shared -o $(outso) \
-Wl,--whole-archive $(libas) -Wl,--no-whole-archive $(LIBS)
--
1.8.3.1

@ -0,0 +1,12 @@
diff -up ATLAS/CONFIG/src/config.c.zaloha ATLAS/CONFIG/src/config.c
--- ATLAS/CONFIG/src/config.c.zaloha 2012-10-25 11:29:02.495425989 +0200
+++ ATLAS/CONFIG/src/config.c 2012-10-25 11:42:10.218216957 +0200
@@ -711,6 +711,8 @@ int ProbePtrbits(int verb, char *targarg
int ProbeCPUThrottle(int verb, char *targarg, enum OSTYPE OS, enum ASMDIA asmb)
{
+ return 0; /* impossible to turn off cpu throttling => ignore */
+ /* this undermines performance of compiled library */
int i, iret;
char *ln;
i = strlen(targarg) + 22 + 12;

@ -0,0 +1,17 @@
diff -up wrk/makes/Make.lib.wrk wrk/makes/Make.lib
--- wrk/makes/Make.lib.wrk 2015-01-23 21:14:46.465494411 +0100
+++ wrk/makes/Make.lib 2015-01-23 22:48:39.632479588 +0100
@@ -185,11 +185,11 @@ TRYALL :
#
fat_ptshared : # threaded target
$(MAKE) TRYALL outso=libtatlas.so \
- libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a" \
+ libas="libptlapack.a libptf77blas.a libptcblas.a libatlas.a $(SLAPACKlib)" \
LIBINSTdir="$(LIBINSTdir)"
fat_shared : # serial target
$(MAKE) TRYALL outso=libsatlas.so \
- libas="liblapack.a libf77blas.a libcblas.a libatlas.a" \
+ libas="liblapack.a libf77blas.a libcblas.a libatlas.a $(SLAPACKlib)" \
LIBINSTdir="$(LIBINSTdir)"
#
# Builds shared lib, not include fortran codes from LAPACK

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save