From cbd19933e6622ddda684115c7f4abaac6fd11191 Mon Sep 17 00:00:00 2001 From: Nicolas Chauvet Date: Wed, 29 Jan 2020 08:33:29 +0100 Subject: [PATCH] Backport patch for ppc64le --- ...1bd82852808f7fa403e3ee159bd62b1c08cc.patch | 128 ++++++++++++++++++ opencv.spec | 1 + 2 files changed, 129 insertions(+) create mode 100644 bd531bd82852808f7fa403e3ee159bd62b1c08cc.patch diff --git a/bd531bd82852808f7fa403e3ee159bd62b1c08cc.patch b/bd531bd82852808f7fa403e3ee159bd62b1c08cc.patch new file mode 100644 index 0000000..e83aae3 --- /dev/null +++ b/bd531bd82852808f7fa403e3ee159bd62b1c08cc.patch @@ -0,0 +1,128 @@ +From bd531bd82852808f7fa403e3ee159bd62b1c08cc Mon Sep 17 00:00:00 2001 +From: Sayed Adel +Date: Tue, 28 Jan 2020 15:16:48 +0200 +Subject: [PATCH] core:vsx fix inline asm constraints + + generalize constraints to 'wa' for VSX registers +--- + cmake/checks/cpu_vsx_asm.cpp | 2 +- + .../include/opencv2/core/hal/intrin_vsx.hpp | 4 +- + .../core/include/opencv2/core/vsx_utils.hpp | 50 ++++++++----------- + 3 files changed, 25 insertions(+), 31 deletions(-) + +diff --git a/cmake/checks/cpu_vsx_asm.cpp b/cmake/checks/cpu_vsx_asm.cpp +index bb4c25507e3..9c1bf7a946a 100644 +--- a/cmake/checks/cpu_vsx_asm.cpp ++++ b/cmake/checks/cpu_vsx_asm.cpp +@@ -16,6 +16,6 @@ int main() + { + __vector float vf; + __vector signed int vi; +- __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wf" (vf) : "wa" (vi)); ++ __asm__ __volatile__ ("xvcvsxwsp %x0,%x1" : "=wa" (vf) : "wa" (vi)); + return 0; + } +\ No newline at end of file +diff --git a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +index bda1d8558f8..6e8b439182f 100644 +--- a/modules/core/include/opencv2/core/hal/intrin_vsx.hpp ++++ b/modules/core/include/opencv2/core/hal/intrin_vsx.hpp +@@ -1338,7 +1338,7 @@ inline v_float32x4 v_load_expand(const float16_t* ptr) + return v_float32x4(vec_extract_fp_from_shorth(vf16)); + #elif CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_float4 vf32; +- __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wf" (vf32) : "wa" (vec_mergeh(vf16, vf16))); ++ __asm__ __volatile__ ("xvcvhpsp %x0,%x1" : "=wa" (vf32) : "wa" (vec_mergeh(vf16, vf16))); + return v_float32x4(vf32); + #else + const vec_int4 z = vec_int4_z, delta = vec_int4_sp(0x38000000); +@@ -1363,7 +1363,7 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) + // fixme: Is there any builtin op or intrinsic that cover "xvcvsphp"? + #if CV_VSX3 && !defined(CV_COMPILER_VSX_BROKEN_ASM) + vec_ushort8 vf16; +- __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wf" (v.val)); ++ __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (v.val)); + vec_st_l8(vec_mergesqe(vf16, vf16), ptr); + #else + const vec_int4 signmask = vec_int4_sp(0x80000000); +diff --git a/modules/core/include/opencv2/core/vsx_utils.hpp b/modules/core/include/opencv2/core/vsx_utils.hpp +index d7c71406072..bcc97fe5297 100644 +--- a/modules/core/include/opencv2/core/vsx_utils.hpp ++++ b/modules/core/include/opencv2/core/vsx_utils.hpp +@@ -110,9 +110,9 @@ VSX_FINLINE(rt) fnm(const rg& a, const rg& b) { return fn2(a, b); } + #if defined(__GNUG__) && !defined(__clang__) + + // inline asm helper +-#define VSX_IMPL_1RG(rt, rto, rg, rgo, opc, fnm) \ +-VSX_FINLINE(rt) fnm(const rg& a) \ +-{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "="#rto (rs) : #rgo (a)); return rs; } ++#define VSX_IMPL_1RG(rt, rg, opc, fnm) \ ++VSX_FINLINE(rt) fnm(const rg& a) \ ++{ rt rs; __asm__ __volatile__(#opc" %x0,%x1" : "=wa" (rs) : "wa" (a)); return rs; } + + #define VSX_IMPL_1VRG(rt, rg, opc, fnm) \ + VSX_FINLINE(rt) fnm(const rg& a) \ +@@ -257,44 +257,38 @@ VSX_REDIRECT_1RG(vec_float4, vec_double2, vec_cvfo, __builtin_vsx_xvcvdpsp) + VSX_REDIRECT_1RG(vec_double2, vec_float4, vec_cvfo, __builtin_vsx_xvcvspdp) + + // converts word and doubleword to double-precision +-#ifdef vec_ctd +-# undef vec_ctd +-#endif +-VSX_IMPL_1RG(vec_double2, wd, vec_int4, wa, xvcvsxwdp, vec_ctdo) +-VSX_IMPL_1RG(vec_double2, wd, vec_uint4, wa, xvcvuxwdp, vec_ctdo) +-VSX_IMPL_1RG(vec_double2, wd, vec_dword2, wi, xvcvsxddp, vec_ctd) +-VSX_IMPL_1RG(vec_double2, wd, vec_udword2, wi, xvcvuxddp, vec_ctd) ++#undef vec_ctd ++VSX_IMPL_1RG(vec_double2, vec_int4, xvcvsxwdp, vec_ctdo) ++VSX_IMPL_1RG(vec_double2, vec_uint4, xvcvuxwdp, vec_ctdo) ++VSX_IMPL_1RG(vec_double2, vec_dword2, xvcvsxddp, vec_ctd) ++VSX_IMPL_1RG(vec_double2, vec_udword2, xvcvuxddp, vec_ctd) + + // converts word and doubleword to single-precision + #undef vec_ctf +-VSX_IMPL_1RG(vec_float4, wf, vec_int4, wa, xvcvsxwsp, vec_ctf) +-VSX_IMPL_1RG(vec_float4, wf, vec_uint4, wa, xvcvuxwsp, vec_ctf) +-VSX_IMPL_1RG(vec_float4, wf, vec_dword2, wi, xvcvsxdsp, vec_ctfo) +-VSX_IMPL_1RG(vec_float4, wf, vec_udword2, wi, xvcvuxdsp, vec_ctfo) ++VSX_IMPL_1RG(vec_float4, vec_int4, xvcvsxwsp, vec_ctf) ++VSX_IMPL_1RG(vec_float4, vec_uint4, xvcvuxwsp, vec_ctf) ++VSX_IMPL_1RG(vec_float4, vec_dword2, xvcvsxdsp, vec_ctfo) ++VSX_IMPL_1RG(vec_float4, vec_udword2, xvcvuxdsp, vec_ctfo) + + // converts single and double precision to signed word + #undef vec_cts +-VSX_IMPL_1RG(vec_int4, wa, vec_double2, wd, xvcvdpsxws, vec_ctso) +-VSX_IMPL_1RG(vec_int4, wa, vec_float4, wf, xvcvspsxws, vec_cts) ++VSX_IMPL_1RG(vec_int4, vec_double2, xvcvdpsxws, vec_ctso) ++VSX_IMPL_1RG(vec_int4, vec_float4, xvcvspsxws, vec_cts) + + // converts single and double precision to unsigned word + #undef vec_ctu +-VSX_IMPL_1RG(vec_uint4, wa, vec_double2, wd, xvcvdpuxws, vec_ctuo) +-VSX_IMPL_1RG(vec_uint4, wa, vec_float4, wf, xvcvspuxws, vec_ctu) ++VSX_IMPL_1RG(vec_uint4, vec_double2, xvcvdpuxws, vec_ctuo) ++VSX_IMPL_1RG(vec_uint4, vec_float4, xvcvspuxws, vec_ctu) + + // converts single and double precision to signed doubleword +-#ifdef vec_ctsl +-# undef vec_ctsl +-#endif +-VSX_IMPL_1RG(vec_dword2, wi, vec_double2, wd, xvcvdpsxds, vec_ctsl) +-VSX_IMPL_1RG(vec_dword2, wi, vec_float4, wf, xvcvspsxds, vec_ctslo) ++#undef vec_ctsl ++VSX_IMPL_1RG(vec_dword2, vec_double2, xvcvdpsxds, vec_ctsl) ++VSX_IMPL_1RG(vec_dword2, vec_float4, xvcvspsxds, vec_ctslo) + + // converts single and double precision to unsigned doubleword +-#ifdef vec_ctul +-# undef vec_ctul +-#endif +-VSX_IMPL_1RG(vec_udword2, wi, vec_double2, wd, xvcvdpuxds, vec_ctul) +-VSX_IMPL_1RG(vec_udword2, wi, vec_float4, wf, xvcvspuxds, vec_ctulo) ++#undef vec_ctul ++VSX_IMPL_1RG(vec_udword2, vec_double2, xvcvdpuxds, vec_ctul) ++VSX_IMPL_1RG(vec_udword2, vec_float4, xvcvspuxds, vec_ctulo) + + // just in case if GCC doesn't define it + #ifndef vec_xl diff --git a/opencv.spec b/opencv.spec index 036ba89..0151f31 100644 --- a/opencv.spec +++ b/opencv.spec @@ -77,6 +77,7 @@ Source4: b624b995ec9c439cbc2e9e6ee940d3a2-v0.1.1f.zip Patch0: opencv-4.1.0-install_3rdparty_licenses.patch Patch1: https://github.com/opencv/opencv/commit/fb3a334bbee3535d508af6510d9903d26ba37d34.patch +Patch2: https://github.com/opencv/opencv/commit/bd531bd82852808f7fa403e3ee159bd62b1c08cc.patch BuildRequires: gcc-c++ BuildRequires: libtool