You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
ffmpeg/ffmpeg-cpu.patch

542 lines
18 KiB

diff -up ffmpeg-20080908/configure.cpu ffmpeg-20080908/configure
--- ffmpeg-20080908/configure.cpu 2008-09-02 06:04:26.000000000 +0200
+++ ffmpeg-20080908/configure 2008-11-01 23:05:08.000000000 +0100
@@ -122,8 +122,12 @@ show_help(){
echo " instruction selection, may crash on older CPUs)"
echo " --enable-powerpc-perf enable performance report on PPC"
echo " (requires enabling PMC)"
+ echo " --disable-amd3dnow disable amd3dnow! usage"
+ echo " --disable-amd3dnowex disable amd3dnowEx! usage"
echo " --disable-mmx disable MMX usage"
echo " --disable-mmx2 disable MMX2 usage"
+ echo " --disable-sse disable SSE usage"
+ echo " --disable-sse2 disable SSE2 usage"
echo " --disable-ssse3 disable SSSE3 usage"
echo " --disable-armv5te disable armv5te usage"
echo " --disable-armv6 disable armv6 usage"
@@ -719,6 +723,8 @@ ARCH_LIST='
'
ARCH_EXT_LIST='
+ amd3dnow
+ amd3dnowex
altivec
armv5te
armv6
@@ -728,6 +734,8 @@ ARCH_EXT_LIST='
mmx
mmx2
neon
+ sse
+ sse2
ssse3
vis
'
@@ -830,6 +838,8 @@ CMDLINE_SET="
# code dependency declarations
# architecture extensions
+amd3dnow_deps="x86"
+amd3dnowex_deps="x86 amd3dnow"
altivec_deps="powerpc"
armv5te_deps="armv4l"
armv6_deps="armv4l"
@@ -839,7 +849,9 @@ mmi_deps="mips"
mmx_deps="x86"
mmx2_deps="x86 mmx"
neon_deps="armv4l"
-ssse3_deps="x86"
+sse_deps="x86"
+sse2_deps="x86 sse"
+ssse3_deps="x86 sse2"
vis_deps="sparc"
# system headers and functions
@@ -1954,7 +1968,13 @@ fi
echo "big-endian ${bigendian-no}"
if test $arch = "x86_32" -o $arch = "x86_64"; then
echo "yasm ${yasm-no}"
+ echo "3DNow! enabled ${amd3dnow-no}"
+ echo "3DNowEx! enabled ${amd3dnowex-no}"
echo "MMX enabled ${mmx-no}"
+ echo "MMX2 enabled ${mmx2-no}"
+ echo "SSE enabled ${sse-no}"
+ echo "SSE2 enabled ${sse2-no}"
+ echo "SSSE3 enabled ${ssse3-no}"
echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}"
diff -up ffmpeg-20080908/libavcodec/fft.c.cpu ffmpeg-20080908/libavcodec/fft.c
--- ffmpeg-20080908/libavcodec/fft.c.cpu 2008-08-12 02:38:30.000000000 +0200
+++ ffmpeg-20080908/libavcodec/fft.c 2008-11-01 16:23:33.000000000 +0100
@@ -91,24 +91,30 @@ int ff_fft_init(FFTContext *s, int nbits
s->imdct_half = ff_imdct_half_c;
s->exptab1 = NULL;
-#if defined HAVE_MMX && defined HAVE_YASM
+#if (defined HAVE_MMX || defined HAVE_SSE || defined HAVE_AMD3DNOW) && defined HAVE_YASM
has_vectors = mm_support();
if (has_vectors & MM_SSE) {
+#ifdef HAVE_SSE
/* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_sse;
+#endif
} else if (has_vectors & MM_3DNOWEXT) {
+#ifdef HAVE_AMD3DNOWEX
/* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dn2;
s->imdct_half = ff_imdct_half_3dn2;
s->fft_calc = ff_fft_calc_3dn2;
+#endif
} else if (has_vectors & MM_3DNOW) {
+#ifdef HAVE_AMD3DNOW
/* 3DNow! for K6-2/3 */
s->imdct_calc = ff_imdct_calc_3dn;
s->imdct_half = ff_imdct_half_3dn;
s->fft_calc = ff_fft_calc_3dn;
+#endif
}
#elif defined HAVE_ALTIVEC && !defined ALTIVEC_USE_REFERENCE_C_CODE
has_vectors = mm_support();
diff -up ffmpeg-20080908/libavcodec/Makefile.cpu ffmpeg-20080908/libavcodec/Makefile
--- ffmpeg-20080908/libavcodec/Makefile.cpu 2008-09-02 06:04:26.000000000 +0200
+++ ffmpeg-20080908/libavcodec/Makefile 2008-11-01 16:23:33.000000000 +0100
@@ -383,6 +388,12 @@ OBJS += imgresample.o
endif
# processor-specific code
+ifdef HAVE_AMD3DNOW
+OBJS-$(HAVE_YASM) += i386/fft_3dn.o
+endif
+ifdef HAVE_AMD3DNOWEX
+OBJS-$(HAVE_YASM) += i386/fft_3dn2.o
+endif
ifdef HAVE_MMX
OBJS += i386/fdct_mmx.o \
i386/cpuid.o \
@@ -391,12 +402,8 @@ OBJS += i386/fdct_mmx.o \
i386/motion_est_mmx.o \
i386/simple_idct_mmx.o \
i386/idct_mmx_xvid.o \
- i386/idct_sse2_xvid.o \
OBJS-$(HAVE_YASM) += i386/fft_mmx.o \
- i386/fft_sse.o \
- i386/fft_3dn.o \
- i386/fft_3dn2.o \
i386/dsputil_yasm.o \
OBJS-$(CONFIG_GPL) += i386/idct_mmx.o
@@ -414,6 +426,12 @@ OBJS-$(CONFIG_VP6A_DECODER) +
OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o
endif
+ifdef HAVE_SSE
+OBJS-$(HAVE_YASM) += i386/fft_sse.o
+endif
+ifdef HAVE_SSE2
+OBJS += i386/idct_sse2_xvid.o
+endif
ASM_OBJS-$(ARCH_ARMV4L) += armv4l/jrevdct_arm.o \
armv4l/simple_idct_arm.o \
diff -up ffmpeg-20080908/libpostproc/postprocess.c.cpu ffmpeg-20080908/libpostproc/postprocess.c
--- ffmpeg-20080908/libpostproc/postprocess.c.cpu 2008-09-01 20:00:53.000000000 +0200
+++ ffmpeg-20080908/libpostproc/postprocess.c 2008-11-01 16:23:33.000000000 +0100
@@ -83,7 +83,7 @@ try to unroll inner for(x=0 ... loop to
#include <malloc.h>
#endif
//#undef HAVE_MMX2
-//#define HAVE_3DNOW
+//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define DEBUG_BRIGHTNESS
@@ -567,7 +567,7 @@ static av_always_inline void do_a_debloc
#if defined(ARCH_X86)
-#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
+#if (defined (HAVE_MMX) && !defined (HAVE_AMD3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif
@@ -575,20 +575,20 @@ static av_always_inline void do_a_debloc
#define COMPILE_MMX2
#endif
-#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
+#if (defined (HAVE_AMD3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */
#undef HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
#undef HAVE_ALTIVEC
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif
@@ -606,7 +606,7 @@ static av_always_inline void do_a_debloc
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif
@@ -616,7 +616,7 @@ static av_always_inline void do_a_debloc
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif
@@ -626,7 +626,7 @@ static av_always_inline void do_a_debloc
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
-#define HAVE_3DNOW
+#define HAVE_AMD3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
@@ -665,7 +665,7 @@ static inline void postProcess(const uin
#else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif defined (HAVE_3DNOW)
+#elif defined (HAVE_AMD3DNOW)
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
#elif defined (HAVE_MMX)
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
diff -up ffmpeg-20080908/libpostproc/postprocess_template.c.cpu ffmpeg-20080908/libpostproc/postprocess_template.c
--- ffmpeg-20080908/libpostproc/postprocess_template.c.cpu 2008-05-09 13:56:36.000000000 +0200
+++ ffmpeg-20080908/libpostproc/postprocess_template.c 2008-11-01 16:23:33.000000000 +0100
@@ -33,7 +33,7 @@
#ifdef HAVE_MMX2
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif defined (HAVE_3DNOW)
+#elif defined (HAVE_AMD3DNOW)
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#define PAVGB(a,b) REAL_PAVGB(a,b)
@@ -179,7 +179,7 @@ static inline int RENAME(vertClassify)(u
#ifndef HAVE_ALTIVEC
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*3;
asm volatile( //"movv %0 %1 %2\n\t"
"movq %2, %%mm0 \n\t" // QP,..., QP
@@ -306,7 +306,7 @@ static inline void RENAME(doVertLowPass)
: "r" (src), "r" ((long)stride), "m" (c->pQPb)
: "%"REG_a, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
const int l1= stride;
const int l2= stride + l1;
const int l3= stride + l2;
@@ -345,7 +345,7 @@ static inline void RENAME(doVertLowPass)
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#endif //HAVE_ALTIVEC
@@ -364,7 +364,7 @@ static inline void RENAME(doVertLowPass)
*/
static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*3;
// FIXME rounding
asm volatile(
@@ -426,7 +426,7 @@ static inline void RENAME(vertRK1Filter)
: "r" (src), "r" ((long)stride)
: "%"REG_a, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
const int l1= stride;
const int l2= stride + l1;
const int l3= stride + l2;
@@ -449,7 +449,7 @@ static inline void RENAME(vertRK1Filter)
}
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#endif //0
@@ -462,7 +462,7 @@ static inline void RENAME(vertRK1Filter)
*/
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*3;
asm volatile(
@@ -548,7 +548,7 @@ static inline void RENAME(vertX1Filter)(
: "r" (src), "r" ((long)stride), "m" (co->pQPb)
: "%"REG_a, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
const int l1= stride;
const int l2= stride + l1;
@@ -582,13 +582,13 @@ static inline void RENAME(vertX1Filter)(
}
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#ifndef HAVE_ALTIVEC
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
/*
uint8_t tmp[16];
const int l1= stride;
@@ -1101,7 +1101,7 @@ static inline void RENAME(doVertDefFilte
: "r" ((long)stride), "m" (c->pQPb)
: "%"REG_a, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
const int l1= stride;
const int l2= stride + l1;
const int l3= stride + l2;
@@ -1139,14 +1139,14 @@ static inline void RENAME(doVertDefFilte
}
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#endif //HAVE_ALTIVEC
#ifndef HAVE_ALTIVEC
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
asm volatile(
"pxor %%mm6, %%mm6 \n\t"
"pcmpeqb %%mm7, %%mm7 \n\t"
@@ -1370,7 +1370,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
: : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
: "%"REG_a, "%"REG_d, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
int y;
int min=255;
int max=0;
@@ -1487,7 +1487,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
}
#endif
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#endif //HAVE_ALTIVEC
@@ -1499,7 +1499,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
*/
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= 4*stride;
asm volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1552,7 +1552,7 @@ static inline void RENAME(deInterlaceInt
*/
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*3;
asm volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1594,7 +1594,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
: : "r" (src), "r" ((long)stride)
: "%"REG_a, "%"REG_d, "%"REG_c
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
int x;
src+= stride*3;
for(x=0; x<8; x++){
@@ -1604,7 +1604,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
/**
@@ -1616,7 +1616,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
*/
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*4;
asm volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1665,7 +1665,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
: : "r" (src), "r" ((long)stride), "r"(tmp)
: "%"REG_a, "%"REG_d
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
int x;
src+= stride*4;
for(x=0; x<8; x++){
@@ -1683,7 +1683,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
/**
@@ -1695,7 +1695,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
*/
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= stride*4;
asm volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1755,7 +1755,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
: : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
: "%"REG_a, "%"REG_d
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
int x;
src+= stride*4;
for(x=0; x<8; x++){
@@ -1784,7 +1784,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
src++;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
/**
@@ -1796,7 +1796,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
*/
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
{
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
src+= 4*stride;
asm volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1843,7 +1843,7 @@ static inline void RENAME(deInterlaceBle
: : "r" (src), "r" ((long)stride), "r" (tmp)
: "%"REG_a, "%"REG_d
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
int a, b, c, x;
src+= 4*stride;
@@ -1886,7 +1886,7 @@ static inline void RENAME(deInterlaceBle
src += 4;
tmp += 4;
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
/**
@@ -2191,7 +2191,7 @@ static inline void RENAME(tempNoiseReduc
#define FAST_L2_DIFF
//#define L1_DIFF //u should change the thresholds too if u try that one
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
asm volatile(
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
@@ -2479,7 +2479,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc
:: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
);
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
{
int y;
int d=0;
@@ -2562,7 +2562,7 @@ Switch between
}
}
}
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
}
#endif //HAVE_ALTIVEC
@@ -3411,7 +3411,7 @@ static void RENAME(postProcess)(const ui
: "%"REG_a, "%"REG_d
);
-#elif defined(HAVE_3DNOW)
+#elif defined(HAVE_AMD3DNOW)
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3547,7 +3547,7 @@ static void RENAME(postProcess)(const ui
: "%"REG_a, "%"REG_d
);
-#elif defined(HAVE_3DNOW)
+#elif defined(HAVE_AMD3DNOW)
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3699,7 +3699,7 @@ static void RENAME(postProcess)(const ui
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
}*/
}
-#ifdef HAVE_3DNOW
+#ifdef HAVE_AMD3DNOW
asm volatile("femms");
#elif defined (HAVE_MMX)
asm volatile("emms");