diff -up ffmpeg-20080908/configure.cpu ffmpeg-20080908/configure
--- ffmpeg-20080908/configure.cpu	2008-09-02 06:04:26.000000000 +0200
+++ ffmpeg-20080908/configure	2008-11-01 23:05:08.000000000 +0100
@@ -122,8 +122,12 @@ show_help(){
   echo "                           instruction selection, may crash on older CPUs)"
   echo "  --enable-powerpc-perf    enable performance report on PPC"
   echo "                           (requires enabling PMC)"
+  echo "  --disable-amd3dnow       disable amd3dnow! usage"
+  echo "  --disable-amd3dnowex     disable amd3dnowEx! usage"
   echo "  --disable-mmx            disable MMX usage"
   echo "  --disable-mmx2           disable MMX2 usage"
+  echo "  --disable-sse            disable SSE usage"
+  echo "  --disable-sse2           disable SSE2 usage"
   echo "  --disable-ssse3          disable SSSE3 usage"
   echo "  --disable-armv5te        disable armv5te usage"
   echo "  --disable-armv6          disable armv6 usage"
@@ -719,6 +723,8 @@ ARCH_LIST='
 '
 
 ARCH_EXT_LIST='
+    amd3dnow
+    amd3dnowex
     altivec
     armv5te
     armv6
@@ -728,6 +734,8 @@ ARCH_EXT_LIST='
     mmx
     mmx2
     neon
+    sse
+    sse2
     ssse3
     vis
 '
@@ -830,6 +838,8 @@ CMDLINE_SET="
 # code dependency declarations
 
 # architecture extensions
+amd3dnow_deps="x86"
+amd3dnowex_deps="x86 amd3dnow"
 altivec_deps="powerpc"
 armv5te_deps="armv4l"
 armv6_deps="armv4l"
@@ -839,7 +849,9 @@ mmi_deps="mips"
 mmx_deps="x86"
 mmx2_deps="x86 mmx"
 neon_deps="armv4l"
-ssse3_deps="x86"
+sse_deps="x86"
+sse2_deps="x86 sse"
+ssse3_deps="x86 sse2"
 vis_deps="sparc"
 
 # system headers and functions
@@ -1954,7 +1968,13 @@ fi
 echo "big-endian                ${bigendian-no}"
 if test $arch = "x86_32" -o $arch = "x86_64"; then
     echo "yasm                      ${yasm-no}"
+    echo "3DNow! enabled            ${amd3dnow-no}"
+    echo "3DNowEx! enabled          ${amd3dnowex-no}"
     echo "MMX enabled               ${mmx-no}"
+    echo "MMX2 enabled              ${mmx2-no}"
+    echo "SSE enabled               ${sse-no}"
+    echo "SSE2 enabled              ${sse2-no}"
+    echo "SSSE3 enabled             ${ssse3-no}"
     echo "CMOV enabled              ${cmov-no}"
     echo "CMOV is fast              ${fast_cmov-no}"
     echo "EBX available             ${ebx_available-no}"
diff -up ffmpeg-20080908/libavcodec/fft.c.cpu ffmpeg-20080908/libavcodec/fft.c
--- ffmpeg-20080908/libavcodec/fft.c.cpu	2008-08-12 02:38:30.000000000 +0200
+++ ffmpeg-20080908/libavcodec/fft.c	2008-11-01 16:23:33.000000000 +0100
@@ -91,24 +91,30 @@ int ff_fft_init(FFTContext *s, int nbits
     s->imdct_half = ff_imdct_half_c;
     s->exptab1 = NULL;
 
-#if defined HAVE_MMX && defined HAVE_YASM
+#if (defined HAVE_MMX || defined HAVE_SSE || defined HAVE_AMD3DNOW) && defined HAVE_YASM
     has_vectors = mm_support();
     if (has_vectors & MM_SSE) {
+#ifdef HAVE_SSE
         /* SSE for P3/P4/K8 */
         s->imdct_calc = ff_imdct_calc_sse;
        s->imdct_half = ff_imdct_half_sse;
         s->fft_permute = ff_fft_permute_sse;
         s->fft_calc = ff_fft_calc_sse;
+#endif
     } else if (has_vectors & MM_3DNOWEXT) {
+#ifdef HAVE_AMD3DNOWEX
         /* 3DNowEx for K7 */
         s->imdct_calc = ff_imdct_calc_3dn2;
         s->imdct_half = ff_imdct_half_3dn2;
         s->fft_calc = ff_fft_calc_3dn2;
+#endif
     } else if (has_vectors & MM_3DNOW) {
+#ifdef HAVE_AMD3DNOW
         /* 3DNow! for K6-2/3 */
         s->imdct_calc = ff_imdct_calc_3dn;
         s->imdct_half = ff_imdct_half_3dn;
         s->fft_calc = ff_fft_calc_3dn;
+#endif
     }
 #elif defined HAVE_ALTIVEC && !defined ALTIVEC_USE_REFERENCE_C_CODE
     has_vectors = mm_support();
diff -up ffmpeg-20080908/libavcodec/Makefile.cpu ffmpeg-20080908/libavcodec/Makefile
--- ffmpeg-20080908/libavcodec/Makefile.cpu	2008-09-02 06:04:26.000000000 +0200
+++ ffmpeg-20080908/libavcodec/Makefile	2008-11-01 16:23:33.000000000 +0100
@@ -383,6 +388,12 @@ OBJS += imgresample.o
 endif
 
 # processor-specific code
+ifdef HAVE_AMD3DNOW
+OBJS-$(HAVE_YASM) += i386/fft_3dn.o
+endif
+ifdef HAVE_AMD3DNOWEX
+OBJS-$(HAVE_YASM) += i386/fft_3dn2.o
+endif
 ifdef HAVE_MMX
 OBJS += i386/fdct_mmx.o \
         i386/cpuid.o \
@@ -391,12 +402,8 @@ OBJS += i386/fdct_mmx.o \
         i386/motion_est_mmx.o \
         i386/simple_idct_mmx.o \
         i386/idct_mmx_xvid.o \
-        i386/idct_sse2_xvid.o \
 
 OBJS-$(HAVE_YASM) += i386/fft_mmx.o \
-                     i386/fft_sse.o \
-                     i386/fft_3dn.o \
-                     i386/fft_3dn2.o \
                      i386/dsputil_yasm.o \
 
 OBJS-$(CONFIG_GPL) += i386/idct_mmx.o
@@ -414,6 +426,12 @@ OBJS-$(CONFIG_VP6A_DECODER) +
 OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
 OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o
 endif
+ifdef HAVE_SSE
+OBJS-$(HAVE_YASM) += i386/fft_sse.o
+endif
+ifdef HAVE_SSE2
+OBJS += i386/idct_sse2_xvid.o
+endif
 
 ASM_OBJS-$(ARCH_ARMV4L) += armv4l/jrevdct_arm.o \
                            armv4l/simple_idct_arm.o \
diff -up ffmpeg-20080908/libpostproc/postprocess.c.cpu ffmpeg-20080908/libpostproc/postprocess.c
--- ffmpeg-20080908/libpostproc/postprocess.c.cpu	2008-09-01 20:00:53.000000000 +0200
+++ ffmpeg-20080908/libpostproc/postprocess.c	2008-11-01 16:23:33.000000000 +0100
@@ -83,7 +83,7 @@ try to unroll inner for(x=0 ... loop to
 #include <malloc.h>
 #endif
 //#undef HAVE_MMX2
-//#define HAVE_3DNOW
+//#define HAVE_AMD3DNOW
 //#undef HAVE_MMX
 //#undef ARCH_X86
 //#define DEBUG_BRIGHTNESS
@@ -567,7 +567,7 @@ static av_always_inline void do_a_debloc
 
 #if defined(ARCH_X86)
 
-#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
+#if (defined (HAVE_MMX) && !defined (HAVE_AMD3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_MMX
 #endif
 
@@ -575,20 +575,20 @@ static av_always_inline void do_a_debloc
 #define COMPILE_MMX2
 #endif
 
-#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
+#if (defined (HAVE_AMD3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_3DNOW
 #endif
 #endif /* defined(ARCH_X86) */
 
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #undef HAVE_ALTIVEC
 
 #ifdef COMPILE_C
 #undef HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define RENAME(a) a ## _C
 #include "postprocess_template.c"
 #endif
@@ -606,7 +606,7 @@ static av_always_inline void do_a_debloc
 #undef RENAME
 #define HAVE_MMX
 #undef HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define RENAME(a) a ## _MMX
 #include "postprocess_template.c"
 #endif
@@ -616,7 +616,7 @@ static av_always_inline void do_a_debloc
 #undef RENAME
 #define HAVE_MMX
 #define HAVE_MMX2
-#undef HAVE_3DNOW
+#undef HAVE_AMD3DNOW
 #define RENAME(a) a ## _MMX2
 #include "postprocess_template.c"
 #endif
@@ -626,7 +626,7 @@ static av_always_inline void do_a_debloc
 #undef RENAME
 #define HAVE_MMX
 #undef HAVE_MMX2
-#define HAVE_3DNOW
+#define HAVE_AMD3DNOW
 #define RENAME(a) a ## _3DNow
 #include "postprocess_template.c"
 #endif
@@ -665,7 +665,7 @@ static inline void postProcess(const uin
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif defined (HAVE_3DNOW)
+#elif defined (HAVE_AMD3DNOW)
         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif defined (HAVE_MMX)
         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
diff -up ffmpeg-20080908/libpostproc/postprocess_template.c.cpu ffmpeg-20080908/libpostproc/postprocess_template.c
--- ffmpeg-20080908/libpostproc/postprocess_template.c.cpu	2008-05-09 13:56:36.000000000 +0200
+++ ffmpeg-20080908/libpostproc/postprocess_template.c	2008-11-01 16:23:33.000000000 +0100
@@ -33,7 +33,7 @@
 
 #ifdef HAVE_MMX2
 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif defined (HAVE_3DNOW)
+#elif defined (HAVE_AMD3DNOW)
 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
 #endif
 #define PAVGB(a,b) REAL_PAVGB(a,b)
@@ -179,7 +179,7 @@ static inline int RENAME(vertClassify)(u
 #ifndef HAVE_ALTIVEC
 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*3;
     asm volatile( //"movv %0 %1 %2\n\t"
         "movq %2, %%mm0                 \n\t" // QP,..., QP
@@ -306,7 +306,7 @@ static inline void RENAME(doVertLowPass)
         : "r" (src), "r" ((long)stride), "m" (c->pQPb)
         : "%"REG_a, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     const int l1= stride;
     const int l2= stride + l1;
     const int l3= stride + l2;
@@ -345,7 +345,7 @@ static inline void RENAME(doVertLowPass)
 
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -364,7 +364,7 @@ static inline void RENAME(doVertLowPass)
 */
 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*3;
 // FIXME rounding
     asm volatile(
@@ -426,7 +426,7 @@ static inline void RENAME(vertRK1Filter)
         : "r" (src), "r" ((long)stride)
         : "%"REG_a, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     const int l1= stride;
     const int l2= stride + l1;
     const int l3= stride + l2;
@@ -449,7 +449,7 @@ static inline void RENAME(vertRK1Filter)
 
         }
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 #endif //0
 
@@ -462,7 +462,7 @@ static inline void RENAME(vertRK1Filter)
 */
 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*3;
 
     asm volatile(
@@ -548,7 +548,7 @@ static inline void RENAME(vertX1Filter)(
         : "r" (src), "r" ((long)stride), "m" (co->pQPb)
         : "%"REG_a, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 
     const int l1= stride;
     const int l2= stride + l1;
@@ -582,13 +582,13 @@ static inline void RENAME(vertX1Filter)(
         }
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 
 #ifndef HAVE_ALTIVEC
 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 /*
     uint8_t tmp[16];
     const int l1= stride;
@@ -1101,7 +1101,7 @@ static inline void RENAME(doVertDefFilte
         : "r" ((long)stride), "m" (c->pQPb)
         : "%"REG_a, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     const int l1= stride;
     const int l2= stride + l1;
     const int l3= stride + l2;
@@ -1139,14 +1139,14 @@ static inline void RENAME(doVertDefFilte
         }
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
 #ifndef HAVE_ALTIVEC
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     asm volatile(
         "pxor %%mm6, %%mm6              \n\t"
         "pcmpeqb %%mm7, %%mm7           \n\t"
@@ -1370,7 +1370,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
         : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
         : "%"REG_a, "%"REG_d, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     int y;
     int min=255;
     int max=0;
@@ -1487,7 +1487,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
 //    src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
     }
 #endif
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -1499,7 +1499,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1,
 */
 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= 4*stride;
     asm volatile(
         "lea (%0, %1), %%"REG_a"        \n\t"
@@ -1552,7 +1552,7 @@ static inline void RENAME(deInterlaceInt
 */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*3;
     asm volatile(
         "lea (%0, %1), %%"REG_a"        \n\t"
@@ -1594,7 +1594,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
         : : "r" (src), "r" ((long)stride)
         : "%"REG_a, "%"REG_d, "%"REG_c
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     int x;
     src+= stride*3;
     for(x=0; x<8; x++){
@@ -1604,7 +1604,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
         src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 
 /**
@@ -1616,7 +1616,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) ,
 */
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*4;
     asm volatile(
         "lea (%0, %1), %%"REG_a"        \n\t"
@@ -1665,7 +1665,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
         : : "r" (src), "r" ((long)stride), "r"(tmp)
         : "%"REG_a, "%"REG_d
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     int x;
     src+= stride*4;
     for(x=0; x<8; x++){
@@ -1683,7 +1683,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
 
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 
 /**
@@ -1695,7 +1695,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2),
 */
 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= stride*4;
     asm volatile(
         "lea (%0, %1), %%"REG_a"        \n\t"
@@ -1755,7 +1755,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
         : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
         : "%"REG_a, "%"REG_d
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     int x;
     src+= stride*4;
     for(x=0; x<8; x++){
@@ -1784,7 +1784,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
 
         src++;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 
 /**
@@ -1796,7 +1796,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2),
 */
 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     src+= 4*stride;
     asm volatile(
         "lea (%0, %1), %%"REG_a"        \n\t"
@@ -1843,7 +1843,7 @@ static inline void RENAME(deInterlaceBle
         : : "r" (src), "r" ((long)stride), "r" (tmp)
         : "%"REG_a, "%"REG_d
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     int a, b, c, x;
     src+= 4*stride;
 
@@ -1886,7 +1886,7 @@ static inline void RENAME(deInterlaceBle
         src += 4;
         tmp += 4;
     }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 
 /**
@@ -2191,7 +2191,7 @@ static inline void RENAME(tempNoiseReduc
 
 #define FAST_L2_DIFF
 //#define L1_DIFF //u should change the thresholds too if u try that one
-#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#if defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
     asm volatile(
         "lea (%2, %2, 2), %%"REG_a"     \n\t" // 3*stride
         "lea (%2, %2, 4), %%"REG_d"     \n\t" // 5*stride
@@ -2479,7 +2479,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc
         :: "r" (src), "r" (tempBlurred), "r"((long)stride), "m" (tempBlurredPast)
         : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
     );
-#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#else //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 {
     int y;
     int d=0;
@@ -2562,7 +2562,7 @@ Switch between
         }
     }
 }
-#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+#endif //defined (HAVE_MMX2) || defined (HAVE_AMD3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -3411,7 +3411,7 @@ static void RENAME(postProcess)(const ui
 
         : "%"REG_a, "%"REG_d
         );
-#elif defined(HAVE_3DNOW)
+#elif defined(HAVE_AMD3DNOW)
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*      prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
         prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3547,7 +3547,7 @@ static void RENAME(postProcess)(const ui
 
         : "%"REG_a, "%"REG_d
         );
-#elif defined(HAVE_3DNOW)
+#elif defined(HAVE_AMD3DNOW)
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*      prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
         prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3699,7 +3699,7 @@ static void RENAME(postProcess)(const ui
                     + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
         }*/
     }
-#ifdef HAVE_3DNOW
+#ifdef HAVE_AMD3DNOW
     asm volatile("femms");
 #elif defined (HAVE_MMX)
     asm volatile("emms");