chromStride, dstStride, 1);
}
-/**
- * Split a packed 4:2:2 buffer with byte order Y0 U Y1 V into three planes
- * (ydst, udst, vdst), keeping chroma for every second row only.
- *
- * Rows are handled in pairs: the first row of a pair supplies luma and
- * chroma, the second row supplies luma only (its U/V bytes are skipped,
- * not averaged). udst/vdst therefore advance once per two source rows.
- *
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- *
- * NOTE(review): this C loop only uses width >> 1, so an odd width just
- * drops the last pixel; the multiple-of-16 rule presumably comes from the
- * SIMD variants -- confirm. An odd height, however, makes the last
- * iteration read and write one row beyond `height`.
- *
- * @param src         packed input, srcStride bytes between rows
- * @param ydst        luma output, lumStride bytes between rows
- * @param udst        U output, chromStride bytes between rows
- * @param vdst        V output, chromStride bytes between rows
- * @param width       row width in luma samples
- * @param height      number of source rows
- */
-static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
- uint8_t *udst, uint8_t *vdst,
- int width, int height, int lumStride,
- int chromStride, int srcStride)
-{
- int y;
- const int chromWidth = width >> 1; /* one U and one V per two luma samples */
-
- for (y = 0; y < height; y += 2) { /* two source rows per iteration */
- int i;
- for (i = 0; i < chromWidth; i++) { /* first row: luma and chroma */
- ydst[2 * i + 0] = src[4 * i + 0];
- udst[i] = src[4 * i + 1];
- ydst[2 * i + 1] = src[4 * i + 2];
- vdst[i] = src[4 * i + 3];
- }
- ydst += lumStride;
- src += srcStride;
-
- for (i = 0; i < chromWidth; i++) { /* second row: luma only */
- ydst[2 * i + 0] = src[4 * i + 0];
- ydst[2 * i + 1] = src[4 * i + 2];
- }
- udst += chromStride; /* chroma planes advance once per row pair */
- vdst += chromStride;
- ydst += lumStride;
- src += srcStride;
- }
-}
-
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, int srcWidth,
int srcHeight, int srcStride, int dstStride)
{
}
}
-static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
- uint8_t *dst1, uint8_t *dst2,
- int width, int height,
- int srcStride1, int srcStride2,
- int dstStride1, int dstStride2)
-{ /*
- * Enlarge two independent byte planes by a factor of two in both
- * directions using plain replication (no interpolation):
- * src1 -> dst1 and src2 -> dst2.
- *
- * Each destination plane receives height / 2 rows of 2 * (width / 2)
- * bytes. Destination row y is built from source row y >> 1, and every
- * source byte is stored twice in a row.
- */
- int x, y;
- int w = width / 2; /* source bytes consumed per row */
- int h = height / 2; /* destination rows produced per plane */
-
- for (y = 0; y < h; y++) { /* first plane */
- const uint8_t *s1 = src1 + srcStride1 * (y >> 1); /* one source row feeds two output rows */
- uint8_t *d = dst1 + dstStride1 * y;
- for (x = 0; x < w; x++)
- d[2 * x] = d[2 * x + 1] = s1[x]; /* duplicate horizontally */
- }
- for (y = 0; y < h; y++) { /* second plane, same procedure */
- const uint8_t *s2 = src2 + srcStride2 * (y >> 1);
- uint8_t *d = dst2 + dstStride2 * y;
- for (x = 0; x < w; x++)
- d[2 * x] = d[2 * x + 1] = s2[x];
- }
-}
-
-static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
- const uint8_t *src3, uint8_t *dst,
- int width, int height,
- int srcStride1, int srcStride2,
- int srcStride3, int dstStride)
-{ /*
- * Interleave three planes into one packed buffer with byte order
- * Y U Y V. src1 supplies the Y bytes, src2 the U bytes and src3 the
- * V bytes.
- *
- * One U/V pair is shared by four consecutive luma bytes in a row, and
- * one chroma row (index y >> 2) is shared by four luma rows.
- *
- * NOTE(review): with w = width / 2 and four luma bytes per step, each
- * row reads about 2 * width luma bytes and writes about 4 * width
- * output bytes. Confirm against callers what `width` is meant to
- * count before reusing this routine.
- */
- int x, y;
- int w = width / 2; /* chroma samples handled per row */
- int h = height;
-
- for (y = 0; y < h; y++) {
- const uint8_t *yp = src1 + srcStride1 * y;
- const uint8_t *up = src2 + srcStride2 * (y >> 2); /* chroma row reused for 4 luma rows */
- const uint8_t *vp = src3 + srcStride3 * (y >> 2);
- uint8_t *d = dst + dstStride * y;
- for (x = 0; x < w; x++) {
- const int x2 = x << 2; /* index of the first of four luma bytes */
- d[8 * x + 0] = yp[x2];
- d[8 * x + 1] = up[x];
- d[8 * x + 2] = yp[x2 + 1];
- d[8 * x + 3] = vp[x];
- d[8 * x + 4] = yp[x2 + 2];
- d[8 * x + 5] = up[x]; /* same U/V pair repeated for the next two luma bytes */
- d[8 * x + 6] = yp[x2 + 3];
- d[8 * x + 7] = vp[x];
- }
- }
-}
-
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
dst += count;
yv12touyvy = yv12touyvy_c;
yuv422ptoyuy2 = yuv422ptoyuy2_c;
yuv422ptouyvy = yuv422ptouyvy_c;
- yuy2toyv12 = yuy2toyv12_c;
planar2x = planar2x_c;
ff_rgb24toyv12 = ff_rgb24toyv12_c;
interleaveBytes = interleaveBytes_c;
deinterleaveBytes = deinterleaveBytes_c;
- vu9_to_vu12 = vu9_to_vu12_c;
- yvu9_to_yuy2 = yvu9_to_yuy2_c;
uyvytoyuv420 = uyvytoyuv420_c;
uyvytoyuv422 = uyvytoyuv422_c;
yuvPlanartoyuy2_mmxext(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
}
-/**
- * MMX inline-asm variant of the packed Y0 U Y1 V -> planar split.
- * Rows are handled in pairs: the first row of a pair supplies luma and
- * chroma, the second row supplies luma only.
- *
- * Height should be a multiple of 2 and width should be a multiple of 16.
- * (If this is a problem for anyone then tell me, and I will fix it.)
- *
- * The width rule is a hard requirement in this version: every asm loop
- * iteration consumes 32 source bytes (16 pixels), the exit test sits at
- * the bottom of the loop, and there is no scalar tail. The loop body thus
- * always runs at least once and overshoots the row whenever width is not
- * a multiple of 16.
- *
- * PREFETCH, MOVNTQ, EMMS, SFENCE and FF_REG_a are macros defined outside
- * this excerpt.
- *
- * NOTE(review): the second asm statement relies on the 0x00FF word mask
- * that the first one leaves in mm7, and no mm register appears in either
- * clobber list -- confirm nothing can touch mm7 in between.
- */
-static inline void yuy2toyv12_mmxext(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- int width, int height,
- int lumStride, int chromStride, int srcStride)
-{
- const x86_reg chromWidth= width>>1; /* loop bound, counted in chroma samples */
- for (int y = 0; y < height; y += 2) { /* two source rows per iteration */
- __asm__ volatile( /* first row: writes 16 Y, 8 U and 8 V bytes per loop pass */
- "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
- "pcmpeqw %%mm7, %%mm7 \n\t"
- "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
- ".p2align 4 \n\t"
- "1: \n\t"
- PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
- "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
- "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
- "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
- "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(4)
- "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
- "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
- "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
- "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
-
- MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
-
- "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
- "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
- "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
- "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
- "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
- "psrlw $8, %%mm2 \n\t" // U0V0 U0V0(12)
- "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
- "pand %%mm7, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
- "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
- "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
-
- MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
-
- "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
- "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
- "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
- "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
- "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
- "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
- "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
- "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
-
- MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
- MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
-
- "add $8, %%"FF_REG_a" \n\t"
- "cmp %4, %%"FF_REG_a" \n\t"
- " jb 1b \n\t"
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
- : "memory", "%"FF_REG_a
- );
-
- ydst += lumStride; /* step to the second row of the pair */
- src += srcStride;
-
- __asm__ volatile( /* second row: luma only, mm7 mask is not re-created here */
- "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
- ".p2align 4 \n\t"
- "1: \n\t"
- PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
- "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
- "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
- "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
- "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
- "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
- "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
- "pand %%mm7, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
- "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
- "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
-
- MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
- MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
-
- "add $8, %%"FF_REG_a"\n\t"
- "cmp %4, %%"FF_REG_a"\n\t"
- " jb 1b \n\t"
-
- ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
- : "memory", "%"FF_REG_a
- );
- udst += chromStride; /* chroma planes advance once per row pair */
- vdst += chromStride;
- ydst += lumStride;
- src += srcStride;
- }
- __asm__ volatile(EMMS" \n\t" /* issued once, after all rows are done */
- SFENCE" \n\t"
- :::"memory");
-}
-
static inline void planar2x_mmxext(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride)
{
dst[0]= src[0];
}
#endif /* HAVE_7REGS */
-static inline void vu9_to_vu12_mmxext(const uint8_t *src1, const uint8_t *src2,
- uint8_t *dst1, uint8_t *dst2,
- int width, int height,
- int srcStride1, int srcStride2,
- int dstStride1, int dstStride2)
-{ /*
- * MMX inline-asm variant of the 2x replication of two byte planes
- * (src1 -> dst1, src2 -> dst2). Destination row y is built from
- * source row y >> 1 and every source byte is stored twice.
- *
- * The asm loop handles 32 source bytes (64 destination bytes) per
- * pass; a scalar loop finishes the remaining bytes of each row, so
- * the row width does not have to be a multiple of 32.
- *
- * PREFETCH, MOVNTQ, EMMS and SFENCE are macros defined outside this
- * excerpt.
- */
- int w,h;
- w=width/2; h=height/2; /* source bytes per row, destination rows per plane */
- __asm__ volatile(
- PREFETCH" %0 \n\t"
- PREFETCH" %1 \n\t"
- ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
- for (x86_reg y = 0; y < h; y++) { /* first plane */
- const uint8_t* s1=src1+srcStride1*(y>>1); /* one source row feeds two output rows */
- uint8_t* d=dst1+dstStride1*y;
- x86_reg x = 0;
- for (;x<w-31;x+=32) { /* only while a full 32-byte group remains */
- __asm__ volatile(
- PREFETCH" 32(%1,%2) \n\t"
- "movq (%1,%2), %%mm0 \n\t"
- "movq 8(%1,%2), %%mm2 \n\t"
- "movq 16(%1,%2), %%mm4 \n\t"
- "movq 24(%1,%2), %%mm6 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "movq %%mm6, %%mm7 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t" // low 4 bytes, each doubled
- "punpckhbw %%mm1, %%mm1 \n\t" // high 4 bytes, each doubled
- "punpcklbw %%mm2, %%mm2 \n\t"
- "punpckhbw %%mm3, %%mm3 \n\t"
- "punpcklbw %%mm4, %%mm4 \n\t"
- "punpckhbw %%mm5, %%mm5 \n\t"
- "punpcklbw %%mm6, %%mm6 \n\t"
- "punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, (%0,%2,2) \n\t"
- MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
- MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
- MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
- MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
- MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
- MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
- MOVNTQ" %%mm7, 56(%0,%2,2)"
- :: "r"(d), "r"(s1), "r"(x)
- :"memory");
- }
- for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; /* scalar tail for the leftover bytes */
- }
- for (x86_reg y = 0; y < h; y++) { /* second plane, same procedure */
- const uint8_t* s2=src2+srcStride2*(y>>1);
- uint8_t* d=dst2+dstStride2*y;
- x86_reg x = 0;
- for (;x<w-31;x+=32) {
- __asm__ volatile(
- PREFETCH" 32(%1,%2) \n\t"
- "movq (%1,%2), %%mm0 \n\t"
- "movq 8(%1,%2), %%mm2 \n\t"
- "movq 16(%1,%2), %%mm4 \n\t"
- "movq 24(%1,%2), %%mm6 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "movq %%mm6, %%mm7 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpckhbw %%mm1, %%mm1 \n\t"
- "punpcklbw %%mm2, %%mm2 \n\t"
- "punpckhbw %%mm3, %%mm3 \n\t"
- "punpcklbw %%mm4, %%mm4 \n\t"
- "punpckhbw %%mm5, %%mm5 \n\t"
- "punpcklbw %%mm6, %%mm6 \n\t"
- "punpckhbw %%mm7, %%mm7 \n\t"
- MOVNTQ" %%mm0, (%0,%2,2) \n\t"
- MOVNTQ" %%mm1, 8(%0,%2,2) \n\t"
- MOVNTQ" %%mm2, 16(%0,%2,2) \n\t"
- MOVNTQ" %%mm3, 24(%0,%2,2) \n\t"
- MOVNTQ" %%mm4, 32(%0,%2,2) \n\t"
- MOVNTQ" %%mm5, 40(%0,%2,2) \n\t"
- MOVNTQ" %%mm6, 48(%0,%2,2) \n\t"
- MOVNTQ" %%mm7, 56(%0,%2,2)"
- :: "r"(d), "r"(s2), "r"(x)
- :"memory");
- }
- for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; /* scalar tail for the leftover bytes */
- }
- __asm__( /* issued once, after both planes are done */
- EMMS" \n\t"
- SFENCE" \n\t"
- ::: "memory"
- );
-}
-
-static inline void yvu9_to_yuy2_mmxext(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
- uint8_t *dst,
- int width, int height,
- int srcStride1, int srcStride2,
- int srcStride3, int dstStride)
-{ /*
- * MMX inline-asm variant of the three-plane -> packed Y U Y V
- * interleave. src1 supplies Y, src2 supplies U, src3 supplies V.
- * One U/V pair is shared by four consecutive luma bytes, and one
- * chroma row (index y >> 2) is shared by four luma rows.
- *
- * The asm loop handles 8 chroma samples per pass (32 luma bytes in,
- * 64 packed bytes out); a scalar loop finishes the rest of each row.
- *
- * PREFETCH, MOVNTQ, EMMS and SFENCE are macros defined outside this
- * excerpt.
- *
- * NOTE(review): with w = width / 2 and four luma bytes per chroma
- * sample, each row reads about 2 * width luma bytes and writes about
- * 4 * width output bytes. Confirm against callers what `width` is
- * meant to count.
- */
- int w,h;
- w=width/2; h=height; /* w counts chroma samples handled per row */
- for (int y = 0; y < h; y++) {
- const uint8_t* yp=src1+srcStride1*y;
- const uint8_t* up=src2+srcStride2*(y>>2); /* chroma row reused for 4 luma rows */
- const uint8_t* vp=src3+srcStride3*(y>>2);
- uint8_t* d=dst+dstStride*y;
- x86_reg x = 0;
- for (;x<w-7;x+=8) { /* only while a full group of 8 chroma samples remains */
- __asm__ volatile(
- PREFETCH" 32(%1, %0) \n\t"
- PREFETCH" 32(%2, %0) \n\t"
- PREFETCH" 32(%3, %0) \n\t"
- "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
- "movq (%2, %0), %%mm1 \n\t" /* U0U1U2U3U4U5U6U7 */
- "movq (%3, %0), %%mm2 \n\t" /* V0V1V2V3V4V5V6V7 */
- "movq %%mm0, %%mm3 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
- "movq %%mm1, %%mm4 \n\t" /* U0U1U2U3U4U5U6U7 */
- "movq %%mm2, %%mm5 \n\t" /* V0V1V2V3V4V5V6V7 */
- "punpcklbw %%mm1, %%mm1 \n\t" /* U0U0 U1U1 U2U2 U3U3 */
- "punpcklbw %%mm2, %%mm2 \n\t" /* V0V0 V1V1 V2V2 V3V3 */
- "punpckhbw %%mm4, %%mm4 \n\t" /* U4U4 U5U5 U6U6 U7U7 */
- "punpckhbw %%mm5, %%mm5 \n\t" /* V4V4 V5V5 V6V6 V7V7 */
-
- "movq %%mm1, %%mm6 \n\t"
- "punpcklbw %%mm2, %%mm1 \n\t" /* U0V0 U0V0 U1V1 U1V1*/
- "punpcklbw %%mm1, %%mm0 \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
- "punpckhbw %%mm1, %%mm3 \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
- MOVNTQ" %%mm0, (%4, %0, 8) \n\t"
- MOVNTQ" %%mm3, 8(%4, %0, 8) \n\t"
-
- "punpckhbw %%mm2, %%mm6 \n\t" /* U2V2 U2V2 U3V3 U3V3*/
- "movq 8(%1, %0, 4), %%mm0 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "punpcklbw %%mm6, %%mm0 \n\t" /* Y U2 Y V2 Y U2 Y V2*/
- "punpckhbw %%mm6, %%mm3 \n\t" /* Y U3 Y V3 Y U3 Y V3*/
- MOVNTQ" %%mm0, 16(%4, %0, 8) \n\t"
- MOVNTQ" %%mm3, 24(%4, %0, 8) \n\t"
-
- "movq %%mm4, %%mm6 \n\t"
- "movq 16(%1, %0, 4), %%mm0 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "punpcklbw %%mm5, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm0 \n\t" /* Y U4 Y V4 Y U4 Y V4*/
- "punpckhbw %%mm4, %%mm3 \n\t" /* Y U5 Y V5 Y U5 Y V5*/
- MOVNTQ" %%mm0, 32(%4, %0, 8) \n\t"
- MOVNTQ" %%mm3, 40(%4, %0, 8) \n\t"
-
- "punpckhbw %%mm5, %%mm6 \n\t"
- "movq 24(%1, %0, 4), %%mm0 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "punpcklbw %%mm6, %%mm0 \n\t" /* Y U6 Y V6 Y U6 Y V6*/
- "punpckhbw %%mm6, %%mm3 \n\t" /* Y U7 Y V7 Y U7 Y V7*/
- MOVNTQ" %%mm0, 48(%4, %0, 8) \n\t"
- MOVNTQ" %%mm3, 56(%4, %0, 8) \n\t"
-
- : "+r" (x)
- : "r"(yp), "r" (up), "r"(vp), "r"(d)
- :"memory");
- }
- for (; x<w; x++) { /* scalar tail: one chroma sample, four luma bytes per pass */
- const int x2 = x<<2;
- d[8*x+0] = yp[x2];
- d[8*x+1] = up[x];
- d[8*x+2] = yp[x2+1];
- d[8*x+3] = vp[x];
- d[8*x+4] = yp[x2+2];
- d[8*x+5] = up[x];
- d[8*x+6] = yp[x2+3];
- d[8*x+7] = vp[x];
- }
- }
- __asm__( /* issued once, after all rows are done */
- EMMS" \n\t"
- SFENCE" \n\t"
- ::: "memory"
- );
-}
-
static void extract_even_mmxext(const uint8_t *src, uint8_t *dst, x86_reg count)
{
dst += count;
yv12touyvy = yv12touyvy_mmxext;
yuv422ptoyuy2 = yuv422ptoyuy2_mmxext;
yuv422ptouyvy = yuv422ptouyvy_mmxext;
- yuy2toyv12 = yuy2toyv12_mmxext;
- vu9_to_vu12 = vu9_to_vu12_mmxext;
- yvu9_to_yuy2 = yvu9_to_yuy2_mmxext;
#if ARCH_X86_32
uyvytoyuv422 = uyvytoyuv422_mmxext;
#endif