Update to MPlayer SVN rev 29319 and FFmpeg SVN rev 18938.
[vaapi:kinkis-mplayer.git] / libmpcodecs / vf_halfpack.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <inttypes.h>
5
6 #include "config.h"
7 #include "mp_msg.h"
8 #include "cpudetect.h"
9
10 #include "img_format.h"
11 #include "mp_image.h"
12 #include "vf.h"
13
14 #include "libswscale/rgb2rgb.h"
15
/* Per-instance state of the halfpack filter. */
struct vf_priv_s {
        /*
         * Line-selection mode, parsed from the filter argument in open():
         *   0 or 1 - put_image() keeps only the source lines starting at
         *            line 0 or line 1 and stepping by two lines;
         *   other  - put_image() averages each pair of adjacent lines
         *            (open() defaults this to 2).
         */
        int field;
};
19
#if HAVE_MMX
/*
 * MMX implementation of the half-height packer.
 *
 * For every pair of adjacent luma lines the two lines are averaged
 * (truncating) and interleaved with one line of U and one line of V into a
 * single packed line of Y U Y V bytes.
 *
 *   dst        start of the packed output; 2*(w & ~1) bytes are written per
 *              output line
 *   src        the three source planes: [0] luma, [1] U, [2] V
 *   dststride  distance in bytes between output lines
 *   srcstride  distance in bytes between lines of each source plane
 *   w, h       source width and height in pixels; h/2 output lines are made
 *
 * The asm loop handles 8 luma pixels (16 output bytes) per iteration; the
 * remaining (w & 7) / 2 pixel pairs are done in plain C.
 */
static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
                     int dststride, int srcstride[3],
                     int w, int h)
{
        /* Row bases advance by whole strides, so an odd w cannot skew rows. */
        unsigned char *yrow = src[0];
        unsigned char *urow = src[1];
        unsigned char *vrow = src[2];
        unsigned char *y1, *y2, *u, *v, *out;
        int blocks, j;

        /* Nothing to convert; no MMX state has been touched yet. */
        if (w <= 0 || h <= 0)
                return;

        for (h /= 2; h > 0; h--) {
                y1 = yrow;
                y2 = yrow + srcstride[0];
                u = urow;
                v = vrow;
                out = dst;
                blocks = w/8;

                /*
                 * The asm loop tests its counter only after the first pass,
                 * so it must not be entered with a zero count.
                 */
                if (blocks > 0) {
                        /*
                         * Every register the asm advances is a read-write
                         * ("+r") operand so the compiler knows its value
                         * changes; volatile keeps the block from being
                         * dropped when the tail loop below is empty.
                         */
                        __asm__ volatile (
                                "pxor %%mm0, %%mm0 \n\t"
                                ASMALIGN(4)
                                "1: \n\t"
                                /* average 8 luma bytes of both lines as words */
                                "movq (%0), %%mm1 \n\t"
                                "movq (%0), %%mm2 \n\t"
                                "movq (%1), %%mm3 \n\t"
                                "movq (%1), %%mm4 \n\t"
                                "punpcklbw %%mm0, %%mm1 \n\t"
                                "punpckhbw %%mm0, %%mm2 \n\t"
                                "punpcklbw %%mm0, %%mm3 \n\t"
                                "punpckhbw %%mm0, %%mm4 \n\t"
                                "paddw %%mm3, %%mm1 \n\t"
                                "paddw %%mm4, %%mm2 \n\t"
                                "psrlw $1, %%mm1 \n\t"
                                "psrlw $1, %%mm2 \n\t"

                                /*
                                 * Only 4 chroma bytes are consumed per pass,
                                 * so load exactly 4 (movd) instead of 8.
                                 */
                                "movd (%2), %%mm3 \n\t"
                                "movd (%3), %%mm5 \n\t"
                                "punpcklbw %%mm0, %%mm3 \n\t"
                                "punpcklbw %%mm0, %%mm5 \n\t"
                                "movq %%mm3, %%mm4 \n\t"
                                "movq %%mm5, %%mm6 \n\t"
                                "punpcklwd %%mm0, %%mm3 \n\t"
                                "punpckhwd %%mm0, %%mm4 \n\t"
                                "punpcklwd %%mm0, %%mm5 \n\t"
                                "punpckhwd %%mm0, %%mm6 \n\t"
                                /* U goes to byte 1, V to byte 3 of each dword */
                                "pslld $8, %%mm3 \n\t"
                                "pslld $8, %%mm4 \n\t"
                                "pslld $24, %%mm5 \n\t"
                                "pslld $24, %%mm6 \n\t"

                                "por %%mm3, %%mm1 \n\t"
                                "por %%mm4, %%mm2 \n\t"
                                "por %%mm5, %%mm1 \n\t"
                                "por %%mm6, %%mm2 \n\t"

                                "add $8, %0 \n\t"
                                "add $8, %1 \n\t"
                                "add $4, %2 \n\t"
                                "add $4, %3 \n\t"
                                "movq %%mm1, (%4) \n\t"
                                "movq %%mm2, 8(%4) \n\t"
                                "add $16, %4 \n\t"
                                "decl %5 \n\t"
                                "jnz 1b \n\t"
                                : "+r" (y1), "+r" (y2), "+r" (u), "+r" (v),
                                  "+r" (out), "+r" (blocks)
                                :
                                : "memory"
                        );
                }
                /* Remaining pixel pairs that do not fill an 8-pixel block. */
                for (j = (w&7)/2; j; j--) {
                        *out++ = (*y1++ + *y2++)/2;
                        *out++ = *u++;
                        *out++ = (*y1++ + *y2++)/2;
                        *out++ = *v++;
                }
                yrow += 2*srcstride[0];
                urow += srcstride[1];
                vrow += srcstride[2];
                dst += dststride;
        }
        /* Leave MMX state so later floating-point code works. */
        __asm__ volatile ( "emms \n\t" ::: "memory" );
}
#endif
105
106
107
/*
 * Portable implementation of the half-height packer.
 *
 * For every pair of adjacent luma lines the two lines are averaged
 * (truncating) and interleaved with one line of U and one line of V into a
 * single packed line of Y U Y V bytes.
 *
 *   dst        start of the packed output; 4*(w/2) bytes are written per
 *              output line
 *   src        the three source planes: [0] luma, [1] U, [2] V
 *   dststride  distance in bytes between output lines
 *   srcstride  distance in bytes between lines of each source plane
 *   w, h       source width and height in pixels; h/2 output lines are made
 *
 * Pixels are processed in pairs, so with an odd w the last column is dropped.
 * A non-positive w or h produces no output.
 */
static void halfpack_C(unsigned char *dst, unsigned char *src[3],
                     int dststride, int srcstride[3],
                     int w, int h)
{
        const unsigned char *ytop = src[0];
        const unsigned char *ybot = src[0] + srcstride[0];
        const unsigned char *urow = src[1];
        const unsigned char *vrow = src[2];
        const int pairs = w/2;
        const int rows = h/2;
        int row, x;

        for (row = 0; row < rows; row++) {
                unsigned char *out = dst;

                for (x = 0; x < pairs; x++) {
                        *out++ = (ytop[2*x]     + ybot[2*x])     >> 1;
                        *out++ = urow[x];
                        *out++ = (ytop[2*x + 1] + ybot[2*x + 1]) >> 1;
                        *out++ = vrow[x];
                }

                /*
                 * Step every row base by its stride rather than by
                 * "stride - w": the inner loop covers only 2*(w/2) pixels,
                 * so the old form skewed each following line when w was odd.
                 */
                ytop += 2*srcstride[0];
                ybot += 2*srcstride[0];
                urow += srcstride[1];
                vrow += srcstride[2];
                dst += dststride;
        }
}
140
/*
 * Packer implementation used by put_image() when lines are averaged.
 * open() points it at halfpack_C, or at halfpack_MMX when built with
 * HAVE_MMX and gCpuCaps.hasMMX is set.
 */
static void (*halfpack)(unsigned char *dst, unsigned char *src[3],
        int dststride, int srcstride[3], int w, int h);
143
144
145 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
146 {
147         mp_image_t *dmpi;
148
149         // hope we'll get DR buffer:
150         dmpi=vf_get_image(vf->next, IMGFMT_YUY2,
151                           MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
152                           mpi->w, mpi->h/2);
153
154         switch(vf->priv->field) {
155         case 0:
156         case 1:
157                 yuv422ptoyuy2(mpi->planes[0] + mpi->stride[0]*vf->priv->field,
158                         mpi->planes[1], mpi->planes[2], dmpi->planes[0],
159                         mpi->w, mpi->h/2, mpi->stride[0]*2, mpi->stride[1], dmpi->stride[0]);
160                 break;
161         default:
162                 halfpack(dmpi->planes[0], mpi->planes, dmpi->stride[0],
163                         mpi->stride, mpi->w, mpi->h);
164         }
165
166         return vf_next_put_image(vf,dmpi, pts);
167 }
168
169 static int config(struct vf_instance_s* vf,
170                   int width, int height, int d_width, int d_height,
171                   unsigned int flags, unsigned int outfmt)
172 {
173         /* FIXME - also support UYVY output? */
174         return vf_next_config(vf, width, height/2, d_width, d_height, flags, IMGFMT_YUY2);
175 }
176
177
178 static int query_format(struct vf_instance_s* vf, unsigned int fmt)
179 {
180         /* FIXME - really any YUV 4:2:0 input format should work */
181         switch (fmt) {
182         case IMGFMT_YV12:
183         case IMGFMT_IYUV:
184         case IMGFMT_I420:
185                 return vf_next_query_format(vf,IMGFMT_YUY2);
186         }
187         return 0;
188 }
189
190 static void uninit(struct vf_instance_s* vf)
191 {
192         free(vf->priv);
193 }
194
195 static int open(vf_instance_t *vf, char* args)
196 {
197         vf->config=config;
198         vf->query_format=query_format;
199         vf->put_image=put_image;
200         vf->uninit=uninit;
201
202         vf->priv = calloc(1, sizeof (struct vf_priv_s));
203         vf->priv->field = 2;
204         if (args) sscanf(args, "%d", &vf->priv->field);
205
206         halfpack = halfpack_C;
207 #if HAVE_MMX
208         if(gCpuCaps.hasMMX) halfpack = halfpack_MMX;
209 #endif
210         return 1;
211 }
212
/*
 * Registration record for the "halfpack" filter.
 * NOTE(review): the field meanings below are inferred from the values;
 * confirm against the vf_info_t definition in vf.h.
 */
const vf_info_t vf_info_halfpack = {
        "yuv planar 4:2:0 -> packed 4:2:2, half height", /* description */
        "halfpack",                                      /* filter name */
        "Richard Felker",                                /* author */
        "",                                              /* presumably a comment field */
        open,                                            /* instance setup entry point */
        NULL                                             /* presumably option descriptors; none */
};
221