Update to MPlayer SVN rev 30589 and FFmpeg SVN rev 21847.
[vaapi:kinkis-mplayer.git] / libmpcodecs / vf_halfpack.c
1 /*
2  * This file is part of MPlayer.
3  *
4  * MPlayer is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * MPlayer is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <inttypes.h>
23
24 #include "config.h"
25 #include "mp_msg.h"
26 #include "cpudetect.h"
27
28 #include "img_format.h"
29 #include "mp_image.h"
30 #include "vf.h"
31 #include "vf_scale.h"
32
33 #include "libswscale/swscale.h"
34 #include "fmt-conversion.h"
35
/* Per-instance state of the halfpack filter. */
struct vf_priv_s {
        /*
         * Operating mode, parsed from the filter argument in open()
         * (defaults to 2).  put_image() uses the swscale path for the
         * values 0 and 1, offsetting the luma plane by that many lines,
         * and the line-pair averaging path for anything else.
         */
        int field;

        /* swscale context created by config() when field < 2. */
        struct SwsContext *ctx;
};
40
#if HAVE_MMX
/*
 * MMX implementation of the half-height packer.
 *
 * Every output row is built from two consecutive luma rows, whose samples
 * are averaged, plus one row of each chroma plane, interleaved as
 * Y U Y V bytes.
 *
 * dst        start of the packed output buffer
 * src        the three source planes (luma, then the two chroma planes)
 * dststride  distance in bytes between output rows
 * srcstride  distance in bytes between rows of each source plane
 * w, h       source width and height; h/2 output rows are produced
 *
 * The asm loop handles 8 luma samples per iteration; the remaining
 * (w&7)/2 sample pairs of each row are handled by the scalar tail loop.
 */
static void halfpack_MMX(unsigned char *dst, unsigned char *src[3],
                     int dststride, int srcstride[3],
                     int w, int h)
{
        int j, rows;
        unsigned char *y1, *y2, *u, *v, *out;
        unsigned char *yrow = src[0];
        unsigned char *urow = src[1];
        unsigned char *vrow = src[2];

        for (rows = h/2; rows > 0; rows--) {
                /* Number of 8-sample groups the asm loop has to process. */
                int count = w/8;

                /*
                 * Restart from the row bases every time so that an odd
                 * width cannot make the pointers drift between rows.
                 */
                y1 = yrow;
                y2 = yrow + srcstride[0];
                u = urow;
                v = vrow;
                out = dst;

                /*
                 * The loop below decrements before it tests, so it must
                 * not be entered with a zero count (w < 8).
                 */
                if (count) {
                        __asm__ volatile (
                                "pxor %%mm0, %%mm0 \n\t"
                                ASMALIGN(4)
                                "1: \n\t"
                                /* widen 8 samples of both luma rows to words */
                                "movq (%0), %%mm1 \n\t"
                                "movq (%0), %%mm2 \n\t"
                                "movq (%1), %%mm3 \n\t"
                                "movq (%1), %%mm4 \n\t"
                                "punpcklbw %%mm0, %%mm1 \n\t"
                                "punpckhbw %%mm0, %%mm2 \n\t"
                                "punpcklbw %%mm0, %%mm3 \n\t"
                                "punpckhbw %%mm0, %%mm4 \n\t"
                                /* average the two rows */
                                "paddw %%mm3, %%mm1 \n\t"
                                "paddw %%mm4, %%mm2 \n\t"
                                "psrlw $1, %%mm1 \n\t"
                                "psrlw $1, %%mm2 \n\t"

                                /* only 4 chroma bytes per plane are consumed,
                                 * so load exactly 4 */
                                "movd (%2), %%mm3 \n\t"
                                "movd (%3), %%mm5 \n\t"
                                "punpcklbw %%mm0, %%mm3 \n\t"
                                "punpcklbw %%mm0, %%mm5 \n\t"
                                "movq %%mm3, %%mm4 \n\t"
                                "movq %%mm5, %%mm6 \n\t"
                                "punpcklwd %%mm0, %%mm3 \n\t"
                                "punpckhwd %%mm0, %%mm4 \n\t"
                                "punpcklwd %%mm0, %%mm5 \n\t"
                                "punpckhwd %%mm0, %%mm6 \n\t"
                                /* move the chroma samples to bytes 1 and 3
                                 * of each output dword */
                                "pslld $8, %%mm3 \n\t"
                                "pslld $8, %%mm4 \n\t"
                                "pslld $24, %%mm5 \n\t"
                                "pslld $24, %%mm6 \n\t"

                                "por %%mm3, %%mm1 \n\t"
                                "por %%mm4, %%mm2 \n\t"
                                "por %%mm5, %%mm1 \n\t"
                                "por %%mm6, %%mm2 \n\t"

                                "add $8, %0 \n\t"
                                "add $8, %1 \n\t"
                                "add $4, %2 \n\t"
                                "add $4, %3 \n\t"
                                "movq %%mm1, (%4) \n\t"
                                "movq %%mm2, 8(%4) \n\t"
                                "add $16, %4 \n\t"
                                "decl %5 \n\t"
                                "jnz 1b \n\t"
                                /*
                                 * All six operands are modified by the asm,
                                 * so all of them are read-write; this also
                                 * hands the advanced pointers back to the
                                 * tail loop below.
                                 */
                                : "+r" (y1), "+r" (y2), "+r" (u), "+r" (v),
                                  "+r" (out), "+r" (count)
                                :
                                : "memory"
                        );
                }
                /* Scalar tail for the sample pairs the asm loop left over. */
                for (j = (w&7)/2; j; j--) {
                        *out++ = (*y1++ + *y2++)/2;
                        *out++ = *u++;
                        *out++ = (*y1++ + *y2++)/2;
                        *out++ = *v++;
                }
                yrow += 2*srcstride[0];
                urow += srcstride[1];
                vrow += srcstride[2];
                dst += dststride;
        }
        __asm__ volatile ( "emms \n\t" ::: "memory" );
}
#endif
126
127
128
/*
 * Portable implementation of the half-height packer.
 *
 * Every output row is built from two consecutive luma rows, whose samples
 * are averaged, plus one row of each chroma plane, interleaved as
 * Y U Y V bytes.
 *
 * dst        start of the packed output buffer
 * src        the three source planes (luma, then the two chroma planes)
 * dststride  distance in bytes between output rows
 * srcstride  distance in bytes between rows of each source plane
 * w, h       source width and height; h/2 output rows of w/2 sample
 *            pairs (4 bytes each) are written
 *
 * Samples are processed in pairs, so the last column of an odd width is
 * not written.
 */
static void halfpack_C(unsigned char *dst, unsigned char *src[3],
                     int dststride, int srcstride[3],
                     int w, int h)
{
        const unsigned char *yrow = src[0];
        const unsigned char *urow = src[1];
        const unsigned char *vrow = src[2];
        int i, j;

        for (i = h/2; i > 0; i--) {
                /*
                 * Restart from the row bases every time: stepping by whole
                 * strides keeps the rows aligned even when w is odd.
                 */
                const unsigned char *y1 = yrow;
                const unsigned char *y2 = yrow + srcstride[0];
                const unsigned char *u = urow;
                const unsigned char *v = vrow;
                unsigned char *out = dst;

                for (j = w/2; j > 0; j--) {
                        *out++ = (*y1++ + *y2++)>>1;
                        *out++ = *u++;
                        *out++ = (*y1++ + *y2++)>>1;
                        *out++ = *v++;
                }
                yrow += 2*srcstride[0];
                urow += srcstride[1];
                vrow += srcstride[2];
                dst += dststride;
        }
}
161
/*
 * Packer implementation in use; open() points it at halfpack_C or,
 * when available, halfpack_MMX.
 */
static void (*halfpack)(unsigned char *dst,
                        unsigned char *src[3],
                        int dststride,
                        int srcstride[3],
                        int w,
                        int h);
164
165
166 static int put_image(struct vf_instance_s* vf, mp_image_t *mpi, double pts)
167 {
168         const uint8_t *src[MP_MAX_PLANES] = {
169                 mpi->planes[0] + mpi->stride[0]*vf->priv->field,
170                 mpi->planes[1], mpi->planes[2], NULL};
171         int src_stride[MP_MAX_PLANES] = {mpi->stride[0]*2, mpi->stride[1], mpi->stride[2], 0};
172         mp_image_t *dmpi;
173
174         // hope we'll get DR buffer:
175         dmpi=vf_get_image(vf->next, IMGFMT_YUY2,
176                           MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
177                           mpi->w, mpi->h/2);
178
179         switch(vf->priv->field) {
180         case 0:
181         case 1:
182                 sws_scale(vf->priv->ctx, src, src_stride,
183                           0, mpi->h/2, dmpi->planes, dmpi->stride);
184                 break;
185         default:
186                 halfpack(dmpi->planes[0], mpi->planes, dmpi->stride[0],
187                         mpi->stride, mpi->w, mpi->h);
188         }
189
190         return vf_next_put_image(vf,dmpi, pts);
191 }
192
193 static int config(struct vf_instance_s* vf,
194                   int width, int height, int d_width, int d_height,
195                   unsigned int flags, unsigned int outfmt)
196 {
197         if (vf->priv->field < 2) {
198                 sws_freeContext(vf->priv->ctx);
199                 // get unscaled 422p -> yuy2 conversion
200                 vf->priv->ctx =
201                         sws_getContext(width, height / 2, PIX_FMT_YUV422P,
202                                        width, height / 2, PIX_FMT_YUYV422,
203                                        SWS_POINT | SWS_PRINT_INFO | get_sws_cpuflags(),
204                                        NULL, NULL, NULL);
205         }
206         /* FIXME - also support UYVY output? */
207         return vf_next_config(vf, width, height/2, d_width, d_height, flags, IMGFMT_YUY2);
208 }
209
210
211 static int query_format(struct vf_instance_s* vf, unsigned int fmt)
212 {
213         /* FIXME - really any YUV 4:2:0 input format should work */
214         switch (fmt) {
215         case IMGFMT_YV12:
216         case IMGFMT_IYUV:
217         case IMGFMT_I420:
218                 return vf_next_query_format(vf,IMGFMT_YUY2);
219         }
220         return 0;
221 }
222
223 static void uninit(struct vf_instance_s* vf)
224 {
225         sws_freeContext(vf->priv->ctx);
226         free(vf->priv);
227 }
228
229 static int open(vf_instance_t *vf, char* args)
230 {
231         vf->config=config;
232         vf->query_format=query_format;
233         vf->put_image=put_image;
234         vf->uninit=uninit;
235
236         vf->priv = calloc(1, sizeof (struct vf_priv_s));
237         vf->priv->field = 2;
238         if (args) sscanf(args, "%d", &vf->priv->field);
239
240         halfpack = halfpack_C;
241 #if HAVE_MMX
242         if(gCpuCaps.hasMMX) halfpack = halfpack_MMX;
243 #endif
244         return 1;
245 }
246
247 const vf_info_t vf_info_halfpack = {
248         "yuv planar 4:2:0 -> packed 4:2:2, half height",
249         "halfpack",
250         "Richard Felker",
251         "",
252         open,
253         NULL
254 };
255