1 | // Copyright 2016 Adrien Descamps |
2 | // Distributed under BSD 3-Clause License |
3 | #include "../../SDL_internal.h" |
4 | |
5 | #if SDL_HAVE_YUV |
6 | |
7 | #include "yuv_rgb.h" |
8 | |
9 | #include "SDL_cpuinfo.h" |
10 | /*#include <x86intrin.h>*/ |
11 | |
// Number of fractional bits used by the fixed-point conversion coefficients.
#define PRECISION 6
// Scale factor applied to the coefficients (2^PRECISION, i.e. 64 with PRECISION == 6).
#define PRECISION_FACTOR (1<<PRECISION)
14 | |
// Fixed-point parameters of an RGB -> YUV conversion:
//
// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
typedef struct
{
	uint8_t y_shift;      // offset added to the luma (Y) result
	int16_t matrix[3][3]; // rows yield Y, U, V; columns weight R, G, B; values are scaled by PRECISION_FACTOR
} RGB2YUVParam;
23 | |
// Fixed-point parameters of a YUV -> RGB conversion:
//
// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
typedef struct
{
	uint8_t y_shift;    // offset subtracted from Y before scaling
	int16_t y_factor;   // weight of (Y-y_shift) in R, G and B
	int16_t v_r_factor; // weight of (V-128) in R
	int16_t u_g_factor; // weight of (U-128) in G
	int16_t v_g_factor; // weight of (V-128) in G
	int16_t u_b_factor; // weight of (U-128) in B
} YUV2RGBParam;
36 | |
// Convert a real coefficient to fixed point: scale by PRECISION_FACTOR and
// round half up (intended for non-negative arguments; negate the result for
// negative coefficients, e.g. -V(0.3441)).
// The argument and the whole expansion are parenthesized so that compound
// arguments such as V(a+b) and surrounding operators bind as expected.
#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5))
38 | |
39 | // for ITU-T T.871, values can be found in section 7 |
40 | // for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255]) |
41 | // for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255]) |
42 | // all values are rounded to the fourth decimal |
43 | |
// YUV -> RGB coefficient sets, one per supported standard.
// The table is meant to be indexed by a YCbCrType value; the entry order
// (JPEG, BT.601, BT.709) presumably mirrors that enum — confirm against yuv_rgb.h.
// Negative coefficients are written as -V(x) so that V() only rounds positive values.
static const YUV2RGBParam YUV2RGB[3] = {
	// ITU-T T.871 (JPEG)
	{/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
	// ITU-R BT.601-7
	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
	// ITU-R BT.709-6
	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
};
52 | |
// RGB -> YUV coefficient sets, one per supported standard, indexed by the
// yuv_type argument of the rgb24_yuv420_* functions.
// The entry order (JPEG, BT.601, BT.709) presumably mirrors the YCbCrType enum — confirm against yuv_rgb.h.
// Each matrix row yields Y, U, V respectively; each column weights R, G, B.
static const RGB2YUVParam RGB2YUV[3] = {
	// ITU-T T.871 (JPEG)
	{/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
	// ITU-R BT.601-7
	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
	// ITU-R BT.709-6
	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
};
61 | |
/* The various layouts of YUV data we support.
 * One of these values is assigned to YUV_FORMAT before each inclusion of a
 * conversion template header (yuv_rgb_std_func.h / yuv_rgb_sse_func.h).
 * NOTE(review): kept as macros because the template headers presumably test
 * them with #if — confirm before converting to an enum. */
#define YUV_FORMAT_420 1
#define YUV_FORMAT_422 2
#define YUV_FORMAT_NV12 3

/* The various formats of RGB pixel that we support.
 * One of these values is assigned to RGB_FORMAT before each inclusion of a
 * conversion template header. */
#define RGB_FORMAT_RGB565 1
#define RGB_FORMAT_RGB24 2
#define RGB_FORMAT_RGBA 3
#define RGB_FORMAT_BGRA 4
#define RGB_FORMAT_ARGB 5
#define RGB_FORMAT_ABGR 6
74 | |
75 | // divide by PRECISION_FACTOR and clamp to [0:255] interval |
76 | // input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range |
77 | static uint8_t clampU8(int32_t v) |
78 | { |
79 | static const uint8_t lut[512] = |
80 | {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
81 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
82 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46, |
83 | 47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90, |
84 | 91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, |
85 | 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158, |
86 | 159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, |
87 | 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224, |
88 | 225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, |
89 | 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
90 | 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
91 | 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, |
92 | 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255 |
93 | }; |
94 | return lut[(v+128*PRECISION_FACTOR)>>PRECISION]; |
95 | } |
96 | |
97 | |
/* Scalar conversion functions, generated by repeatedly including the template
 * header yuv_rgb_std_func.h. Before each inclusion three macros select what
 * is generated:
 *   STD_FUNCTION_NAME  name of the function to generate
 *   YUV_FORMAT         source layout (one of YUV_FORMAT_*)
 *   RGB_FORMAT         destination pixel format (one of RGB_FORMAT_*)
 * NOTE(review): the macros are redefined here without an #undef, so the
 * header presumably #undefs them at its end — confirm in yuv_rgb_std_func.h. */

/* YUV 4:2:0 sources */
#define STD_FUNCTION_NAME yuv420_rgb565_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv420_rgb24_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv420_rgba_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv420_bgra_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv420_argb_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv420_abgr_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_std_func.h"

/* YUV 4:2:2 sources */
#define STD_FUNCTION_NAME yuv422_rgb565_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv422_rgb24_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv422_rgba_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv422_bgra_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv422_argb_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuv422_abgr_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_std_func.h"

/* NV12 sources */
#define STD_FUNCTION_NAME yuvnv12_rgb565_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuvnv12_rgb24_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuvnv12_rgba_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuvnv12_bgra_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuvnv12_argb_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_std_func.h"

#define STD_FUNCTION_NAME yuvnv12_abgr_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_std_func.h"
187 | |
188 | void rgb24_yuv420_std( |
189 | uint32_t width, uint32_t height, |
190 | const uint8_t *RGB, uint32_t RGB_stride, |
191 | uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, |
192 | YCbCrType yuv_type) |
193 | { |
194 | const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); |
195 | |
196 | uint32_t x, y; |
197 | for(y=0; y<(height-1); y+=2) |
198 | { |
199 | const uint8_t *rgb_ptr1=RGB+y*RGB_stride, |
200 | *rgb_ptr2=RGB+(y+1)*RGB_stride; |
201 | |
202 | uint8_t *y_ptr1=Y+y*Y_stride, |
203 | *y_ptr2=Y+(y+1)*Y_stride, |
204 | *u_ptr=U+(y/2)*UV_stride, |
205 | *v_ptr=V+(y/2)*UV_stride; |
206 | |
207 | for(x=0; x<(width-1); x+=2) |
208 | { |
209 | // compute yuv for the four pixels, u and v values are summed |
210 | int32_t y_tmp, u_tmp, v_tmp; |
211 | |
212 | y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2]; |
213 | u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2]; |
214 | v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2]; |
215 | y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); |
216 | |
217 | y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5]; |
218 | u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5]; |
219 | v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5]; |
220 | y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); |
221 | |
222 | y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2]; |
223 | u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2]; |
224 | v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2]; |
225 | y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); |
226 | |
227 | y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5]; |
228 | u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5]; |
229 | v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5]; |
230 | y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION)); |
231 | |
232 | u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION)); |
233 | v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION)); |
234 | |
235 | rgb_ptr1 += 6; |
236 | rgb_ptr2 += 6; |
237 | y_ptr1 += 2; |
238 | y_ptr2 += 2; |
239 | u_ptr += 1; |
240 | v_ptr += 1; |
241 | } |
242 | } |
243 | } |
244 | |
245 | #ifdef __SSE2__ |
246 | |
/* SSE2 conversion functions, generated by repeatedly including the template
 * header yuv_rgb_sse_func.h. Before each inclusion these macros select what
 * is generated:
 *   SSE_FUNCTION_NAME  name of the function to generate
 *   STD_FUNCTION_NAME  name of the matching scalar function defined above
 *                      (presumably used by the template for the part of the
 *                      image the SIMD loop cannot cover — confirm in the header)
 *   YUV_FORMAT         source layout (one of YUV_FORMAT_*)
 *   RGB_FORMAT         destination pixel format (one of RGB_FORMAT_*)
 *   SSE_ALIGNED        defined only for the "_sse" variants; the "_sseu"
 *                      variants are generated without it (presumably
 *                      selecting aligned vs. unaligned memory accesses)
 * NOTE(review): the macros are redefined here without an #undef, so the
 * header presumably #undefs them (including SSE_ALIGNED) at its end — confirm
 * in yuv_rgb_sse_func.h. */

/* YUV 4:2:0 sources */
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
#define STD_FUNCTION_NAME yuv420_rgb565_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB565
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_rgb565_sseu
#define STD_FUNCTION_NAME yuv420_rgb565_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_rgb24_sse
#define STD_FUNCTION_NAME yuv420_rgb24_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB24
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_rgb24_sseu
#define STD_FUNCTION_NAME yuv420_rgb24_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_rgba_sse
#define STD_FUNCTION_NAME yuv420_rgba_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGBA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_rgba_sseu
#define STD_FUNCTION_NAME yuv420_rgba_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_bgra_sse
#define STD_FUNCTION_NAME yuv420_bgra_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_BGRA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_bgra_sseu
#define STD_FUNCTION_NAME yuv420_bgra_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_argb_sse
#define STD_FUNCTION_NAME yuv420_argb_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ARGB
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_argb_sseu
#define STD_FUNCTION_NAME yuv420_argb_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_abgr_sse
#define STD_FUNCTION_NAME yuv420_abgr_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ABGR
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv420_abgr_sseu
#define STD_FUNCTION_NAME yuv420_abgr_std
#define YUV_FORMAT YUV_FORMAT_420
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_sse_func.h"

/* YUV 4:2:2 sources */
#define SSE_FUNCTION_NAME yuv422_rgb565_sse
#define STD_FUNCTION_NAME yuv422_rgb565_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB565
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_rgb565_sseu
#define STD_FUNCTION_NAME yuv422_rgb565_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_rgb24_sse
#define STD_FUNCTION_NAME yuv422_rgb24_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB24
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_rgb24_sseu
#define STD_FUNCTION_NAME yuv422_rgb24_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_rgba_sse
#define STD_FUNCTION_NAME yuv422_rgba_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGBA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_rgba_sseu
#define STD_FUNCTION_NAME yuv422_rgba_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_bgra_sse
#define STD_FUNCTION_NAME yuv422_bgra_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_BGRA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_bgra_sseu
#define STD_FUNCTION_NAME yuv422_bgra_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_argb_sse
#define STD_FUNCTION_NAME yuv422_argb_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ARGB
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_argb_sseu
#define STD_FUNCTION_NAME yuv422_argb_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_abgr_sse
#define STD_FUNCTION_NAME yuv422_abgr_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ABGR
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuv422_abgr_sseu
#define STD_FUNCTION_NAME yuv422_abgr_std
#define YUV_FORMAT YUV_FORMAT_422
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_sse_func.h"

/* NV12 sources */
#define SSE_FUNCTION_NAME yuvnv12_rgb565_sse
#define STD_FUNCTION_NAME yuvnv12_rgb565_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB565
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_rgb565_sseu
#define STD_FUNCTION_NAME yuvnv12_rgb565_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB565
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_rgb24_sse
#define STD_FUNCTION_NAME yuvnv12_rgb24_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB24
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_rgb24_sseu
#define STD_FUNCTION_NAME yuvnv12_rgb24_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGB24
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_rgba_sse
#define STD_FUNCTION_NAME yuvnv12_rgba_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGBA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_rgba_sseu
#define STD_FUNCTION_NAME yuvnv12_rgba_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_RGBA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_bgra_sse
#define STD_FUNCTION_NAME yuvnv12_bgra_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_BGRA
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_bgra_sseu
#define STD_FUNCTION_NAME yuvnv12_bgra_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_BGRA
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_argb_sse
#define STD_FUNCTION_NAME yuvnv12_argb_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ARGB
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_argb_sseu
#define STD_FUNCTION_NAME yuvnv12_argb_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ARGB
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_abgr_sse
#define STD_FUNCTION_NAME yuvnv12_abgr_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ABGR
#define SSE_ALIGNED
#include "yuv_rgb_sse_func.h"

#define SSE_FUNCTION_NAME yuvnv12_abgr_sseu
#define STD_FUNCTION_NAME yuvnv12_abgr_std
#define YUV_FORMAT YUV_FORMAT_NV12
#define RGB_FORMAT RGB_FORMAT_ABGR
#include "yuv_rgb_sse_func.h"
480 | |
481 | |
/* Byte-shuffle helpers used by RGB2YUV_32 to split packed RGB24 data into
 * separate channel registers.
 *
 * STEP1 interleaves the bytes of RGB1..RGB3 with those of RGB4..RGB6 and
 * stores the results in R1,R2,G1,G2,B1,B2.
 * STEP2 interleaves those six registers again and stores the results back
 * into RGB1..RGB6, which are therefore overwritten (used as scratch).
 * UNPACK_RGB24_32 chains STEP1, STEP2, STEP1, STEP2, STEP1; its result is
 * left in R1,R2,G1,G2,B1,B2. RGB2YUV_32 passes three registers of the first
 * image row as RGB1..RGB3 and three of the second row as RGB4..RGB6, and
 * afterwards treats R1/G1/B1 as the channels of the first row and R2/G2/B2
 * as those of the second row.
 *
 * All arguments must be __m128i lvalues. The macros expand to several
 * statements and are not wrapped in do { } while (0), so they must not be
 * used as the unbraced body of an if/else/loop. */
#define UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
R1 = _mm_unpacklo_epi8(RGB1, RGB4); \
R2 = _mm_unpackhi_epi8(RGB1, RGB4); \
G1 = _mm_unpacklo_epi8(RGB2, RGB5); \
G2 = _mm_unpackhi_epi8(RGB2, RGB5); \
B1 = _mm_unpacklo_epi8(RGB3, RGB6); \
B2 = _mm_unpackhi_epi8(RGB3, RGB6);

#define UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
RGB1 = _mm_unpacklo_epi8(R1, G2); \
RGB2 = _mm_unpackhi_epi8(R1, G2); \
RGB3 = _mm_unpacklo_epi8(R2, B1); \
RGB4 = _mm_unpackhi_epi8(R2, B1); \
RGB5 = _mm_unpacklo_epi8(G1, B2); \
RGB6 = _mm_unpackhi_epi8(G1, B2); \

#define UNPACK_RGB24_32(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP2(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
UNPACK_RGB24_32_STEP1(RGB1, RGB2, RGB3, RGB4, RGB5, RGB6, R1, R2, G1, G2, B1, B2) \
504 | |
/* Convert eight pixels from RGB to YUV using 16-bit lanes.
 *
 * R, G, B  inputs: __m128i registers of eight 16-bit values each (RGB2YUV_32
 *          passes channel bytes zero-extended to 16 bits)
 * Y, U, V  outputs: __m128i lvalues receiving eight 16-bit results each
 *
 * Each output is the sum of the three channel products with one row of
 * param->matrix, plus the offset (param->y_shift for Y, 128 for U and V)
 * pre-scaled by PRECISION_FACTOR, followed by an arithmetic right shift by
 * PRECISION. Products and sums are computed with 16-bit wrap-around
 * arithmetic (_mm_mullo_epi16 / _mm_add_epi16). No clamping to [0:255] is
 * done here; RGB2YUV_32 saturates when packing to bytes.
 *
 * Requires a variable "param" (pointer to RGB2YUVParam) in the enclosing scope. */
#define RGB2YUV_16(R, G, B, Y, U, V) \
Y = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[0][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[0][1]))); \
Y = _mm_add_epi16(Y, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[0][2]))); \
Y = _mm_add_epi16(Y, _mm_set1_epi16((param->y_shift)<<PRECISION)); \
Y = _mm_srai_epi16(Y, PRECISION); \
U = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[1][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[1][1]))); \
U = _mm_add_epi16(U, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[1][2]))); \
U = _mm_add_epi16(U, _mm_set1_epi16(128<<PRECISION)); \
U = _mm_srai_epi16(U, PRECISION); \
V = _mm_add_epi16(_mm_mullo_epi16(R, _mm_set1_epi16(param->matrix[2][0])), \
		_mm_mullo_epi16(G, _mm_set1_epi16(param->matrix[2][1]))); \
V = _mm_add_epi16(V, _mm_mullo_epi16(B, _mm_set1_epi16(param->matrix[2][2]))); \
V = _mm_add_epi16(V, _mm_set1_epi16(128<<PRECISION)); \
V = _mm_srai_epi16(V, PRECISION);
521 | |
/* Loop body shared by rgb24_yuv420_sse and rgb24_yuv420_sseu: converts a
 * block of 32x2 RGB24 pixels to YUV 4:2:0.
 *
 * Expects in the enclosing scope:
 *   rgb_ptr1, rgb_ptr2  start of the block in two consecutive source rows
 *                       (96 bytes are read from each)
 *   y_ptr1, y_ptr2      destination of the two rows of luma (32 bytes each)
 *   u_ptr, v_ptr        destination of the subsampled chroma (16 bytes each)
 *   param               pointer to the RGB2YUVParam to apply
 *   LOAD_SI128, SAVE_SI128  128-bit load / store intrinsics to use
 *
 * The block is handled as two halves of 16 pixel columns. For each half the
 * data is split into channels, converted with RGB2YUV_16, packed to bytes
 * with unsigned saturation, the luma is stored, and the chroma of the two
 * rows is averaged (vertical subsampling). Finally horizontally adjacent
 * chroma samples of both halves are averaged and stored.
 *
 * The macro declares local variables, so it can be expanded at most once per
 * scope. */
#define RGB2YUV_32 \
	__m128i r1, r2, b1, b2, g1, g2; \
	__m128i r_16, g_16, b_16; \
	__m128i y1_16, y2_16, u1_16, u2_16, v1_16, v2_16, y, u1, u2, v1, v2, u1_tmp, u2_tmp, v1_tmp, v2_tmp; \
	__m128i rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1)), \
		rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+16)), \
		rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+32)), \
		rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2)), \
		rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+16)), \
		rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+32)); \
	/* unpack rgb24 data to r, g and b data in separate channels*/ \
	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
	/* process pixels of first line */ \
	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u1 = _mm_packus_epi16(u1_16, u2_16); \
	v1 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr1), y); \
	/* process pixels of second line */ \
	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u2 = _mm_packus_epi16(u1_16, u2_16); \
	v2 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr2), y); \
	/* vertical subsampling of u/v values */ \
	u1_tmp = _mm_avg_epu8(u1, u2); \
	v1_tmp = _mm_avg_epu8(v1, v2); \
	/* do the same again with next data */ \
	rgb1 = LOAD_SI128((const __m128i*)(rgb_ptr1+48)); \
	rgb2 = LOAD_SI128((const __m128i*)(rgb_ptr1+64)); \
	rgb3 = LOAD_SI128((const __m128i*)(rgb_ptr1+80)); \
	rgb4 = LOAD_SI128((const __m128i*)(rgb_ptr2+48)); \
	rgb5 = LOAD_SI128((const __m128i*)(rgb_ptr2+64)); \
	rgb6 = LOAD_SI128((const __m128i*)(rgb_ptr2+80)); \
	/* unpack rgb24 data to r, g and b data in separate channels*/ \
	UNPACK_RGB24_32(rgb1, rgb2, rgb3, rgb4, rgb5, rgb6, r1, r2, g1, g2, b1, b2) \
	/* process pixels of first line */ \
	r_16 = _mm_unpacklo_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r1, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g1, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b1, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u1 = _mm_packus_epi16(u1_16, u2_16); \
	v1 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr1+16), y); \
	/* process pixels of second line */ \
	r_16 = _mm_unpacklo_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpacklo_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpacklo_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y1_16, u1_16, v1_16) \
	r_16 = _mm_unpackhi_epi8(r2, _mm_setzero_si128()); \
	g_16 = _mm_unpackhi_epi8(g2, _mm_setzero_si128()); \
	b_16 = _mm_unpackhi_epi8(b2, _mm_setzero_si128()); \
	RGB2YUV_16(r_16, g_16, b_16, y2_16, u2_16, v2_16) \
	y = _mm_packus_epi16(y1_16, y2_16); \
	u2 = _mm_packus_epi16(u1_16, u2_16); \
	v2 = _mm_packus_epi16(v1_16, v2_16); \
	/* save Y values */ \
	SAVE_SI128((__m128i*)(y_ptr2+16), y); \
	/* vertical subsampling of u/v values */ \
	u2_tmp = _mm_avg_epu8(u1, u2); \
	v2_tmp = _mm_avg_epu8(v1, v2); \
	/* horizontal subsampling of u/v values: split each register into its */ \
	/* odd bytes (shift right by 8) and even bytes (mask 0xFF), then average */ \
	u1 = _mm_packus_epi16(_mm_srl_epi16(u1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(u2_tmp, _mm_cvtsi32_si128(8))); \
	v1 = _mm_packus_epi16(_mm_srl_epi16(v1_tmp, _mm_cvtsi32_si128(8)), _mm_srl_epi16(v2_tmp, _mm_cvtsi32_si128(8))); \
	u2 = _mm_packus_epi16(_mm_and_si128(u1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(u2_tmp, _mm_set1_epi16(0xFF))); \
	v2 = _mm_packus_epi16(_mm_and_si128(v1_tmp, _mm_set1_epi16(0xFF)), _mm_and_si128(v2_tmp, _mm_set1_epi16(0xFF))); \
	u1 = _mm_avg_epu8(u1, u2); \
	v1 = _mm_avg_epu8(v1, v2); \
	SAVE_SI128((__m128i*)(u_ptr), u1); \
	SAVE_SI128((__m128i*)(v_ptr), v1);
614 | |
615 | void rgb24_yuv420_sse(uint32_t width, uint32_t height, |
616 | const uint8_t *RGB, uint32_t RGB_stride, |
617 | uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, |
618 | YCbCrType yuv_type) |
619 | { |
620 | #define LOAD_SI128 _mm_load_si128 |
621 | #define SAVE_SI128 _mm_stream_si128 |
622 | const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); |
623 | |
624 | uint32_t xpos, ypos; |
625 | for(ypos=0; ypos<(height-1); ypos+=2) |
626 | { |
627 | const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, |
628 | *rgb_ptr2=RGB+(ypos+1)*RGB_stride; |
629 | |
630 | uint8_t *y_ptr1=Y+ypos*Y_stride, |
631 | *y_ptr2=Y+(ypos+1)*Y_stride, |
632 | *u_ptr=U+(ypos/2)*UV_stride, |
633 | *v_ptr=V+(ypos/2)*UV_stride; |
634 | |
635 | for(xpos=0; xpos<(width-31); xpos+=32) |
636 | { |
637 | RGB2YUV_32 |
638 | |
639 | rgb_ptr1+=96; |
640 | rgb_ptr2+=96; |
641 | y_ptr1+=32; |
642 | y_ptr2+=32; |
643 | u_ptr+=16; |
644 | v_ptr+=16; |
645 | } |
646 | } |
647 | #undef LOAD_SI128 |
648 | #undef SAVE_SI128 |
649 | } |
650 | |
651 | void rgb24_yuv420_sseu(uint32_t width, uint32_t height, |
652 | const uint8_t *RGB, uint32_t RGB_stride, |
653 | uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride, |
654 | YCbCrType yuv_type) |
655 | { |
656 | #define LOAD_SI128 _mm_loadu_si128 |
657 | #define SAVE_SI128 _mm_storeu_si128 |
658 | const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]); |
659 | |
660 | uint32_t xpos, ypos; |
661 | for(ypos=0; ypos<(height-1); ypos+=2) |
662 | { |
663 | const uint8_t *rgb_ptr1=RGB+ypos*RGB_stride, |
664 | *rgb_ptr2=RGB+(ypos+1)*RGB_stride; |
665 | |
666 | uint8_t *y_ptr1=Y+ypos*Y_stride, |
667 | *y_ptr2=Y+(ypos+1)*Y_stride, |
668 | *u_ptr=U+(ypos/2)*UV_stride, |
669 | *v_ptr=V+(ypos/2)*UV_stride; |
670 | |
671 | for(xpos=0; xpos<(width-31); xpos+=32) |
672 | { |
673 | RGB2YUV_32 |
674 | |
675 | rgb_ptr1+=96; |
676 | rgb_ptr2+=96; |
677 | y_ptr1+=32; |
678 | y_ptr2+=32; |
679 | u_ptr+=16; |
680 | v_ptr+=16; |
681 | } |
682 | } |
683 | #undef LOAD_SI128 |
684 | #undef SAVE_SI128 |
685 | } |
686 | |
687 | |
688 | #endif //__SSE2__ |
689 | |
690 | #endif /* SDL_HAVE_YUV */ |
691 | |