1 | // [Blend2D] |
2 | // 2D Vector Graphics Powered by a JIT Compiler. |
3 | // |
4 | // [License] |
5 | // Zlib - See LICENSE.md file in the package. |
6 | |
7 | #include "./blapi-build_p.h" |
8 | #include "./blformat_p.h" |
9 | #include "./blimage.h" |
10 | #include "./blpixelconverter_p.h" |
11 | #include "./blruntime_p.h" |
12 | #include "./blsupport_p.h" |
13 | #include "./bltables_p.h" |
14 | |
15 | // ============================================================================ |
16 | // [BLPixelConverter - Tables] |
17 | // ============================================================================ |
18 | |
// Bit positions at which the scale constant of each component is anchored when
// an external pixel is expanded into the native 32-bit layout. Red, green, and
// alpha are anchored where they live in the native pixel. Blue is the
// exception: it is anchored at 8 (the same as green) and the converters shift
// the scaled value right by 8 afterwards. This is necessary because a
// component is widened by accumulating its depth until at least 8 bits are
// reached (depth 5 gives 10, depth 3 gives 9, depths 1-2 give 8), so the
// scaled value can carry surplus low bits that would have nowhere to go if
// blue was anchored at bit 0.
static constexpr uint8_t blPixelConverterNative32FromExternalShiftTable[4] = {
  16, // R - native mask 0x00FF0000.
  8,  // G - native mask 0x0000FF00.
  8,  // B - native mask 0x0000FF00 (shifted right by 8 to get the final value).
  24  // A - native mask 0xFF000000.
};
32 | |
33 | #define F(VALUE) BL_FORMAT_FLAG_##VALUE |
34 | #define U 0 // Used only to distinguish between zero and Unused. |
35 | |
36 | const BLFormatInfo blPixelConverterFormatInfo[BL_PIXEL_CONVERTER_FORMAT_COUNT] = { |
37 | { 0, 0 , {{ { U, U, U, U }, { U, U, U, U } }} }, // NONE. |
38 | { 32, F(RGBA) | F(BYTE_ALIGNED) | F(PREMULTIPLIED), {{ { 8, 8, 8, 8 }, { 16, 8, 0, 24 } }} }, // PRGB32. |
39 | { 32, F(RGB) | F(BYTE_ALIGNED) , {{ { 8, 8, 8, U }, { 16, 8, 0, U } }} }, // XRGB32. |
40 | { 8, F(ALPHA) | F(BYTE_ALIGNED) , {{ { U, U, U, 8 }, { U, U, U, 0 } }} }, // A8. |
41 | { 32, F(RGBA) | F(BYTE_ALIGNED) , {{ { 8, 8, 8, 8 }, { 16, 8, 0, 24 } }} } // ARGB32. |
42 | }; |
43 | |
44 | #undef U |
45 | #undef F |
46 | |
47 | // ============================================================================ |
48 | // [BLPixelConverter - Globals] |
49 | // ============================================================================ |
50 | |
51 | const BLPixelConverterOptions blPixelConverterDefaultOptions {}; |
52 | |
53 | // ============================================================================ |
54 | // [BLPixelConverter - Pixel Access] |
55 | // ============================================================================ |
56 | |
57 | template<uint32_t ByteOrder> |
58 | struct BLPixelAccess16 { |
59 | enum : uint32_t { kSize = 2 }; |
60 | |
61 | static BL_INLINE uint32_t fetchA(const void* p) noexcept { return blMemReadU16<ByteOrder, 2>(p); } |
62 | static BL_INLINE uint32_t fetchU(const void* p) noexcept { return blMemReadU16<ByteOrder, 1>(p); } |
63 | |
64 | static BL_INLINE void storeA(void* p, uint32_t v) noexcept { blMemWriteU16<ByteOrder, 2>(p, uint16_t(v)); } |
65 | static BL_INLINE void storeU(void* p, uint32_t v) noexcept { blMemWriteU16<ByteOrder, 1>(p, uint16_t(v)); } |
66 | }; |
67 | |
68 | template<uint32_t ByteOrder> |
69 | struct BLPixelAccess24 { |
70 | enum : uint32_t { kSize = 3 }; |
71 | |
72 | static BL_INLINE uint32_t fetchA(const void* p) noexcept { return blMemReadU24u<ByteOrder>(p); } |
73 | static BL_INLINE uint32_t fetchU(const void* p) noexcept { return blMemReadU24u<ByteOrder>(p); } |
74 | |
75 | static BL_INLINE void storeA(void* p, uint32_t v) noexcept { blMemWriteU24u<ByteOrder>(p, v); } |
76 | static BL_INLINE void storeU(void* p, uint32_t v) noexcept { blMemWriteU24u<ByteOrder>(p, v); } |
77 | }; |
78 | |
79 | template<uint32_t ByteOrder> |
80 | struct BLPixelAccess32 { |
81 | enum : uint32_t { kSize = 4 }; |
82 | |
83 | static BL_INLINE uint32_t fetchA(const void* p) noexcept { return blMemReadU32<ByteOrder, 4>(p); } |
84 | static BL_INLINE uint32_t fetchU(const void* p) noexcept { return blMemReadU32<ByteOrder, 1>(p); } |
85 | |
86 | static BL_INLINE void storeA(void* p, uint32_t v) noexcept { blMemWriteU32<ByteOrder, 4>(p, v); } |
87 | static BL_INLINE void storeU(void* p, uint32_t v) noexcept { blMemWriteU32<ByteOrder, 1>(p, v); } |
88 | }; |
89 | |
90 | // ============================================================================ |
91 | // [BLPixelConverter - LookupTable] |
92 | // ============================================================================ |
93 | |
94 | static BLResult BL_CDECL bl_convert_lookup32_from_index1( |
95 | const BLPixelConverterCore* self, |
96 | uint8_t* dstData, intptr_t dstStride, |
97 | const uint8_t* srcLine, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
98 | |
99 | if (!options) |
100 | options = &blPixelConverterDefaultOptions; |
101 | |
102 | const BLPixelConverterData::LookupTable& d = blPixelConverterGetData(self)->lookupTable; |
103 | const size_t gap = options->gap; |
104 | |
105 | // Instead of doing a table lookup each time we create a XOR mask that is |
106 | // used to get the second color value from the first one. This allows to |
107 | // remove the lookup completely. The only requirement is that we need all |
108 | // zeros or ones depending on the source value (see the implementation, it |
109 | // uses signed right shift to fill these bits in). |
110 | uint32_t c0 = d.table[0]; |
111 | uint32_t cm = d.table[1] ^ c0; |
112 | |
113 | dstStride -= intptr_t(w * 4 + gap); |
114 | |
115 | if (c0 == 0x00000000u && cm == 0xFFFFFFFFu) { |
116 | // Special case for black/white palette, quite common. |
117 | for (uint32_t y = h; y != 0; y--) { |
118 | const uint8_t* srcData = srcLine; |
119 | |
120 | uint32_t i = w; |
121 | while (i >= 8) { |
122 | uint32_t b0 = uint32_t(*srcData++) << 24; |
123 | uint32_t b1 = b0 << 1; |
124 | |
125 | blMemWriteU32a(dstData + 0, blBitSar(b0, 31)); b0 <<= 2; |
126 | blMemWriteU32a(dstData + 4, blBitSar(b1, 31)); b1 <<= 2; |
127 | blMemWriteU32a(dstData + 8, blBitSar(b0, 31)); b0 <<= 2; |
128 | blMemWriteU32a(dstData + 12, blBitSar(b1, 31)); b1 <<= 2; |
129 | blMemWriteU32a(dstData + 16, blBitSar(b0, 31)); b0 <<= 2; |
130 | blMemWriteU32a(dstData + 20, blBitSar(b1, 31)); b1 <<= 2; |
131 | blMemWriteU32a(dstData + 24, blBitSar(b0, 31)); |
132 | blMemWriteU32a(dstData + 28, blBitSar(b1, 31)); |
133 | |
134 | dstData += 32; |
135 | i -= 8; |
136 | } |
137 | |
138 | if (i) { |
139 | uint32_t b0 = uint32_t(*srcData++) << 24; |
140 | do { |
141 | blMemWriteU32a(dstData, blBitSar(b0, 31)); |
142 | |
143 | dstData += 4; |
144 | b0 <<= 1; |
145 | } while (--i); |
146 | } |
147 | |
148 | dstData = blPixelConverterFillGap(dstData, gap); |
149 | dstData += dstStride; |
150 | srcLine += srcStride; |
151 | } |
152 | } |
153 | else { |
154 | // Generic case for any other combination. |
155 | for (uint32_t y = h; y != 0; y--) { |
156 | const uint8_t* srcData = srcLine; |
157 | |
158 | uint32_t i = w; |
159 | while (i >= 8) { |
160 | uint32_t b0 = uint32_t(*srcData++) << 24; |
161 | uint32_t b1 = b0 << 1; |
162 | |
163 | blMemWriteU32a(dstData + 0, c0 ^ (cm & blBitSar(b0, 31))); b0 <<= 2; |
164 | blMemWriteU32a(dstData + 4, c0 ^ (cm & blBitSar(b1, 31))); b1 <<= 2; |
165 | blMemWriteU32a(dstData + 8, c0 ^ (cm & blBitSar(b0, 31))); b0 <<= 2; |
166 | blMemWriteU32a(dstData + 12, c0 ^ (cm & blBitSar(b1, 31))); b1 <<= 2; |
167 | blMemWriteU32a(dstData + 16, c0 ^ (cm & blBitSar(b0, 31))); b0 <<= 2; |
168 | blMemWriteU32a(dstData + 20, c0 ^ (cm & blBitSar(b1, 31))); b1 <<= 2; |
169 | blMemWriteU32a(dstData + 24, c0 ^ (cm & blBitSar(b0, 31))); |
170 | blMemWriteU32a(dstData + 28, c0 ^ (cm & blBitSar(b1, 31))); |
171 | |
172 | dstData += 32; |
173 | i -= 8; |
174 | } |
175 | |
176 | if (i) { |
177 | uint32_t b0 = uint32_t(*srcData++) << 24; |
178 | do { |
179 | blMemWriteU32a(dstData, c0 ^ (cm & blBitSar(b0, 31))); |
180 | |
181 | dstData += 4; |
182 | b0 <<= 1; |
183 | } while (--i); |
184 | } |
185 | |
186 | dstData = blPixelConverterFillGap(dstData, gap); |
187 | dstData += dstStride; |
188 | srcLine += srcStride; |
189 | } |
190 | } |
191 | |
192 | return BL_SUCCESS; |
193 | } |
194 | |
195 | static BLResult BL_CDECL bl_convert_lookup32_from_index2( |
196 | const BLPixelConverterCore* self, |
197 | uint8_t* dstData, intptr_t dstStride, |
198 | const uint8_t* srcLine, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
199 | |
200 | if (!options) |
201 | options = &blPixelConverterDefaultOptions; |
202 | |
203 | const BLPixelConverterData::LookupTable& d = blPixelConverterGetData(self)->lookupTable; |
204 | const uint32_t* table = d.table; |
205 | const size_t gap = options->gap; |
206 | |
207 | dstStride -= w * 4 + gap; |
208 | |
209 | for (uint32_t y = h; y != 0; y--) { |
210 | const uint8_t* srcData = srcLine; |
211 | |
212 | uint32_t i = w; |
213 | while (i >= 4) { |
214 | uint32_t b0 = uint32_t(*srcData++) << 24; |
215 | |
216 | blMemWriteU32a(dstData + 0, table[b0 >> 30]); b0 <<= 2; |
217 | blMemWriteU32a(dstData + 4, table[b0 >> 30]); b0 <<= 2; |
218 | blMemWriteU32a(dstData + 8, table[b0 >> 30]); b0 <<= 2; |
219 | blMemWriteU32a(dstData + 12, table[b0 >> 30]); |
220 | |
221 | dstData += 16; |
222 | i -= 4; |
223 | } |
224 | |
225 | if (i) { |
226 | uint32_t b0 = uint32_t(*srcData++) << 24; |
227 | do { |
228 | blMemWriteU32a(dstData, table[b0 >> 30]); |
229 | |
230 | dstData += 4; |
231 | b0 <<= 2; |
232 | } while (--i); |
233 | } |
234 | |
235 | dstData = blPixelConverterFillGap(dstData, gap); |
236 | dstData += dstStride; |
237 | srcLine += srcStride; |
238 | } |
239 | |
240 | return BL_SUCCESS; |
241 | } |
242 | |
243 | static BLResult BL_CDECL bl_convert_lookup32_from_index4( |
244 | const BLPixelConverterCore* self, |
245 | uint8_t* dstData, intptr_t dstStride, |
246 | const uint8_t* srcLine, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
247 | |
248 | if (!options) |
249 | options = &blPixelConverterDefaultOptions; |
250 | |
251 | const BLPixelConverterData::LookupTable& d = blPixelConverterGetData(self)->lookupTable; |
252 | const uint32_t* table = d.table; |
253 | const size_t gap = options->gap; |
254 | |
255 | dstStride -= w * 4 + gap; |
256 | |
257 | for (uint32_t y = h; y != 0; y--) { |
258 | const uint8_t* srcData = srcLine; |
259 | |
260 | uint32_t i = w; |
261 | while (i >= 2) { |
262 | uint32_t b0 = *srcData++; |
263 | |
264 | blMemWriteU32a(dstData + 0, table[b0 >> 4]); |
265 | blMemWriteU32a(dstData + 4, table[b0 & 15]); |
266 | |
267 | dstData += 8; |
268 | i -= 2; |
269 | } |
270 | |
271 | if (i) { |
272 | uint32_t b0 = srcData[0]; |
273 | blMemWriteU32a(dstData, table[b0 >> 4]); |
274 | dstData += 4; |
275 | } |
276 | |
277 | dstData = blPixelConverterFillGap(dstData, gap); |
278 | dstData += dstStride; |
279 | srcLine += srcStride; |
280 | } |
281 | |
282 | return BL_SUCCESS; |
283 | } |
284 | |
285 | static BLResult BL_CDECL bl_convert_lookup32_from_index8( |
286 | const BLPixelConverterCore* self, |
287 | uint8_t* dstData, intptr_t dstStride, |
288 | const uint8_t* srcLine, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
289 | |
290 | if (!options) |
291 | options = &blPixelConverterDefaultOptions; |
292 | |
293 | const BLPixelConverterData::LookupTable& d = blPixelConverterGetData(self)->lookupTable; |
294 | const uint32_t* table = d.table; |
295 | const size_t gap = options->gap; |
296 | |
297 | dstStride -= w * 4 + gap; |
298 | |
299 | for (uint32_t y = h; y != 0; y--) { |
300 | const uint8_t* srcData = srcLine; |
301 | |
302 | for (uint32_t i = w; i != 0; i--) { |
303 | uint32_t b0 = *srcData++; |
304 | blMemWriteU32a(dstData, table[b0]); |
305 | dstData += 4; |
306 | } |
307 | |
308 | dstData = blPixelConverterFillGap(dstData, gap); |
309 | dstData += dstStride; |
310 | srcLine += srcStride; |
311 | } |
312 | |
313 | return BL_SUCCESS; |
314 | } |
315 | |
316 | // ============================================================================ |
317 | // [BLPixelConverter - ByteShuffle] |
318 | // ============================================================================ |
319 | |
320 | // TODO: |
321 | |
322 | // ============================================================================ |
323 | // [BLPixelConverter - Native32 <- XRGB|ARGB|PRGB] |
324 | // ============================================================================ |
325 | |
326 | template<typename PixelAccess, bool AlwaysUnaligned> |
327 | static BLResult BL_CDECL bl_convert_xrgb32_from_xrgb_any( |
328 | const BLPixelConverterCore* self, |
329 | uint8_t* dstData, intptr_t dstStride, |
330 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
331 | |
332 | if (!options) |
333 | options = &blPixelConverterDefaultOptions; |
334 | |
335 | const BLPixelConverterData::NativeFromExternal& d = blPixelConverterGetData(self)->nativeFromExternal; |
336 | const size_t gap = options->gap; |
337 | |
338 | dstStride -= w * 4 + gap; |
339 | srcStride -= w * PixelAccess::kSize; |
340 | |
341 | uint32_t rMask = d.masks[0]; |
342 | uint32_t gMask = d.masks[1]; |
343 | uint32_t bMask = d.masks[2]; |
344 | |
345 | uint32_t rShift = d.shifts[0]; |
346 | uint32_t gShift = d.shifts[1]; |
347 | uint32_t bShift = d.shifts[2]; |
348 | |
349 | uint32_t rScale = d.scale[0]; |
350 | uint32_t gScale = d.scale[1]; |
351 | uint32_t bScale = d.scale[2]; |
352 | |
353 | uint32_t fillMask = d.fillMask; |
354 | |
355 | for (uint32_t y = h; y != 0; y--) { |
356 | if (!AlwaysUnaligned && blIsAligned(srcData, PixelAccess::kSize)) { |
357 | for (uint32_t i = w; i != 0; i--) { |
358 | uint32_t pix = PixelAccess::fetchA(srcData); |
359 | uint32_t r = (((pix >> rShift) & rMask) * rScale) & 0x00FF0000u; |
360 | uint32_t g = (((pix >> gShift) & gMask) * gScale) & 0x0000FF00u; |
361 | uint32_t b = (((pix >> bShift) & bMask) * bScale) >> 8; |
362 | |
363 | blMemWriteU32a(dstData, r | g | b | fillMask); |
364 | |
365 | dstData += 4; |
366 | srcData += PixelAccess::kSize; |
367 | } |
368 | } |
369 | else { |
370 | for (uint32_t i = w; i != 0; i--) { |
371 | uint32_t pix = PixelAccess::fetchU(srcData); |
372 | uint32_t r = (((pix >> rShift) & rMask) * rScale) & 0x00FF0000u; |
373 | uint32_t g = (((pix >> gShift) & gMask) * gScale) & 0x0000FF00u; |
374 | uint32_t b = (((pix >> bShift) & bMask) * bScale) >> 8; |
375 | |
376 | blMemWriteU32a(dstData, r | g | b | fillMask); |
377 | |
378 | dstData += 4; |
379 | srcData += PixelAccess::kSize; |
380 | } |
381 | } |
382 | |
383 | dstData = blPixelConverterFillGap(dstData, gap); |
384 | dstData += dstStride; |
385 | srcData += srcStride; |
386 | } |
387 | |
388 | return BL_SUCCESS; |
389 | } |
390 | |
391 | template<typename PixelAccess, bool AlwaysUnaligned> |
392 | static BLResult BL_CDECL bl_convert_prgb32_from_argb_any( |
393 | const BLPixelConverterCore* self, |
394 | uint8_t* dstData, intptr_t dstStride, |
395 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
396 | |
397 | if (!options) |
398 | options = &blPixelConverterDefaultOptions; |
399 | |
400 | const BLPixelConverterData::NativeFromExternal& d = blPixelConverterGetData(self)->nativeFromExternal; |
401 | const size_t gap = options->gap; |
402 | |
403 | dstStride -= w * 4 + gap; |
404 | srcStride -= w * PixelAccess::kSize; |
405 | |
406 | uint32_t rMask = d.masks[0]; |
407 | uint32_t gMask = d.masks[1]; |
408 | uint32_t bMask = d.masks[2]; |
409 | uint32_t aMask = d.masks[3]; |
410 | |
411 | uint32_t rShift = d.shifts[0]; |
412 | uint32_t gShift = d.shifts[1]; |
413 | uint32_t bShift = d.shifts[2]; |
414 | uint32_t aShift = d.shifts[3]; |
415 | |
416 | uint32_t rScale = d.scale[0]; |
417 | uint32_t gScale = d.scale[1]; |
418 | uint32_t bScale = d.scale[2]; |
419 | uint32_t aScale = d.scale[3]; |
420 | |
421 | for (uint32_t y = h; y != 0; y--) { |
422 | if (!AlwaysUnaligned && blIsAligned(srcData, PixelAccess::kSize)) { |
423 | for (uint32_t i = w; i != 0; i--) { |
424 | uint32_t pix = PixelAccess::fetchA(srcData); |
425 | uint32_t _a = ((((pix >> aShift) & aMask) * aScale) >> 24); |
426 | uint32_t ag = ((((pix >> gShift) & gMask) * gScale) >> 8); |
427 | uint32_t rb = ((((pix >> rShift) & rMask) * rScale) & 0x00FF0000u) | |
428 | ((((pix >> bShift) & bMask) * bScale) >> 8); |
429 | |
430 | ag |= 0x00FF0000u; |
431 | rb *= _a; |
432 | ag *= _a; |
433 | |
434 | rb += 0x00800080u; |
435 | ag += 0x00800080u; |
436 | |
437 | rb = (rb + ((rb >> 8) & 0x00FF00FFu)) & 0xFF00FF00u; |
438 | ag = (ag + ((ag >> 8) & 0x00FF00FFu)) & 0xFF00FF00u; |
439 | |
440 | rb >>= 8; |
441 | blMemWriteU32a(dstData, ag + rb); |
442 | |
443 | dstData += 4; |
444 | srcData += PixelAccess::kSize; |
445 | } |
446 | } |
447 | else { |
448 | for (uint32_t i = w; i != 0; i--) { |
449 | uint32_t pix = PixelAccess::fetchU(srcData); |
450 | uint32_t _a = ((((pix >> aShift) & aMask) * aScale) >> 24); |
451 | uint32_t ag = ((((pix >> gShift) & gMask) * gScale) >> 8); |
452 | uint32_t rb = ((((pix >> rShift) & rMask) * rScale) & 0x00FF0000u) | |
453 | ((((pix >> bShift) & bMask) * bScale) >> 8); |
454 | |
455 | ag |= 0x00FF0000u; |
456 | rb *= _a; |
457 | ag *= _a; |
458 | |
459 | rb += 0x00800080u; |
460 | ag += 0x00800080u; |
461 | |
462 | rb = (rb + ((rb >> 8) & 0x00FF00FFu)) & 0xFF00FF00u; |
463 | ag = (ag + ((ag >> 8) & 0x00FF00FFu)) & 0xFF00FF00u; |
464 | |
465 | rb >>= 8; |
466 | blMemWriteU32a(dstData, ag | rb); |
467 | |
468 | dstData += 4; |
469 | srcData += PixelAccess::kSize; |
470 | } |
471 | } |
472 | |
473 | dstData = blPixelConverterFillGap(dstData, gap); |
474 | dstData += dstStride; |
475 | srcData += srcStride; |
476 | } |
477 | |
478 | return BL_SUCCESS; |
479 | } |
480 | |
481 | template<typename PixelAccess, bool AlwaysUnaligned> |
482 | static BLResult BL_CDECL bl_convert_prgb32_from_prgb_any( |
483 | const BLPixelConverterCore* self, |
484 | uint8_t* dstData, intptr_t dstStride, |
485 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
486 | |
487 | if (!options) |
488 | options = &blPixelConverterDefaultOptions; |
489 | |
490 | const BLPixelConverterData::NativeFromExternal& d = blPixelConverterGetData(self)->nativeFromExternal; |
491 | const size_t gap = options->gap; |
492 | |
493 | dstStride -= w * 4 + gap; |
494 | srcStride -= w * PixelAccess::kSize; |
495 | |
496 | uint32_t rMask = d.masks[0]; |
497 | uint32_t gMask = d.masks[1]; |
498 | uint32_t bMask = d.masks[2]; |
499 | uint32_t aMask = d.masks[3]; |
500 | |
501 | uint32_t rShift = d.shifts[0]; |
502 | uint32_t gShift = d.shifts[1]; |
503 | uint32_t bShift = d.shifts[2]; |
504 | uint32_t aShift = d.shifts[3]; |
505 | |
506 | uint32_t rScale = d.scale[0]; |
507 | uint32_t gScale = d.scale[1]; |
508 | uint32_t bScale = d.scale[2]; |
509 | uint32_t aScale = d.scale[3]; |
510 | |
511 | for (uint32_t y = h; y != 0; y--) { |
512 | if (!AlwaysUnaligned && blIsAligned(srcData, PixelAccess::kSize)) { |
513 | for (uint32_t i = w; i != 0; i--) { |
514 | uint32_t pix = PixelAccess::fetchA(srcData); |
515 | uint32_t r = ((pix >> rShift) & rMask) * rScale; |
516 | uint32_t g = ((pix >> gShift) & gMask) * gScale; |
517 | uint32_t b = ((pix >> bShift) & bMask) * bScale; |
518 | uint32_t a = ((pix >> aShift) & aMask) * aScale; |
519 | |
520 | uint32_t ag = (a + (g )) & 0xFF00FF00u; |
521 | uint32_t rb = (r + (b >> 8)) & 0x00FF00FFu; |
522 | |
523 | blMemWriteU32a(dstData, ag | rb); |
524 | |
525 | dstData += 4; |
526 | srcData += PixelAccess::kSize; |
527 | } |
528 | } |
529 | else { |
530 | for (uint32_t i = w; i != 0; i--) { |
531 | uint32_t pix = PixelAccess::fetchU(srcData); |
532 | uint32_t g = ((pix >> gShift) & gMask) * gScale; |
533 | uint32_t r = ((pix >> rShift) & rMask) * rScale; |
534 | uint32_t b = ((pix >> bShift) & bMask) * bScale; |
535 | uint32_t a = ((pix >> aShift) & aMask) * aScale; |
536 | |
537 | uint32_t ag = (a + (g )) & 0xFF00FF00u; |
538 | uint32_t rb = (r + (b >> 8)) & 0x00FF00FFu; |
539 | |
540 | blMemWriteU32a(dstData, ag | rb); |
541 | |
542 | dstData += 4; |
543 | srcData += PixelAccess::kSize; |
544 | } |
545 | } |
546 | |
547 | dstData = blPixelConverterFillGap(dstData, gap); |
548 | dstData += dstStride; |
549 | srcData += srcStride; |
550 | } |
551 | |
552 | return BL_SUCCESS; |
553 | } |
554 | |
555 | // ============================================================================ |
556 | // [BLPixelConverter - XRGB|ARGB|PRGB <- Native32] |
557 | // ============================================================================ |
558 | |
559 | template<typename PixelAccess, bool AlwaysUnaligned> |
560 | static BLResult BL_CDECL bl_convert_xrgb_any_from_xrgb32( |
561 | const BLPixelConverterCore* self, |
562 | uint8_t* dstData, intptr_t dstStride, |
563 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
564 | |
565 | if (!options) |
566 | options = &blPixelConverterDefaultOptions; |
567 | |
568 | const BLPixelConverterData::ExternalFromNative& d = blPixelConverterGetData(self)->externalFromNative; |
569 | const size_t gap = options->gap; |
570 | |
571 | dstStride -= w * PixelAccess::kSize + gap; |
572 | srcStride -= w * 4; |
573 | |
574 | uint32_t rMask = d.masks[0]; |
575 | uint32_t gMask = d.masks[1]; |
576 | uint32_t bMask = d.masks[2]; |
577 | |
578 | uint32_t rShift = d.shifts[0]; |
579 | uint32_t gShift = d.shifts[1]; |
580 | uint32_t bShift = d.shifts[2]; |
581 | |
582 | uint32_t fillMask = d.fillMask; |
583 | |
584 | for (uint32_t y = h; y != 0; y--) { |
585 | if (!AlwaysUnaligned && blIsAligned(dstData, PixelAccess::kSize)) { |
586 | for (uint32_t i = w; i != 0; i--) { |
587 | uint32_t pix = blMemReadU32a(srcData); |
588 | |
589 | uint32_t r = ((pix >> 16) & 0xFFu) * 0x01010101u; |
590 | uint32_t g = ((pix >> 8) & 0xFFu) * 0x01010101u; |
591 | uint32_t b = ((pix ) & 0xFFu) * 0x01010101u; |
592 | |
593 | PixelAccess::storeA(dstData, ((r >> rShift) & rMask) | |
594 | ((g >> gShift) & gMask) | |
595 | ((b >> bShift) & bMask) | fillMask); |
596 | dstData += PixelAccess::kSize; |
597 | srcData += 4; |
598 | } |
599 | } |
600 | else { |
601 | for (uint32_t i = w; i != 0; i--) { |
602 | uint32_t pix = blMemReadU32u(srcData); |
603 | |
604 | uint32_t r = ((pix >> 16) & 0xFFu) * 0x01010101u; |
605 | uint32_t g = ((pix >> 8) & 0xFFu) * 0x01010101u; |
606 | uint32_t b = ((pix ) & 0xFFu) * 0x01010101u; |
607 | |
608 | PixelAccess::storeU(dstData, ((r >> rShift) & rMask) | |
609 | ((g >> gShift) & gMask) | |
610 | ((b >> bShift) & bMask) | fillMask); |
611 | dstData += PixelAccess::kSize; |
612 | srcData += 4; |
613 | } |
614 | } |
615 | |
616 | dstData = blPixelConverterFillGap(dstData, gap); |
617 | dstData += dstStride; |
618 | srcData += srcStride; |
619 | } |
620 | |
621 | return BL_SUCCESS; |
622 | } |
623 | |
624 | template<typename PixelAccess, bool AlwaysUnaligned> |
625 | static BLResult BL_CDECL bl_convert_argb_any_from_prgb32( |
626 | const BLPixelConverterCore* self, |
627 | uint8_t* dstData, intptr_t dstStride, |
628 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
629 | |
630 | if (!options) |
631 | options = &blPixelConverterDefaultOptions; |
632 | |
633 | const BLPixelConverterData::ExternalFromNative& d = blPixelConverterGetData(self)->externalFromNative; |
634 | const size_t gap = options->gap; |
635 | |
636 | dstStride -= w * PixelAccess::kSize + gap; |
637 | srcStride -= w * 4; |
638 | |
639 | uint32_t rMask = d.masks[0]; |
640 | uint32_t gMask = d.masks[1]; |
641 | uint32_t bMask = d.masks[2]; |
642 | uint32_t aMask = d.masks[3]; |
643 | |
644 | uint32_t rShift = d.shifts[0]; |
645 | uint32_t gShift = d.shifts[1]; |
646 | uint32_t bShift = d.shifts[2]; |
647 | uint32_t aShift = d.shifts[3]; |
648 | |
649 | const uint32_t* div24bitRecip = blCommonTable.div24bit.data; |
650 | |
651 | for (uint32_t y = h; y != 0; y--) { |
652 | if (!AlwaysUnaligned && blIsAligned(dstData, PixelAccess::kSize)) { |
653 | for (uint32_t i = w; i != 0; i--) { |
654 | uint32_t pix = blMemReadU32a(srcData); |
655 | |
656 | uint32_t a = pix >> 24; |
657 | uint32_t recip = div24bitRecip[a]; |
658 | |
659 | uint32_t r = ((((pix >> 16) & 0xFFu) * recip) >> 16) * 0x01010101u; |
660 | uint32_t g = ((((pix >> 8) & 0xFFu) * recip) >> 16) * 0x01010101u; |
661 | uint32_t b = ((((pix ) & 0xFFu) * recip) >> 16) * 0x01010101u; |
662 | |
663 | a *= 0x01010101u; |
664 | PixelAccess::storeA(dstData, ((r >> rShift) & rMask) | |
665 | ((g >> gShift) & gMask) | |
666 | ((b >> bShift) & bMask) | |
667 | ((a >> aShift) & aMask)); |
668 | dstData += PixelAccess::kSize; |
669 | srcData += 4; |
670 | } |
671 | } |
672 | else { |
673 | for (uint32_t i = w; i != 0; i--) { |
674 | uint32_t pix = blMemReadU32u(srcData); |
675 | |
676 | uint32_t a = pix >> 24; |
677 | uint32_t recip = div24bitRecip[a]; |
678 | |
679 | uint32_t r = ((((pix >> 16) & 0xFFu) * recip) >> 16) * 0x01010101u; |
680 | uint32_t g = ((((pix >> 8) & 0xFFu) * recip) >> 16) * 0x01010101u; |
681 | uint32_t b = ((((pix ) & 0xFFu) * recip) >> 16) * 0x01010101u; |
682 | |
683 | a *= 0x01010101u; |
684 | PixelAccess::storeU(dstData, ((r >> rShift) & rMask) | |
685 | ((g >> gShift) & gMask) | |
686 | ((b >> bShift) & bMask) | |
687 | ((a >> aShift) & aMask)); |
688 | dstData += PixelAccess::kSize; |
689 | srcData += 4; |
690 | } |
691 | } |
692 | |
693 | dstData = blPixelConverterFillGap(dstData, gap); |
694 | dstData += dstStride; |
695 | srcData += srcStride; |
696 | } |
697 | |
698 | return BL_SUCCESS; |
699 | } |
700 | |
701 | template<typename PixelAccess, bool AlwaysUnaligned> |
702 | static BLResult BL_CDECL bl_convert_prgb_any_from_prgb32( |
703 | const BLPixelConverterCore* self, |
704 | uint8_t* dstData, intptr_t dstStride, |
705 | const uint8_t* srcData, intptr_t srcStride, uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
706 | |
707 | if (!options) |
708 | options = &blPixelConverterDefaultOptions; |
709 | |
710 | const BLPixelConverterData::ExternalFromNative& d = blPixelConverterGetData(self)->externalFromNative; |
711 | const size_t gap = options->gap; |
712 | |
713 | dstStride -= w * PixelAccess::kSize + gap; |
714 | srcStride -= w * 4; |
715 | |
716 | uint32_t rMask = d.masks[0]; |
717 | uint32_t gMask = d.masks[1]; |
718 | uint32_t bMask = d.masks[2]; |
719 | uint32_t aMask = d.masks[3]; |
720 | |
721 | uint32_t rShift = d.shifts[0]; |
722 | uint32_t gShift = d.shifts[1]; |
723 | uint32_t bShift = d.shifts[2]; |
724 | uint32_t aShift = d.shifts[3]; |
725 | |
726 | for (uint32_t y = h; y != 0; y--) { |
727 | if (!AlwaysUnaligned && blIsAligned(dstData, PixelAccess::kSize)) { |
728 | for (uint32_t i = w; i != 0; i--) { |
729 | uint32_t pix = blMemReadU32a(srcData); |
730 | |
731 | uint32_t r = ((pix >> 16) & 0xFFu) * 0x01010101u; |
732 | uint32_t g = ((pix >> 8) & 0xFFu) * 0x01010101u; |
733 | uint32_t b = ((pix ) & 0xFFu) * 0x01010101u; |
734 | uint32_t a = ((pix >> 24) ) * 0x01010101u; |
735 | |
736 | PixelAccess::storeA(dstData, ((r >> rShift) & rMask) | |
737 | ((g >> gShift) & gMask) | |
738 | ((b >> bShift) & bMask) | |
739 | ((a >> aShift) & aMask)); |
740 | dstData += PixelAccess::kSize; |
741 | srcData += 4; |
742 | } |
743 | } |
744 | else { |
745 | for (uint32_t i = w; i != 0; i--) { |
746 | uint32_t pix = blMemReadU32u(srcData); |
747 | |
748 | uint32_t r = ((pix >> 16) & 0xFFu) * 0x01010101u; |
749 | uint32_t g = ((pix >> 8) & 0xFFu) * 0x01010101u; |
750 | uint32_t b = ((pix ) & 0xFFu) * 0x01010101u; |
751 | uint32_t a = ((pix >> 24) ) * 0x01010101u; |
752 | |
753 | PixelAccess::storeU(dstData, ((r >> rShift) & rMask) | |
754 | ((g >> gShift) & gMask) | |
755 | ((b >> bShift) & bMask) | |
756 | ((a >> aShift) & aMask)); |
757 | dstData += PixelAccess::kSize; |
758 | srcData += 4; |
759 | } |
760 | } |
761 | |
762 | dstData = blPixelConverterFillGap(dstData, gap); |
763 | dstData += dstStride; |
764 | srcData += srcStride; |
765 | } |
766 | |
767 | return BL_SUCCESS; |
768 | } |
769 | |
770 | // ============================================================================ |
771 | // [BLPixelConverter - Utilities] |
772 | // ============================================================================ |
773 | |
774 | static uint32_t blPixelConverterMatchFormat(const BLFormatInfo& fmt) noexcept { |
775 | for (uint32_t i = 1; i < BL_PIXEL_CONVERTER_FORMAT_COUNT; i++) |
776 | if (memcmp(&blPixelConverterFormatInfo[i], &fmt, sizeof(BLFormatInfo)) == 0) |
777 | return i; |
778 | return BL_PIXEL_CONVERTER_FORMAT_NONE; |
779 | } |
780 | |
781 | static BLResult blPixelConverterInitInternal(BLPixelConverterCore* self, const BLFormatInfo& dstInfo, const BLFormatInfo& srcInfo) noexcept { |
782 | // Initially the pixel converter should be initialized to all zeros. So we |
783 | // just fill what we need to, but we don't have to zero the existing members. |
784 | BLPixelConverterFunc func = nullptr; |
785 | |
786 | uint32_t dstFormat = blPixelConverterMatchFormat(dstInfo); |
787 | uint32_t srcFormat = blPixelConverterMatchFormat(srcInfo); |
788 | |
789 | // -------------------------------------------------------------------------- |
790 | // [Native <- External] |
791 | // -------------------------------------------------------------------------- |
792 | |
793 | if (dstFormat != BL_PIXEL_CONVERTER_FORMAT_NONE) { |
794 | if (srcInfo.flags & BL_FORMAT_FLAG_INDEXED) { |
795 | switch (srcInfo.depth) { |
796 | case 1: func = bl_convert_lookup32_from_index1; break; |
797 | case 2: func = bl_convert_lookup32_from_index2; break; |
798 | case 4: func = bl_convert_lookup32_from_index4; break; |
799 | case 8: func = bl_convert_lookup32_from_index8; break; |
800 | |
801 | default: |
802 | // We return invalid value, but the sanitizer should fail in such case. |
803 | return blTraceError(BL_ERROR_INVALID_VALUE); |
804 | } |
805 | |
806 | BLPixelConverterData::LookupTable& d = blPixelConverterGetData(self)->lookupTable; |
807 | d.strategy = BL_PIXEL_CONVERTER_STRATEGY_LOOKUP_TABLE; |
808 | d.table = reinterpret_cast<const uint32_t*>(srcInfo.palette); |
809 | |
810 | self->convertFunc = func; |
811 | return BL_SUCCESS; |
812 | } |
813 | else { |
814 | BLPixelConverterData::NativeFromExternal& d = blPixelConverterGetData(self)->nativeFromExternal; |
815 | |
816 | bool isARGB = (srcInfo.flags & BL_FORMAT_FLAG_ALPHA) != 0; |
817 | bool isPRGB = (srcInfo.flags & BL_FORMAT_FLAG_PREMULTIPLIED) != 0; |
818 | bool isGray = (srcInfo.flags & BL_FORMAT_FLAG_LUM) != 0; |
819 | bool hostBO = (srcInfo.flags & BL_FORMAT_FLAG_BYTE_SWAP) == 0; |
820 | |
821 | if (dstInfo.depth == 32 && !isARGB) |
822 | d.fillMask = 0xFF000000u; |
823 | |
824 | for (uint32_t i = 0; i < 4; i++) { |
825 | uint32_t size = srcInfo.sizes[i]; |
826 | uint32_t shift = srcInfo.shifts[i]; |
827 | |
828 | d.masks[i] = 0; |
829 | d.shifts[i] = uint8_t(shift); |
830 | d.scale[i] = 0; |
831 | |
832 | if (size == 0) |
833 | continue; |
834 | |
835 | // Discard all bits that are below 8 most significant ones. |
836 | if (size > 8) { |
837 | shift += (size - 8); |
838 | size = 8; |
839 | } |
840 | |
841 | d.masks[i] = blTrailingBitMask<uint32_t>(size); |
842 | d.shifts[i] = uint8_t(shift); |
843 | |
844 | // Calculate a scale constant that will be used to expand bits in case |
845 | // that the source contains less than 8 bits. We do it by adding `size` |
846 | // to the `scaledSize` until we reach the required bit-depth. |
847 | uint32_t scale = 0x1; |
848 | uint32_t scaledSize = size; |
849 | |
850 | while (scaledSize < 8) { |
851 | scale = (scale << size) | 1; |
852 | scaledSize += size; |
853 | } |
854 | |
855 | // Shift scale in a way that it contains MSB of the mask and the right position. |
856 | uint32_t scaledShift = blPixelConverterNative32FromExternalShiftTable[i] - (scaledSize - 8); |
857 | scale <<= scaledShift; |
858 | d.scale[i] = scale; |
859 | } |
860 | |
861 | // Prefer SIMD optimized converters if possible. |
862 | #ifdef BL_BUILD_OPT_AVX2 |
863 | if (blRuntimeHasAVX2(&blRuntimeContext) && blPixelConverterInitNativeFromXRGB_AVX2(self, dstFormat, srcInfo)) |
864 | return BL_SUCCESS; |
865 | #endif |
866 | |
867 | #ifdef BL_BUILD_OPT_SSSE3 |
868 | if (blRuntimeHasSSSE3(&blRuntimeContext) && blPixelConverterInitNativeFromXRGB_SSSE3(self, dstFormat, srcInfo)) |
869 | return BL_SUCCESS; |
870 | #endif |
871 | |
872 | #ifdef BL_BUILD_OPT_SSE2 |
873 | if (blRuntimeHasSSE2(&blRuntimeContext) && blPixelConverterInitNativeFromXRGB_SSE2(self, dstFormat, srcInfo)) |
874 | return BL_SUCCESS; |
875 | #endif |
876 | |
877 | // Special case of converting LUM to RGB. |
878 | if (srcInfo.flags & BL_FORMAT_FLAG_LUM) { |
879 | // TODO: |
880 | } |
881 | |
882 | // Generic conversion. |
883 | switch (srcInfo.depth) { |
884 | case 16: |
885 | if (isPRGB) |
886 | func = hostBO ? bl_convert_prgb32_from_prgb_any<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
887 | : bl_convert_prgb32_from_prgb_any<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
888 | else if (isARGB) |
889 | func = hostBO ? bl_convert_prgb32_from_argb_any<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
890 | : bl_convert_prgb32_from_argb_any<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
891 | else |
892 | func = hostBO ? bl_convert_xrgb32_from_xrgb_any<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
893 | : bl_convert_xrgb32_from_xrgb_any<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
894 | break; |
895 | |
896 | case 24: |
897 | if (isPRGB) |
898 | func = hostBO ? bl_convert_prgb32_from_prgb_any<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
899 | : bl_convert_prgb32_from_prgb_any<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
900 | else if (isARGB) |
901 | func = hostBO ? bl_convert_prgb32_from_argb_any<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
902 | : bl_convert_prgb32_from_argb_any<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
903 | else |
904 | func = hostBO ? bl_convert_xrgb32_from_xrgb_any<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
905 | : bl_convert_xrgb32_from_xrgb_any<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
906 | break; |
907 | |
908 | case 32: |
909 | if (isPRGB) |
910 | func = hostBO ? bl_convert_prgb32_from_prgb_any<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
911 | : bl_convert_prgb32_from_prgb_any<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
912 | else if (isARGB) |
913 | func = hostBO ? bl_convert_prgb32_from_argb_any<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
914 | : bl_convert_prgb32_from_argb_any<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
915 | else |
916 | func = hostBO ? bl_convert_xrgb32_from_xrgb_any<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
917 | : bl_convert_xrgb32_from_xrgb_any<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
918 | break; |
919 | |
920 | default: |
921 | return blTraceError(BL_ERROR_INVALID_VALUE); |
922 | } |
923 | |
924 | self->convertFunc = func; |
925 | return BL_SUCCESS; |
926 | } |
927 | } |
928 | |
929 | // -------------------------------------------------------------------------- |
930 | // [External <- Native] |
931 | // -------------------------------------------------------------------------- |
932 | |
933 | if (srcFormat != BL_PIXEL_CONVERTER_FORMAT_NONE) { |
934 | if (dstInfo.flags & BL_FORMAT_FLAG_INDEXED) { |
935 | // TODO: |
936 | return blTraceError(BL_ERROR_NOT_IMPLEMENTED); |
937 | } |
938 | else { |
939 | BLPixelConverterData::ExternalFromNative& d = blPixelConverterGetData(self)->externalFromNative; |
940 | |
941 | bool isARGB = (dstInfo.flags & BL_FORMAT_FLAG_ALPHA) != 0; |
942 | bool isPRGB = (dstInfo.flags & BL_FORMAT_FLAG_PREMULTIPLIED) != 0; |
943 | bool isGray = (dstInfo.flags & BL_FORMAT_FLAG_LUM) != 0; |
944 | bool hostBO = (dstInfo.flags & BL_FORMAT_FLAG_BYTE_SWAP) == 0; |
945 | |
946 | for (uint32_t i = 0; i < 4; i++) { |
947 | uint32_t mask = 0; |
948 | uint32_t size = dstInfo.sizes[i]; |
949 | uint32_t shift = dstInfo.shifts[i]; |
950 | |
951 | if (size != 0) { |
952 | mask = blTrailingBitMask<uint32_t>(size) << shift; |
953 | shift = 32 - size - shift; |
954 | } |
955 | |
956 | d.masks[i] = mask; |
957 | d.shifts[i] = uint8_t(shift); |
958 | } |
959 | |
960 | switch (dstInfo.depth) { |
961 | case 16: |
962 | if (isPRGB) |
963 | func = hostBO ? bl_convert_prgb_any_from_prgb32<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
964 | : bl_convert_prgb_any_from_prgb32<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
965 | else if (isARGB) |
966 | func = hostBO ? bl_convert_argb_any_from_prgb32<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
967 | : bl_convert_argb_any_from_prgb32<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
968 | else |
969 | func = hostBO ? bl_convert_xrgb_any_from_xrgb32<BLPixelAccess16<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_16> |
970 | : bl_convert_xrgb_any_from_xrgb32<BLPixelAccess16<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_16>; |
971 | break; |
972 | |
973 | case 24: |
974 | if (isPRGB) |
975 | func = hostBO ? bl_convert_prgb_any_from_prgb32<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
976 | : bl_convert_prgb_any_from_prgb32<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
977 | else if (isARGB) |
978 | func = hostBO ? bl_convert_argb_any_from_prgb32<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
979 | : bl_convert_argb_any_from_prgb32<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
980 | else |
981 | func = hostBO ? bl_convert_xrgb_any_from_xrgb32<BLPixelAccess24<BL_BYTE_ORDER_NATIVE>, true> |
982 | : bl_convert_xrgb_any_from_xrgb32<BLPixelAccess24<BL_BYTE_ORDER_SWAPPED>, true>; |
983 | break; |
984 | |
985 | case 32: |
986 | if (isPRGB) |
987 | func = hostBO ? bl_convert_prgb_any_from_prgb32<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
988 | : bl_convert_prgb_any_from_prgb32<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
989 | else if (isARGB) |
990 | func = hostBO ? bl_convert_argb_any_from_prgb32<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
991 | : bl_convert_argb_any_from_prgb32<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
992 | else |
993 | func = hostBO ? bl_convert_xrgb_any_from_xrgb32<BLPixelAccess32<BL_BYTE_ORDER_NATIVE>, BL_UNALIGNED_IO_32> |
994 | : bl_convert_xrgb_any_from_xrgb32<BLPixelAccess32<BL_BYTE_ORDER_SWAPPED>, BL_UNALIGNED_IO_32>; |
995 | break; |
996 | |
997 | default: |
998 | return blTraceError(BL_ERROR_INVALID_VALUE); |
999 | } |
1000 | |
1001 | self->convertFunc = func; |
1002 | return BL_SUCCESS; |
1003 | } |
1004 | } |
1005 | |
1006 | // -------------------------------------------------------------------------- |
1007 | // [External <- External] |
1008 | // -------------------------------------------------------------------------- |
1009 | |
1010 | // We have non-native pixel formats on input and output. This means that we |
1011 | // will create two converters and convert through a native pixel format as |
1012 | // otherwise there would be a lot of combinations that we would have to handle. |
1013 | |
1014 | // TODO: |
1015 | |
1016 | // -------------------------------------------------------------------------- |
1017 | // [Invalid] |
1018 | // -------------------------------------------------------------------------- |
1019 | |
1020 | return blTraceError(BL_ERROR_INVALID_VALUE); |
1021 | } |
1022 | |
1023 | // ============================================================================ |
1024 | // [BLPixelConverter - Init / Reset] |
1025 | // ============================================================================ |
1026 | |
1027 | BLResult blPixelConverterInit(BLPixelConverterCore* self) noexcept { |
1028 | memset(self, 0, sizeof(BLPixelConverterCore)); |
1029 | return BL_SUCCESS; |
1030 | } |
1031 | |
1032 | BLResult blPixelConverterInitWeak(BLPixelConverterCore* self, const BLPixelConverterCore* other) noexcept { |
1033 | memcpy(self, other, sizeof(BLPixelConverterCore)); |
1034 | return BL_SUCCESS; |
1035 | } |
1036 | |
1037 | BLResult blPixelConverterReset(BLPixelConverterCore* self) noexcept { |
1038 | memset(self, 0, sizeof(BLPixelConverterCore)); |
1039 | return BL_SUCCESS; |
1040 | } |
1041 | |
1042 | // ============================================================================ |
1043 | // [BLPixelConverter - Assign] |
1044 | // ============================================================================ |
1045 | |
1046 | BLResult blPixelConverterAssign(BLPixelConverterCore* self, const BLPixelConverterCore* other) noexcept { |
1047 | memcpy(self, other, sizeof(BLPixelConverterCore)); |
1048 | return BL_SUCCESS; |
1049 | } |
1050 | |
1051 | // ============================================================================ |
1052 | // [BLPixelConverter - Create] |
1053 | // ============================================================================ |
1054 | |
1055 | BLResult blPixelConverterCreate(BLPixelConverterCore* self, const BLFormatInfo* dstInfo, const BLFormatInfo* srcInfo) noexcept { |
1056 | BLFormatInfo dstSanitized = *dstInfo; |
1057 | BLFormatInfo srcSanitized = *srcInfo; |
1058 | |
1059 | BL_PROPAGATE(dstSanitized.sanitize()); |
1060 | BL_PROPAGATE(srcSanitized.sanitize()); |
1061 | |
1062 | // Always create a new one and then swap it if the initialization succeeded. |
1063 | BLPixelConverterCore pc {}; |
1064 | BL_PROPAGATE(blPixelConverterInitInternal(&pc, dstSanitized, srcSanitized)); |
1065 | |
1066 | blPixelConverterReset(self); |
1067 | memcpy(self, &pc, sizeof(BLPixelConverterCore)); |
1068 | return BL_SUCCESS; |
1069 | } |
1070 | |
1071 | // ============================================================================ |
1072 | // [BLPixelConverter - Convert] |
1073 | // ============================================================================ |
1074 | |
1075 | BLResult blPixelConverterConvert(const BLPixelConverterCore* self, |
1076 | void* dstData, intptr_t dstStride, |
1077 | const void* srcData, intptr_t srcStride, |
1078 | uint32_t w, uint32_t h, const BLPixelConverterOptions* options) noexcept { |
1079 | |
1080 | return self->convertFunc(self, |
1081 | static_cast< uint8_t*>(dstData), dstStride, |
1082 | static_cast<const uint8_t*>(srcData), srcStride, w, h, options); |
1083 | } |
1084 | |
1085 | // ============================================================================ |
1086 | // [BLPixelConverter - Unit Tests] |
1087 | // ============================================================================ |
1088 | |
1089 | #ifdef BL_TEST |
1090 | template<typename T> |
1091 | struct BLPixelConverterUnit { |
1092 | static void fillMasks(BLFormatInfo& fi) noexcept { |
1093 | fi.shifts[0] = uint8_t(T::kR ? blBitCtz(T::kR) : uint32_t(0)); |
1094 | fi.shifts[1] = uint8_t(T::kG ? blBitCtz(T::kG) : uint32_t(0)); |
1095 | fi.shifts[2] = uint8_t(T::kB ? blBitCtz(T::kB) : uint32_t(0)); |
1096 | fi.shifts[3] = uint8_t(T::kA ? blBitCtz(T::kA) : uint32_t(0)); |
1097 | fi.sizes[0] = uint8_t(T::kR ? blBitCtz(~(T::kR >> fi.shifts[0])) : uint32_t(0)); |
1098 | fi.sizes[1] = uint8_t(T::kG ? blBitCtz(~(T::kG >> fi.shifts[1])) : uint32_t(0)); |
1099 | fi.sizes[2] = uint8_t(T::kB ? blBitCtz(~(T::kB >> fi.shifts[2])) : uint32_t(0)); |
1100 | fi.sizes[3] = uint8_t(T::kA ? blBitCtz(~(T::kA >> fi.shifts[3])) : uint32_t(0)); |
1101 | } |
1102 | |
1103 | static void testPrgb32() noexcept { |
1104 | INFO("Testing %dbpp %s format" , T::kDepth, T::formatString()); |
1105 | |
1106 | BLPixelConverter from; |
1107 | BLPixelConverter back; |
1108 | |
1109 | BLFormatInfo fi {}; |
1110 | fillMasks(fi); |
1111 | fi.depth = T::kDepth; |
1112 | fi.flags = fi.sizes[3] ? BL_FORMAT_FLAG_RGBA | BL_FORMAT_FLAG_PREMULTIPLIED : BL_FORMAT_FLAG_RGB; |
1113 | |
1114 | EXPECT(from.create(fi, blFormatInfo[BL_FORMAT_PRGB32]) == BL_SUCCESS, "%s: Failed to create from [%dbpp 0x%08X 0x%08X 0x%08X 0x%08X]" , T::formatString(), T::kDepth, T::kR, T::kG, T::kB, T::kA); |
1115 | EXPECT(back.create(blFormatInfo[BL_FORMAT_PRGB32], fi) == BL_SUCCESS, "%s: Failed to create to [%dbpp 0x%08X 0x%08X 0x%08X 0x%08X]" , T::formatString(), T::kDepth, T::kR, T::kG, T::kB, T::kA); |
1116 | |
1117 | enum : uint32_t { kCount = 8 }; |
1118 | |
1119 | static const uint32_t src[kCount] = { |
1120 | 0xFF000000, 0xFF0000FF, 0xFF00FF00, 0xFF00FFFF, |
1121 | 0xFFFF0000, 0xFFFF00FF, 0xFFFFFF00, 0xFFFFFFFF |
1122 | }; |
1123 | |
1124 | uint32_t dst[kCount]; |
1125 | uint8_t buf[kCount * 16]; |
1126 | |
1127 | // The test is rather basic now, we basically convert from PRGB to external |
1128 | // pixel format, then back, and then compare if the output is matching input. |
1129 | // In the future we should also check the intermediate result. |
1130 | from.convertSpan(buf, src, kCount); |
1131 | back.convertSpan(dst, buf, kCount); |
1132 | |
1133 | for (uint32_t i = 0; i < kCount; i++) { |
1134 | uint32_t mid = 0; |
1135 | switch (uint32_t(T::kDepth)) { |
1136 | case 8 : mid = blMemReadU8(buf + i); break; |
1137 | case 16: mid = blMemReadU16u(buf + i * 2u); break; |
1138 | case 24: mid = blMemReadU24u(buf + i * 3u); break; |
1139 | case 32: mid = blMemReadU32u(buf + i * 4u); break; |
1140 | } |
1141 | |
1142 | EXPECT(dst[i] == src[i], |
1143 | "%s: Dst(%08X) <- 0x%08X <- Src(0x%08X) [%dbpp %08X|%08X|%08X|%08X]" , |
1144 | T::formatString(), dst[i], mid, src[i], T::kDepth, T::kA, T::kR, T::kG, T::kB); |
1145 | } |
1146 | } |
1147 | |
1148 | static void test() noexcept { |
1149 | testPrgb32(); |
1150 | } |
1151 | }; |
1152 | |
// Declares `struct Test_<FORMAT>` that describes a single pixel format: its
// depth (bits per pixel), the masks of R, G, B, and A components (a zero mask
// means that the component is not present), and the format name as a string.
#define BL_PIXEL_TEST(FORMAT, DEPTH, R_MASK, G_MASK, B_MASK, A_MASK)      \
  struct Test_##FORMAT {                                                  \
    static inline const char* formatString() noexcept { return #FORMAT; } \
                                                                          \
    enum : uint32_t {                                                     \
      kDepth = DEPTH,                                                     \
      kR = R_MASK,                                                        \
      kG = G_MASK,                                                        \
      kB = B_MASK,                                                        \
      kA = A_MASK                                                         \
    };                                                                    \
  }

// 16-bit formats.
BL_PIXEL_TEST(XRGB_0555, 16, 0x00007C00u, 0x000003E0u, 0x0000001Fu, 0x00000000u);
BL_PIXEL_TEST(XBGR_0555, 16, 0x0000001Fu, 0x000003E0u, 0x00007C00u, 0x00000000u);
BL_PIXEL_TEST(XRGB_0565, 16, 0x0000F800u, 0x000007E0u, 0x0000001Fu, 0x00000000u);
BL_PIXEL_TEST(XBGR_0565, 16, 0x0000001Fu, 0x000007E0u, 0x0000F800u, 0x00000000u);
BL_PIXEL_TEST(ARGB_4444, 16, 0x00000F00u, 0x000000F0u, 0x0000000Fu, 0x0000F000u);
BL_PIXEL_TEST(ABGR_4444, 16, 0x0000000Fu, 0x000000F0u, 0x00000F00u, 0x0000F000u);
BL_PIXEL_TEST(RGBA_4444, 16, 0x0000F000u, 0x00000F00u, 0x000000F0u, 0x0000000Fu);
BL_PIXEL_TEST(BGRA_4444, 16, 0x000000F0u, 0x00000F00u, 0x0000F000u, 0x0000000Fu);

// 24-bit formats.
BL_PIXEL_TEST(XRGB_0888, 24, 0x00FF0000u, 0x0000FF00u, 0x000000FFu, 0x00000000u);
BL_PIXEL_TEST(XBGR_0888, 24, 0x000000FFu, 0x0000FF00u, 0x00FF0000u, 0x00000000u);

// 32-bit formats without alpha.
BL_PIXEL_TEST(XRGB_8888, 32, 0x00FF0000u, 0x0000FF00u, 0x000000FFu, 0x00000000u);
BL_PIXEL_TEST(XBGR_8888, 32, 0x000000FFu, 0x0000FF00u, 0x00FF0000u, 0x00000000u);
BL_PIXEL_TEST(RGBX_8888, 32, 0xFF000000u, 0x00FF0000u, 0x0000FF00u, 0x00000000u);
BL_PIXEL_TEST(BGRX_8888, 32, 0x0000FF00u, 0x00FF0000u, 0xFF000000u, 0x00000000u);

// 32-bit formats with alpha.
BL_PIXEL_TEST(ARGB_8888, 32, 0x00FF0000u, 0x0000FF00u, 0x000000FFu, 0xFF000000u);
BL_PIXEL_TEST(ABGR_8888, 32, 0x000000FFu, 0x0000FF00u, 0x00FF0000u, 0xFF000000u);
BL_PIXEL_TEST(RGBA_8888, 32, 0xFF000000u, 0x00FF0000u, 0x0000FF00u, 0x000000FFu);
BL_PIXEL_TEST(BGRA_8888, 32, 0x0000FF00u, 0x00FF0000u, 0xFF000000u, 0x000000FFu);
BL_PIXEL_TEST(BRGA_8888, 32, 0x00FF0000u, 0x0000FF00u, 0xFF000000u, 0x000000FFu);

#undef BL_PIXEL_TEST
1187 | |
1188 | UNIT(blend2d_pixel_converter) { |
1189 | BLPixelConverterUnit<Test_XRGB_0555>::test(); |
1190 | BLPixelConverterUnit<Test_XBGR_0555>::test(); |
1191 | BLPixelConverterUnit<Test_XRGB_0565>::test(); |
1192 | BLPixelConverterUnit<Test_XBGR_0565>::test(); |
1193 | BLPixelConverterUnit<Test_ARGB_4444>::test(); |
1194 | BLPixelConverterUnit<Test_ABGR_4444>::test(); |
1195 | BLPixelConverterUnit<Test_RGBA_4444>::test(); |
1196 | BLPixelConverterUnit<Test_BGRA_4444>::test(); |
1197 | BLPixelConverterUnit<Test_XRGB_0888>::test(); |
1198 | BLPixelConverterUnit<Test_XBGR_0888>::test(); |
1199 | BLPixelConverterUnit<Test_XRGB_8888>::test(); |
1200 | BLPixelConverterUnit<Test_XBGR_8888>::test(); |
1201 | BLPixelConverterUnit<Test_RGBX_8888>::test(); |
1202 | BLPixelConverterUnit<Test_BGRX_8888>::test(); |
1203 | BLPixelConverterUnit<Test_ARGB_8888>::test(); |
1204 | BLPixelConverterUnit<Test_ABGR_8888>::test(); |
1205 | BLPixelConverterUnit<Test_RGBA_8888>::test(); |
1206 | BLPixelConverterUnit<Test_BGRA_8888>::test(); |
1207 | BLPixelConverterUnit<Test_BRGA_8888>::test(); |
1208 | } |
1209 | #endif |
1210 | |