1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "Blitter.hpp" |
16 | |
17 | #include "Shader/ShaderCore.hpp" |
18 | #include "Reactor/Reactor.hpp" |
19 | #include "Common/Memory.hpp" |
20 | #include "Common/Debug.hpp" |
21 | |
22 | namespace sw |
23 | { |
24 | using namespace rr; |
25 | |
26 | Blitter::Blitter() |
27 | { |
28 | blitCache = new RoutineCache<State>(1024); |
29 | } |
30 | |
31 | Blitter::~Blitter() |
32 | { |
33 | delete blitCache; |
34 | } |
35 | |
36 | void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) |
37 | { |
38 | if(fastClear(pixel, format, dest, dRect, rgbaMask)) |
39 | { |
40 | return; |
41 | } |
42 | |
43 | sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format)); |
44 | SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0); // Sample from the middle. |
45 | blit(color, sRect, dest, dRect, {rgbaMask}); |
46 | delete color; |
47 | } |
48 | |
49 | bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) |
50 | { |
51 | if(format != FORMAT_A32B32G32R32F) |
52 | { |
53 | return false; |
54 | } |
55 | |
56 | float *color = (float*)pixel; |
57 | float r = color[0]; |
58 | float g = color[1]; |
59 | float b = color[2]; |
60 | float a = color[3]; |
61 | |
62 | uint32_t packed; |
63 | |
64 | switch(dest->getFormat()) |
65 | { |
66 | case FORMAT_R5G6B5: |
67 | if((rgbaMask & 0x7) != 0x7) return false; |
68 | packed = ((uint16_t)(31 * b + 0.5f) << 0) | |
69 | ((uint16_t)(63 * g + 0.5f) << 5) | |
70 | ((uint16_t)(31 * r + 0.5f) << 11); |
71 | break; |
72 | case FORMAT_X8B8G8R8: |
73 | if((rgbaMask & 0x7) != 0x7) return false; |
74 | packed = ((uint32_t)(255) << 24) | |
75 | ((uint32_t)(255 * b + 0.5f) << 16) | |
76 | ((uint32_t)(255 * g + 0.5f) << 8) | |
77 | ((uint32_t)(255 * r + 0.5f) << 0); |
78 | break; |
79 | case FORMAT_A8B8G8R8: |
80 | if((rgbaMask & 0xF) != 0xF) return false; |
81 | packed = ((uint32_t)(255 * a + 0.5f) << 24) | |
82 | ((uint32_t)(255 * b + 0.5f) << 16) | |
83 | ((uint32_t)(255 * g + 0.5f) << 8) | |
84 | ((uint32_t)(255 * r + 0.5f) << 0); |
85 | break; |
86 | case FORMAT_X8R8G8B8: |
87 | if((rgbaMask & 0x7) != 0x7) return false; |
88 | packed = ((uint32_t)(255) << 24) | |
89 | ((uint32_t)(255 * r + 0.5f) << 16) | |
90 | ((uint32_t)(255 * g + 0.5f) << 8) | |
91 | ((uint32_t)(255 * b + 0.5f) << 0); |
92 | break; |
93 | case FORMAT_A8R8G8B8: |
94 | if((rgbaMask & 0xF) != 0xF) return false; |
95 | packed = ((uint32_t)(255 * a + 0.5f) << 24) | |
96 | ((uint32_t)(255 * r + 0.5f) << 16) | |
97 | ((uint32_t)(255 * g + 0.5f) << 8) | |
98 | ((uint32_t)(255 * b + 0.5f) << 0); |
99 | break; |
100 | default: |
101 | return false; |
102 | } |
103 | |
104 | bool useDestInternal = !dest->isExternalDirty(); |
105 | uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal); |
106 | |
107 | for(int j = 0; j < dest->getSamples(); j++) |
108 | { |
109 | uint8_t *d = slice; |
110 | |
111 | switch(Surface::bytes(dest->getFormat())) |
112 | { |
113 | case 2: |
114 | for(int i = dRect.y0; i < dRect.y1; i++) |
115 | { |
116 | sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0); |
117 | d += dest->getPitchB(useDestInternal); |
118 | } |
119 | break; |
120 | case 4: |
121 | for(int i = dRect.y0; i < dRect.y1; i++) |
122 | { |
123 | sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0); |
124 | d += dest->getPitchB(useDestInternal); |
125 | } |
126 | break; |
127 | default: |
128 | assert(false); |
129 | } |
130 | |
131 | slice += dest->getSliceB(useDestInternal); |
132 | } |
133 | |
134 | dest->unlock(useDestInternal); |
135 | |
136 | return true; |
137 | } |
138 | |
139 | void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options) |
140 | { |
141 | if(dest->getInternalFormat() == FORMAT_NULL) |
142 | { |
143 | return; |
144 | } |
145 | |
146 | if(blitReactor(source, sourceRect, dest, destRect, options)) |
147 | { |
148 | return; |
149 | } |
150 | |
151 | SliceRectF sRect = sourceRect; |
152 | SliceRect dRect = destRect; |
153 | |
154 | bool flipX = destRect.x0 > destRect.x1; |
155 | bool flipY = destRect.y0 > destRect.y1; |
156 | |
157 | if(flipX) |
158 | { |
159 | swap(dRect.x0, dRect.x1); |
160 | swap(sRect.x0, sRect.x1); |
161 | } |
162 | if(flipY) |
163 | { |
164 | swap(dRect.y0, dRect.y1); |
165 | swap(sRect.y0, sRect.y1); |
166 | } |
167 | |
168 | source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC); |
169 | dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC); |
170 | |
171 | float w = sRect.width() / dRect.width(); |
172 | float h = sRect.height() / dRect.height(); |
173 | |
174 | float xStart = sRect.x0 + (0.5f - dRect.x0) * w; |
175 | float yStart = sRect.y0 + (0.5f - dRect.y0) * h; |
176 | |
177 | for(int j = dRect.y0; j < dRect.y1; j++) |
178 | { |
179 | float y = yStart + j * h; |
180 | |
181 | for(int i = dRect.x0; i < dRect.x1; i++) |
182 | { |
183 | float x = xStart + i * w; |
184 | |
185 | // FIXME: Support RGBA mask |
186 | dest->copyInternal(source, i, j, x, y, options.filter); |
187 | } |
188 | } |
189 | |
190 | source->unlockInternal(); |
191 | dest->unlockInternal(); |
192 | } |
193 | |
194 | void Blitter::blit3D(Surface *source, Surface *dest) |
195 | { |
196 | source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC); |
197 | dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC); |
198 | |
199 | float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth()); |
200 | float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight()); |
201 | float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth()); |
202 | |
203 | for(int k = 0; k < dest->getDepth(); k++) |
204 | { |
205 | float z = (k + 0.5f) * d; |
206 | |
207 | for(int j = 0; j < dest->getHeight(); j++) |
208 | { |
209 | float y = (j + 0.5f) * h; |
210 | |
211 | for(int i = 0; i < dest->getWidth(); i++) |
212 | { |
213 | float x = (i + 0.5f) * w; |
214 | |
215 | dest->copyInternal(source, i, j, k, x, y, z, true); |
216 | } |
217 | } |
218 | } |
219 | |
220 | source->unlockInternal(); |
221 | dest->unlockInternal(); |
222 | } |
223 | |
224 | bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state) |
225 | { |
226 | c = Float4(0.0f, 0.0f, 0.0f, 1.0f); |
227 | |
228 | switch(state.sourceFormat) |
229 | { |
230 | case FORMAT_L8: |
231 | c.xyz = Float(Int(*Pointer<Byte>(element))); |
232 | c.w = float(0xFF); |
233 | break; |
234 | case FORMAT_A8: |
235 | c.w = Float(Int(*Pointer<Byte>(element))); |
236 | break; |
237 | case FORMAT_R8I: |
238 | case FORMAT_R8_SNORM: |
239 | c.x = Float(Int(*Pointer<SByte>(element))); |
240 | c.w = float(0x7F); |
241 | break; |
242 | case FORMAT_R8: |
243 | case FORMAT_R8UI: |
244 | c.x = Float(Int(*Pointer<Byte>(element))); |
245 | c.w = float(0xFF); |
246 | break; |
247 | case FORMAT_R16I: |
248 | c.x = Float(Int(*Pointer<Short>(element))); |
249 | c.w = float(0x7FFF); |
250 | break; |
251 | case FORMAT_R16UI: |
252 | c.x = Float(Int(*Pointer<UShort>(element))); |
253 | c.w = float(0xFFFF); |
254 | break; |
255 | case FORMAT_R32I: |
256 | c.x = Float(*Pointer<Int>(element)); |
257 | c.w = float(0x7FFFFFFF); |
258 | break; |
259 | case FORMAT_R32UI: |
260 | c.x = Float(*Pointer<UInt>(element)); |
261 | c.w = float(0xFFFFFFFF); |
262 | break; |
263 | case FORMAT_A8R8G8B8: |
264 | c = Float4(*Pointer<Byte4>(element)).zyxw; |
265 | break; |
266 | case FORMAT_A8B8G8R8I: |
267 | case FORMAT_A8B8G8R8_SNORM: |
268 | c = Float4(*Pointer<SByte4>(element)); |
269 | break; |
270 | case FORMAT_A8B8G8R8: |
271 | case FORMAT_A8B8G8R8UI: |
272 | case FORMAT_SRGB8_A8: |
273 | c = Float4(*Pointer<Byte4>(element)); |
274 | break; |
275 | case FORMAT_X8R8G8B8: |
276 | c = Float4(*Pointer<Byte4>(element)).zyxw; |
277 | c.w = float(0xFF); |
278 | break; |
279 | case FORMAT_R8G8B8: |
280 | c.z = Float(Int(*Pointer<Byte>(element + 0))); |
281 | c.y = Float(Int(*Pointer<Byte>(element + 1))); |
282 | c.x = Float(Int(*Pointer<Byte>(element + 2))); |
283 | c.w = float(0xFF); |
284 | break; |
285 | case FORMAT_B8G8R8: |
286 | c.x = Float(Int(*Pointer<Byte>(element + 0))); |
287 | c.y = Float(Int(*Pointer<Byte>(element + 1))); |
288 | c.z = Float(Int(*Pointer<Byte>(element + 2))); |
289 | c.w = float(0xFF); |
290 | break; |
291 | case FORMAT_X8B8G8R8I: |
292 | case FORMAT_X8B8G8R8_SNORM: |
293 | c = Float4(*Pointer<SByte4>(element)); |
294 | c.w = float(0x7F); |
295 | break; |
296 | case FORMAT_X8B8G8R8: |
297 | case FORMAT_X8B8G8R8UI: |
298 | case FORMAT_SRGB8_X8: |
299 | c = Float4(*Pointer<Byte4>(element)); |
300 | c.w = float(0xFF); |
301 | break; |
302 | case FORMAT_A16B16G16R16I: |
303 | c = Float4(*Pointer<Short4>(element)); |
304 | break; |
305 | case FORMAT_A16B16G16R16: |
306 | case FORMAT_A16B16G16R16UI: |
307 | c = Float4(*Pointer<UShort4>(element)); |
308 | break; |
309 | case FORMAT_X16B16G16R16I: |
310 | c = Float4(*Pointer<Short4>(element)); |
311 | c.w = float(0x7FFF); |
312 | break; |
313 | case FORMAT_X16B16G16R16UI: |
314 | c = Float4(*Pointer<UShort4>(element)); |
315 | c.w = float(0xFFFF); |
316 | break; |
317 | case FORMAT_A32B32G32R32I: |
318 | c = Float4(*Pointer<Int4>(element)); |
319 | break; |
320 | case FORMAT_A32B32G32R32UI: |
321 | c = Float4(*Pointer<UInt4>(element)); |
322 | break; |
323 | case FORMAT_X32B32G32R32I: |
324 | c = Float4(*Pointer<Int4>(element)); |
325 | c.w = float(0x7FFFFFFF); |
326 | break; |
327 | case FORMAT_X32B32G32R32UI: |
328 | c = Float4(*Pointer<UInt4>(element)); |
329 | c.w = float(0xFFFFFFFF); |
330 | break; |
331 | case FORMAT_G8R8I: |
332 | case FORMAT_G8R8_SNORM: |
333 | c.x = Float(Int(*Pointer<SByte>(element + 0))); |
334 | c.y = Float(Int(*Pointer<SByte>(element + 1))); |
335 | c.w = float(0x7F); |
336 | break; |
337 | case FORMAT_G8R8: |
338 | case FORMAT_G8R8UI: |
339 | c.x = Float(Int(*Pointer<Byte>(element + 0))); |
340 | c.y = Float(Int(*Pointer<Byte>(element + 1))); |
341 | c.w = float(0xFF); |
342 | break; |
343 | case FORMAT_G16R16I: |
344 | c.x = Float(Int(*Pointer<Short>(element + 0))); |
345 | c.y = Float(Int(*Pointer<Short>(element + 2))); |
346 | c.w = float(0x7FFF); |
347 | break; |
348 | case FORMAT_G16R16: |
349 | case FORMAT_G16R16UI: |
350 | c.x = Float(Int(*Pointer<UShort>(element + 0))); |
351 | c.y = Float(Int(*Pointer<UShort>(element + 2))); |
352 | c.w = float(0xFFFF); |
353 | break; |
354 | case FORMAT_G32R32I: |
355 | c.x = Float(*Pointer<Int>(element + 0)); |
356 | c.y = Float(*Pointer<Int>(element + 4)); |
357 | c.w = float(0x7FFFFFFF); |
358 | break; |
359 | case FORMAT_G32R32UI: |
360 | c.x = Float(*Pointer<UInt>(element + 0)); |
361 | c.y = Float(*Pointer<UInt>(element + 4)); |
362 | c.w = float(0xFFFFFFFF); |
363 | break; |
364 | case FORMAT_A32B32G32R32F: |
365 | c = *Pointer<Float4>(element); |
366 | break; |
367 | case FORMAT_X32B32G32R32F: |
368 | case FORMAT_X32B32G32R32F_UNSIGNED: |
369 | case FORMAT_B32G32R32F: |
370 | c.z = *Pointer<Float>(element + 8); |
371 | case FORMAT_G32R32F: |
372 | c.x = *Pointer<Float>(element + 0); |
373 | c.y = *Pointer<Float>(element + 4); |
374 | break; |
375 | case FORMAT_R32F: |
376 | c.x = *Pointer<Float>(element); |
377 | break; |
378 | case FORMAT_R5G6B5: |
379 | c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11))); |
380 | c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5))); |
381 | c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); |
382 | break; |
383 | case FORMAT_A2B10G10R10: |
384 | case FORMAT_A2B10G10R10UI: |
385 | c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF)))); |
386 | c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10)); |
387 | c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20)); |
388 | c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30)); |
389 | break; |
390 | case FORMAT_D16: |
391 | c.x = Float(Int((*Pointer<UShort>(element)))); |
392 | break; |
393 | case FORMAT_D24S8: |
394 | case FORMAT_D24X8: |
395 | c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8)); |
396 | break; |
397 | case FORMAT_D32: |
398 | c.x = Float(Int((*Pointer<UInt>(element)))); |
399 | break; |
400 | case FORMAT_D32F_COMPLEMENTARY: |
401 | case FORMAT_D32FS8_COMPLEMENTARY: |
402 | c.x = 1.0f - *Pointer<Float>(element); |
403 | break; |
404 | case FORMAT_D32F: |
405 | case FORMAT_D32FS8: |
406 | case FORMAT_D32F_LOCKABLE: |
407 | case FORMAT_D32FS8_TEXTURE: |
408 | case FORMAT_D32F_SHADOW: |
409 | case FORMAT_D32FS8_SHADOW: |
410 | c.x = *Pointer<Float>(element); |
411 | break; |
412 | case FORMAT_S8: |
413 | c.x = Float(Int(*Pointer<Byte>(element))); |
414 | break; |
415 | default: |
416 | return false; |
417 | } |
418 | |
419 | return true; |
420 | } |
421 | |
422 | bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) |
423 | { |
424 | bool writeR = state.writeRed; |
425 | bool writeG = state.writeGreen; |
426 | bool writeB = state.writeBlue; |
427 | bool writeA = state.writeAlpha; |
428 | bool writeRGBA = writeR && writeG && writeB && writeA; |
429 | |
430 | switch(state.destFormat) |
431 | { |
432 | case FORMAT_L8: |
433 | *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); |
434 | break; |
435 | case FORMAT_A8: |
436 | if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); } |
437 | break; |
438 | case FORMAT_A8R8G8B8: |
439 | if(writeRGBA) |
440 | { |
441 | Short4 c0 = RoundShort4(c.zyxw); |
442 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
443 | } |
444 | else |
445 | { |
446 | if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } |
447 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
448 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } |
449 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } |
450 | } |
451 | break; |
452 | case FORMAT_A8B8G8R8: |
453 | case FORMAT_SRGB8_A8: |
454 | if(writeRGBA) |
455 | { |
456 | Short4 c0 = RoundShort4(c); |
457 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
458 | } |
459 | else |
460 | { |
461 | if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } |
462 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
463 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
464 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } |
465 | } |
466 | break; |
467 | case FORMAT_X8R8G8B8: |
468 | if(writeRGBA) |
469 | { |
470 | Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); |
471 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
472 | } |
473 | else |
474 | { |
475 | if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } |
476 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
477 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } |
478 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } |
479 | } |
480 | break; |
481 | case FORMAT_X8B8G8R8: |
482 | case FORMAT_SRGB8_X8: |
483 | if(writeRGBA) |
484 | { |
485 | Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); |
486 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
487 | } |
488 | else |
489 | { |
490 | if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } |
491 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
492 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
493 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } |
494 | } |
495 | break; |
496 | case FORMAT_R8G8B8: |
497 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } |
498 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
499 | if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } |
500 | break; |
501 | case FORMAT_B8G8R8: |
502 | if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } |
503 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
504 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
505 | break; |
506 | case FORMAT_A32B32G32R32F: |
507 | if(writeRGBA) |
508 | { |
509 | *Pointer<Float4>(element) = c; |
510 | } |
511 | else |
512 | { |
513 | if(writeR) { *Pointer<Float>(element) = c.x; } |
514 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
515 | if(writeB) { *Pointer<Float>(element + 8) = c.z; } |
516 | if(writeA) { *Pointer<Float>(element + 12) = c.w; } |
517 | } |
518 | break; |
519 | case FORMAT_X32B32G32R32F: |
520 | case FORMAT_X32B32G32R32F_UNSIGNED: |
521 | if(writeA) { *Pointer<Float>(element + 12) = 1.0f; } |
522 | case FORMAT_B32G32R32F: |
523 | if(writeR) { *Pointer<Float>(element) = c.x; } |
524 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
525 | if(writeB) { *Pointer<Float>(element + 8) = c.z; } |
526 | break; |
527 | case FORMAT_G32R32F: |
528 | if(writeR && writeG) |
529 | { |
530 | *Pointer<Float2>(element) = Float2(c); |
531 | } |
532 | else |
533 | { |
534 | if(writeR) { *Pointer<Float>(element) = c.x; } |
535 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
536 | } |
537 | break; |
538 | case FORMAT_R32F: |
539 | if(writeR) { *Pointer<Float>(element) = c.x; } |
540 | break; |
541 | case FORMAT_A8B8G8R8I: |
542 | case FORMAT_A8B8G8R8_SNORM: |
543 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } |
544 | case FORMAT_X8B8G8R8I: |
545 | case FORMAT_X8B8G8R8_SNORM: |
546 | if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM)) |
547 | { |
548 | *Pointer<SByte>(element + 3) = SByte(0x7F); |
549 | } |
550 | if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } |
551 | case FORMAT_G8R8I: |
552 | case FORMAT_G8R8_SNORM: |
553 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } |
554 | case FORMAT_R8I: |
555 | case FORMAT_R8_SNORM: |
556 | if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); } |
557 | break; |
558 | case FORMAT_A8B8G8R8UI: |
559 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } |
560 | case FORMAT_X8B8G8R8UI: |
561 | if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI)) |
562 | { |
563 | *Pointer<Byte>(element + 3) = Byte(0xFF); |
564 | } |
565 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
566 | case FORMAT_G8R8UI: |
567 | case FORMAT_G8R8: |
568 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
569 | case FORMAT_R8UI: |
570 | case FORMAT_R8: |
571 | if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); } |
572 | break; |
573 | case FORMAT_A16B16G16R16I: |
574 | if(writeRGBA) |
575 | { |
576 | *Pointer<Short4>(element) = Short4(RoundInt(c)); |
577 | } |
578 | else |
579 | { |
580 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
581 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
582 | if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } |
583 | if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); } |
584 | } |
585 | break; |
586 | case FORMAT_X16B16G16R16I: |
587 | if(writeRGBA) |
588 | { |
589 | *Pointer<Short4>(element) = Short4(RoundInt(c)); |
590 | } |
591 | else |
592 | { |
593 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
594 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
595 | if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } |
596 | } |
597 | if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); } |
598 | break; |
599 | case FORMAT_G16R16I: |
600 | if(writeR && writeG) |
601 | { |
602 | *Pointer<Short2>(element) = Short2(Short4(RoundInt(c))); |
603 | } |
604 | else |
605 | { |
606 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
607 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
608 | } |
609 | break; |
610 | case FORMAT_R16I: |
611 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
612 | break; |
613 | case FORMAT_A16B16G16R16UI: |
614 | case FORMAT_A16B16G16R16: |
615 | if(writeRGBA) |
616 | { |
617 | *Pointer<UShort4>(element) = UShort4(RoundInt(c)); |
618 | } |
619 | else |
620 | { |
621 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
622 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
623 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } |
624 | if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); } |
625 | } |
626 | break; |
627 | case FORMAT_X16B16G16R16UI: |
628 | if(writeRGBA) |
629 | { |
630 | *Pointer<UShort4>(element) = UShort4(RoundInt(c)); |
631 | } |
632 | else |
633 | { |
634 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
635 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
636 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } |
637 | } |
638 | if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); } |
639 | break; |
640 | case FORMAT_G16R16UI: |
641 | case FORMAT_G16R16: |
642 | if(writeR && writeG) |
643 | { |
644 | *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c))); |
645 | } |
646 | else |
647 | { |
648 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
649 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
650 | } |
651 | break; |
652 | case FORMAT_R16UI: |
653 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
654 | break; |
655 | case FORMAT_A32B32G32R32I: |
656 | if(writeRGBA) |
657 | { |
658 | *Pointer<Int4>(element) = RoundInt(c); |
659 | } |
660 | else |
661 | { |
662 | if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } |
663 | if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } |
664 | if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } |
665 | if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); } |
666 | } |
667 | break; |
668 | case FORMAT_X32B32G32R32I: |
669 | if(writeRGBA) |
670 | { |
671 | *Pointer<Int4>(element) = RoundInt(c); |
672 | } |
673 | else |
674 | { |
675 | if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } |
676 | if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } |
677 | if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } |
678 | } |
679 | if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } |
680 | break; |
681 | case FORMAT_G32R32I: |
682 | if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } |
683 | case FORMAT_R32I: |
684 | if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } |
685 | break; |
686 | case FORMAT_A32B32G32R32UI: |
687 | if(writeRGBA) |
688 | { |
689 | *Pointer<UInt4>(element) = UInt4(RoundInt(c)); |
690 | } |
691 | else |
692 | { |
693 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } |
694 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } |
695 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } |
696 | if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); } |
697 | } |
698 | break; |
699 | case FORMAT_X32B32G32R32UI: |
700 | if(writeRGBA) |
701 | { |
702 | *Pointer<UInt4>(element) = UInt4(RoundInt(c)); |
703 | } |
704 | else |
705 | { |
706 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } |
707 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } |
708 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } |
709 | } |
710 | if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); } |
711 | break; |
712 | case FORMAT_G32R32UI: |
713 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } |
714 | case FORMAT_R32UI: |
715 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } |
716 | break; |
717 | case FORMAT_R5G6B5: |
718 | if(writeR && writeG && writeB) |
719 | { |
720 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | |
721 | (RoundInt(Float(c.y)) << Int(5)) | |
722 | (RoundInt(Float(c.x)) << Int(11))); |
723 | } |
724 | else |
725 | { |
726 | unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000); |
727 | unsigned short unmask = ~mask; |
728 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
729 | (UShort(RoundInt(Float(c.z)) | |
730 | (RoundInt(Float(c.y)) << Int(5)) | |
731 | (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); |
732 | } |
733 | break; |
734 | case FORMAT_A2B10G10R10: |
735 | case FORMAT_A2B10G10R10UI: |
736 | if(writeRGBA) |
737 | { |
738 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) | |
739 | (RoundInt(Float(c.y)) << 10) | |
740 | (RoundInt(Float(c.z)) << 20) | |
741 | (RoundInt(Float(c.w)) << 30)); |
742 | } |
743 | else |
744 | { |
745 | unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | |
746 | (writeB ? 0x3FF00000 : 0x0000) | |
747 | (writeG ? 0x000FFC00 : 0x0000) | |
748 | (writeR ? 0x000003FF : 0x0000); |
749 | unsigned int unmask = ~mask; |
750 | *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | |
751 | (UInt(RoundInt(Float(c.x)) | |
752 | (RoundInt(Float(c.y)) << 10) | |
753 | (RoundInt(Float(c.z)) << 20) | |
754 | (RoundInt(Float(c.w)) << 30)) & UInt(mask)); |
755 | } |
756 | break; |
757 | case FORMAT_D16: |
758 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); |
759 | break; |
760 | case FORMAT_D24S8: |
761 | case FORMAT_D24X8: |
762 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8); |
763 | break; |
764 | case FORMAT_D32: |
765 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x))); |
766 | break; |
767 | case FORMAT_D32F_COMPLEMENTARY: |
768 | case FORMAT_D32FS8_COMPLEMENTARY: |
769 | *Pointer<Float>(element) = 1.0f - c.x; |
770 | break; |
771 | case FORMAT_D32F: |
772 | case FORMAT_D32FS8: |
773 | case FORMAT_D32F_LOCKABLE: |
774 | case FORMAT_D32FS8_TEXTURE: |
775 | case FORMAT_D32F_SHADOW: |
776 | case FORMAT_D32FS8_SHADOW: |
777 | *Pointer<Float>(element) = c.x; |
778 | break; |
779 | case FORMAT_S8: |
780 | *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); |
781 | break; |
782 | default: |
783 | return false; |
784 | } |
785 | return true; |
786 | } |
787 | |
788 | bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state) |
789 | { |
790 | c = Int4(0, 0, 0, 1); |
791 | |
792 | switch(state.sourceFormat) |
793 | { |
794 | case FORMAT_A8B8G8R8I: |
795 | c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); |
796 | case FORMAT_X8B8G8R8I: |
797 | c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); |
798 | case FORMAT_G8R8I: |
799 | c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); |
800 | case FORMAT_R8I: |
801 | c = Insert(c, Int(*Pointer<SByte>(element)), 0); |
802 | break; |
803 | case FORMAT_A8B8G8R8UI: |
804 | c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); |
805 | case FORMAT_X8B8G8R8UI: |
806 | c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); |
807 | case FORMAT_G8R8UI: |
808 | c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); |
809 | case FORMAT_R8UI: |
810 | c = Insert(c, Int(*Pointer<Byte>(element)), 0); |
811 | break; |
812 | case FORMAT_A16B16G16R16I: |
813 | c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); |
814 | case FORMAT_X16B16G16R16I: |
815 | c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); |
816 | case FORMAT_G16R16I: |
817 | c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); |
818 | case FORMAT_R16I: |
819 | c = Insert(c, Int(*Pointer<Short>(element)), 0); |
820 | break; |
821 | case FORMAT_A16B16G16R16UI: |
822 | c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); |
823 | case FORMAT_X16B16G16R16UI: |
824 | c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); |
825 | case FORMAT_G16R16UI: |
826 | c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); |
827 | case FORMAT_R16UI: |
828 | c = Insert(c, Int(*Pointer<UShort>(element)), 0); |
829 | break; |
830 | case FORMAT_A32B32G32R32I: |
831 | case FORMAT_A32B32G32R32UI: |
832 | c = *Pointer<Int4>(element); |
833 | break; |
834 | case FORMAT_X32B32G32R32I: |
835 | case FORMAT_X32B32G32R32UI: |
836 | c = Insert(c, *Pointer<Int>(element + 8), 2); |
837 | case FORMAT_G32R32I: |
838 | case FORMAT_G32R32UI: |
839 | c = Insert(c, *Pointer<Int>(element + 4), 1); |
840 | case FORMAT_R32I: |
841 | case FORMAT_R32UI: |
842 | c = Insert(c, *Pointer<Int>(element), 0); |
843 | break; |
844 | default: |
845 | return false; |
846 | } |
847 | |
848 | return true; |
849 | } |
850 | |
851 | bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) |
852 | { |
853 | bool writeR = state.writeRed; |
854 | bool writeG = state.writeGreen; |
855 | bool writeB = state.writeBlue; |
856 | bool writeA = state.writeAlpha; |
857 | bool writeRGBA = writeR && writeG && writeB && writeA; |
858 | |
859 | switch(state.destFormat) |
860 | { |
861 | case FORMAT_A8B8G8R8I: |
862 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } |
863 | case FORMAT_X8B8G8R8I: |
864 | if(writeA && (state.destFormat != FORMAT_A8B8G8R8I)) |
865 | { |
866 | *Pointer<SByte>(element + 3) = SByte(0x7F); |
867 | } |
868 | if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } |
869 | case FORMAT_G8R8I: |
870 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } |
871 | case FORMAT_R8I: |
872 | if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } |
873 | break; |
874 | case FORMAT_A8B8G8R8UI: |
875 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } |
876 | case FORMAT_X8B8G8R8UI: |
877 | if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI)) |
878 | { |
879 | *Pointer<Byte>(element + 3) = Byte(0xFF); |
880 | } |
881 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } |
882 | case FORMAT_G8R8UI: |
883 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } |
884 | case FORMAT_R8UI: |
885 | if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); } |
886 | break; |
887 | case FORMAT_A16B16G16R16I: |
888 | if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } |
889 | case FORMAT_X16B16G16R16I: |
890 | if(writeA && (state.destFormat != FORMAT_A16B16G16R16I)) |
891 | { |
892 | *Pointer<Short>(element + 6) = Short(0x7FFF); |
893 | } |
894 | if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } |
895 | case FORMAT_G16R16I: |
896 | if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } |
897 | case FORMAT_R16I: |
898 | if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } |
899 | break; |
900 | case FORMAT_A16B16G16R16UI: |
901 | if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } |
902 | case FORMAT_X16B16G16R16UI: |
903 | if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI)) |
904 | { |
905 | *Pointer<UShort>(element + 6) = UShort(0xFFFF); |
906 | } |
907 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } |
908 | case FORMAT_G16R16UI: |
909 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } |
910 | case FORMAT_R16UI: |
911 | if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } |
912 | break; |
913 | case FORMAT_A32B32G32R32I: |
914 | if(writeRGBA) |
915 | { |
916 | *Pointer<Int4>(element) = c; |
917 | } |
918 | else |
919 | { |
920 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
921 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
922 | if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } |
923 | if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); } |
924 | } |
925 | break; |
926 | case FORMAT_X32B32G32R32I: |
927 | if(writeRGBA) |
928 | { |
929 | *Pointer<Int4>(element) = c; |
930 | } |
931 | else |
932 | { |
933 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
934 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
935 | if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } |
936 | } |
937 | if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } |
938 | break; |
939 | case FORMAT_G32R32I: |
940 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
941 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
942 | break; |
943 | case FORMAT_R32I: |
944 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
945 | break; |
946 | case FORMAT_A32B32G32R32UI: |
947 | if(writeRGBA) |
948 | { |
949 | *Pointer<UInt4>(element) = As<UInt4>(c); |
950 | } |
951 | else |
952 | { |
953 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
954 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } |
955 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } |
956 | if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); } |
957 | } |
958 | break; |
959 | case FORMAT_X32B32G32R32UI: |
960 | if(writeRGBA) |
961 | { |
962 | *Pointer<UInt4>(element) = As<UInt4>(c); |
963 | } |
964 | else |
965 | { |
966 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
967 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } |
968 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } |
969 | } |
970 | if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); } |
971 | break; |
972 | case FORMAT_G32R32UI: |
973 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
974 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } |
975 | break; |
976 | case FORMAT_R32UI: |
977 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
978 | break; |
979 | default: |
980 | return false; |
981 | } |
982 | |
983 | return true; |
984 | } |
985 | |
986 | bool Blitter::GetScale(float4 &scale, Format format) |
987 | { |
988 | switch(format) |
989 | { |
990 | case FORMAT_L8: |
991 | case FORMAT_A8: |
992 | case FORMAT_A8R8G8B8: |
993 | case FORMAT_X8R8G8B8: |
994 | case FORMAT_R8: |
995 | case FORMAT_G8R8: |
996 | case FORMAT_R8G8B8: |
997 | case FORMAT_B8G8R8: |
998 | case FORMAT_X8B8G8R8: |
999 | case FORMAT_A8B8G8R8: |
1000 | case FORMAT_SRGB8_X8: |
1001 | case FORMAT_SRGB8_A8: |
1002 | scale = vector(0xFF, 0xFF, 0xFF, 0xFF); |
1003 | break; |
1004 | case FORMAT_R8_SNORM: |
1005 | case FORMAT_G8R8_SNORM: |
1006 | case FORMAT_X8B8G8R8_SNORM: |
1007 | case FORMAT_A8B8G8R8_SNORM: |
1008 | scale = vector(0x7F, 0x7F, 0x7F, 0x7F); |
1009 | break; |
1010 | case FORMAT_A16B16G16R16: |
1011 | scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); |
1012 | break; |
1013 | case FORMAT_R8I: |
1014 | case FORMAT_R8UI: |
1015 | case FORMAT_G8R8I: |
1016 | case FORMAT_G8R8UI: |
1017 | case FORMAT_X8B8G8R8I: |
1018 | case FORMAT_X8B8G8R8UI: |
1019 | case FORMAT_A8B8G8R8I: |
1020 | case FORMAT_A8B8G8R8UI: |
1021 | case FORMAT_R16I: |
1022 | case FORMAT_R16UI: |
1023 | case FORMAT_G16R16: |
1024 | case FORMAT_G16R16I: |
1025 | case FORMAT_G16R16UI: |
1026 | case FORMAT_X16B16G16R16I: |
1027 | case FORMAT_X16B16G16R16UI: |
1028 | case FORMAT_A16B16G16R16I: |
1029 | case FORMAT_A16B16G16R16UI: |
1030 | case FORMAT_R32I: |
1031 | case FORMAT_R32UI: |
1032 | case FORMAT_G32R32I: |
1033 | case FORMAT_G32R32UI: |
1034 | case FORMAT_X32B32G32R32I: |
1035 | case FORMAT_X32B32G32R32UI: |
1036 | case FORMAT_A32B32G32R32I: |
1037 | case FORMAT_A32B32G32R32UI: |
1038 | case FORMAT_A32B32G32R32F: |
1039 | case FORMAT_X32B32G32R32F: |
1040 | case FORMAT_X32B32G32R32F_UNSIGNED: |
1041 | case FORMAT_B32G32R32F: |
1042 | case FORMAT_G32R32F: |
1043 | case FORMAT_R32F: |
1044 | case FORMAT_A2B10G10R10UI: |
1045 | scale = vector(1.0f, 1.0f, 1.0f, 1.0f); |
1046 | break; |
1047 | case FORMAT_R5G6B5: |
1048 | scale = vector(0x1F, 0x3F, 0x1F, 1.0f); |
1049 | break; |
1050 | case FORMAT_A2B10G10R10: |
1051 | scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03); |
1052 | break; |
1053 | case FORMAT_D16: |
1054 | scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f); |
1055 | break; |
1056 | case FORMAT_D24S8: |
1057 | case FORMAT_D24X8: |
1058 | scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f); |
1059 | break; |
1060 | case FORMAT_D32: |
1061 | scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f); |
1062 | break; |
1063 | case FORMAT_D32F: |
1064 | case FORMAT_D32FS8: |
1065 | case FORMAT_D32F_COMPLEMENTARY: |
1066 | case FORMAT_D32FS8_COMPLEMENTARY: |
1067 | case FORMAT_D32F_LOCKABLE: |
1068 | case FORMAT_D32FS8_TEXTURE: |
1069 | case FORMAT_D32F_SHADOW: |
1070 | case FORMAT_D32FS8_SHADOW: |
1071 | case FORMAT_S8: |
1072 | scale = vector(1.0f, 1.0f, 1.0f, 1.0f); |
1073 | break; |
1074 | default: |
1075 | return false; |
1076 | } |
1077 | |
1078 | return true; |
1079 | } |
1080 | |
1081 | bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled) |
1082 | { |
1083 | float4 scale, unscale; |
1084 | if(state.clearOperation && |
1085 | Surface::isNonNormalizedInteger(state.sourceFormat) && |
1086 | !Surface::isNonNormalizedInteger(state.destFormat)) |
1087 | { |
1088 | // If we're clearing a buffer from an int or uint color into a normalized color, |
1089 | // then the whole range of the int or uint color must be scaled between 0 and 1. |
1090 | switch(state.sourceFormat) |
1091 | { |
1092 | case FORMAT_A32B32G32R32I: |
1093 | unscale = replicate(static_cast<float>(0x7FFFFFFF)); |
1094 | break; |
1095 | case FORMAT_A32B32G32R32UI: |
1096 | unscale = replicate(static_cast<float>(0xFFFFFFFF)); |
1097 | break; |
1098 | default: |
1099 | return false; |
1100 | } |
1101 | } |
1102 | else if(!GetScale(unscale, state.sourceFormat)) |
1103 | { |
1104 | return false; |
1105 | } |
1106 | |
1107 | if(!GetScale(scale, state.destFormat)) |
1108 | { |
1109 | return false; |
1110 | } |
1111 | |
1112 | bool srcSRGB = Surface::isSRGBformat(state.sourceFormat); |
1113 | bool dstSRGB = Surface::isSRGBformat(state.destFormat); |
1114 | |
1115 | if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded. |
1116 | { |
1117 | value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale |
1118 | Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale |
1119 | value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value); |
1120 | value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale |
1121 | } |
1122 | else if(unscale != scale) |
1123 | { |
1124 | value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w); |
1125 | } |
1126 | |
1127 | if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED) |
1128 | { |
1129 | value = Max(value, Float4(0.0f)); // TODO: Only necessary if source is signed. |
1130 | } |
1131 | else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat)) |
1132 | { |
1133 | value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w)); |
1134 | |
1135 | value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x, |
1136 | Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y, |
1137 | Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z, |
1138 | Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w)); |
1139 | } |
1140 | |
1141 | return true; |
1142 | } |
1143 | |
1144 | Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout) |
1145 | { |
1146 | if(!quadLayout) |
1147 | { |
1148 | return y * pitchB + x * bytes; |
1149 | } |
1150 | else |
1151 | { |
1152 | // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1) |
1153 | return (y & Int(~1)) * pitchB + |
1154 | ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes; |
1155 | } |
1156 | } |
1157 | |
1158 | Float4 Blitter::LinearToSRGB(Float4 &c) |
1159 | { |
1160 | Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f); |
1161 | Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f); |
1162 | |
1163 | Float4 s = c; |
1164 | s.xyz = Max(lc, ec); |
1165 | |
1166 | return s; |
1167 | } |
1168 | |
1169 | Float4 Blitter::sRGBtoLinear(Float4 &c) |
1170 | { |
1171 | Float4 lc = c * Float4(1.0f / 12.92f); |
1172 | Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f)); |
1173 | |
1174 | Int4 linear = CmpLT(c, Float4(0.04045f)); |
1175 | |
1176 | Float4 s = c; |
1177 | s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse() |
1178 | |
1179 | return s; |
1180 | } |
1181 | |
1182 | std::shared_ptr<Routine> Blitter::generate(const State &state) |
1183 | { |
1184 | Function<Void(Pointer<Byte>)> function; |
1185 | { |
1186 | Pointer<Byte> blit(function.Arg<0>()); |
1187 | |
1188 | Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source)); |
1189 | Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest)); |
1190 | Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB)); |
1191 | Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB)); |
1192 | |
1193 | Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0)); |
1194 | Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0)); |
1195 | Float w = *Pointer<Float>(blit + OFFSET(BlitData,w)); |
1196 | Float h = *Pointer<Float>(blit + OFFSET(BlitData,h)); |
1197 | |
1198 | Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d)); |
1199 | Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d)); |
1200 | Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d)); |
1201 | Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d)); |
1202 | |
1203 | Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth)); |
1204 | Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight)); |
1205 | |
1206 | bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat); |
1207 | bool intDst = Surface::isNonNormalizedInteger(state.destFormat); |
1208 | bool intBoth = intSrc && intDst; |
1209 | bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat); |
1210 | bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat); |
1211 | int srcBytes = Surface::bytes(state.sourceFormat); |
1212 | int dstBytes = Surface::bytes(state.destFormat); |
1213 | |
1214 | bool hasConstantColorI = false; |
1215 | Int4 constantColorI; |
1216 | bool hasConstantColorF = false; |
1217 | Float4 constantColorF; |
1218 | if(state.clearOperation) |
1219 | { |
1220 | if(intBoth) // Integer types |
1221 | { |
1222 | if(!read(constantColorI, source, state)) |
1223 | { |
1224 | return nullptr; |
1225 | } |
1226 | hasConstantColorI = true; |
1227 | } |
1228 | else |
1229 | { |
1230 | if(!read(constantColorF, source, state)) |
1231 | { |
1232 | return nullptr; |
1233 | } |
1234 | hasConstantColorF = true; |
1235 | |
1236 | if(!ApplyScaleAndClamp(constantColorF, state)) |
1237 | { |
1238 | return nullptr; |
1239 | } |
1240 | } |
1241 | } |
1242 | |
1243 | For(Int j = y0d, j < y1d, j++) |
1244 | { |
1245 | Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h; |
1246 | Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB; |
1247 | |
1248 | For(Int i = x0d, i < x1d, i++) |
1249 | { |
1250 | Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w; |
1251 | Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes; |
1252 | |
1253 | if(hasConstantColorI) |
1254 | { |
1255 | if(!write(constantColorI, d, state)) |
1256 | { |
1257 | return nullptr; |
1258 | } |
1259 | } |
1260 | else if(hasConstantColorF) |
1261 | { |
1262 | for(int s = 0; s < state.destSamples; s++) |
1263 | { |
1264 | if(!write(constantColorF, d, state)) |
1265 | { |
1266 | return nullptr; |
1267 | } |
1268 | |
1269 | d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); |
1270 | } |
1271 | } |
1272 | else if(intBoth) // Integer types do not support filtering |
1273 | { |
1274 | Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision |
1275 | Int X = Int(x); |
1276 | Int Y = Int(y); |
1277 | |
1278 | if(state.clampToEdge) |
1279 | { |
1280 | X = Clamp(X, 0, sWidth - 1); |
1281 | Y = Clamp(Y, 0, sHeight - 1); |
1282 | } |
1283 | |
1284 | Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); |
1285 | |
1286 | if(!read(color, s, state)) |
1287 | { |
1288 | return nullptr; |
1289 | } |
1290 | |
1291 | if(!write(color, d, state)) |
1292 | { |
1293 | return nullptr; |
1294 | } |
1295 | } |
1296 | else |
1297 | { |
1298 | Float4 color; |
1299 | |
1300 | bool preScaled = false; |
1301 | if(!state.filter || intSrc) |
1302 | { |
1303 | Int X = Int(x); |
1304 | Int Y = Int(y); |
1305 | |
1306 | if(state.clampToEdge) |
1307 | { |
1308 | X = Clamp(X, 0, sWidth - 1); |
1309 | Y = Clamp(Y, 0, sHeight - 1); |
1310 | } |
1311 | |
1312 | Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); |
1313 | |
1314 | if(!read(color, s, state)) |
1315 | { |
1316 | return nullptr; |
1317 | } |
1318 | } |
1319 | else // Bilinear filtering |
1320 | { |
1321 | Float X = x; |
1322 | Float Y = y; |
1323 | |
1324 | if(state.clampToEdge) |
1325 | { |
1326 | X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f); |
1327 | Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f); |
1328 | } |
1329 | |
1330 | Float x0 = X - 0.5f; |
1331 | Float y0 = Y - 0.5f; |
1332 | |
1333 | Int X0 = Max(Int(x0), 0); |
1334 | Int Y0 = Max(Int(y0), 0); |
1335 | |
1336 | Int X1 = X0 + 1; |
1337 | Int Y1 = Y0 + 1; |
1338 | X1 = IfThenElse(X1 >= sWidth, X0, X1); |
1339 | Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1); |
1340 | |
1341 | Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout); |
1342 | Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout); |
1343 | Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout); |
1344 | Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout); |
1345 | |
1346 | Float4 c00; if(!read(c00, s00, state)) return nullptr; |
1347 | Float4 c01; if(!read(c01, s01, state)) return nullptr; |
1348 | Float4 c10; if(!read(c10, s10, state)) return nullptr; |
1349 | Float4 c11; if(!read(c11, s11, state)) return nullptr; |
1350 | |
1351 | if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB |
1352 | { |
1353 | if(!ApplyScaleAndClamp(c00, state)) return nullptr; |
1354 | if(!ApplyScaleAndClamp(c01, state)) return nullptr; |
1355 | if(!ApplyScaleAndClamp(c10, state)) return nullptr; |
1356 | if(!ApplyScaleAndClamp(c11, state)) return nullptr; |
1357 | preScaled = true; |
1358 | } |
1359 | |
1360 | Float4 fx = Float4(x0 - Float(X0)); |
1361 | Float4 fy = Float4(y0 - Float(Y0)); |
1362 | Float4 ix = Float4(1.0f) - fx; |
1363 | Float4 iy = Float4(1.0f) - fy; |
1364 | |
1365 | color = (c00 * ix + c01 * fx) * iy + |
1366 | (c10 * ix + c11 * fx) * fy; |
1367 | } |
1368 | |
1369 | if(!ApplyScaleAndClamp(color, state, preScaled)) |
1370 | { |
1371 | return nullptr; |
1372 | } |
1373 | |
1374 | for(int s = 0; s < state.destSamples; s++) |
1375 | { |
1376 | if(!write(color, d, state)) |
1377 | { |
1378 | return nullptr; |
1379 | } |
1380 | |
1381 | d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); |
1382 | } |
1383 | } |
1384 | } |
1385 | } |
1386 | } |
1387 | |
1388 | return function("BlitRoutine" ); |
1389 | } |
1390 | |
1391 | bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options) |
1392 | { |
1393 | ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1))); |
1394 | |
1395 | Rect dRect = destRect; |
1396 | RectF sRect = sourceRect; |
1397 | if(destRect.x0 > destRect.x1) |
1398 | { |
1399 | swap(dRect.x0, dRect.x1); |
1400 | swap(sRect.x0, sRect.x1); |
1401 | } |
1402 | if(destRect.y0 > destRect.y1) |
1403 | { |
1404 | swap(dRect.y0, dRect.y1); |
1405 | swap(sRect.y0, sRect.y1); |
1406 | } |
1407 | |
1408 | State state(options); |
1409 | state.clampToEdge = (sourceRect.x0 < 0.0f) || |
1410 | (sourceRect.y0 < 0.0f) || |
1411 | (sourceRect.x1 > (float)source->getWidth()) || |
1412 | (sourceRect.y1 > (float)source->getHeight()); |
1413 | |
1414 | bool useSourceInternal = !source->isExternalDirty(); |
1415 | bool useDestInternal = !dest->isExternalDirty(); |
1416 | bool isStencil = options.useStencil; |
1417 | |
1418 | state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal); |
1419 | state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal); |
1420 | state.destSamples = dest->getSamples(); |
1421 | |
1422 | criticalSection.lock(); |
1423 | auto blitRoutine = blitCache->query(state); |
1424 | |
1425 | if(!blitRoutine) |
1426 | { |
1427 | blitRoutine = generate(state); |
1428 | |
1429 | if(!blitRoutine) |
1430 | { |
1431 | criticalSection.unlock(); |
1432 | return false; |
1433 | } |
1434 | |
1435 | blitCache->add(state, blitRoutine); |
1436 | } |
1437 | |
1438 | criticalSection.unlock(); |
1439 | |
1440 | void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry(); |
1441 | |
1442 | BlitData data; |
1443 | |
1444 | bool isRGBA = options.writeMask == 0xF; |
1445 | bool isEntireDest = dest->isEntire(destRect); |
1446 | |
1447 | data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) : |
1448 | source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal); |
1449 | data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) : |
1450 | dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal); |
1451 | data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal); |
1452 | data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal); |
1453 | data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal); |
1454 | |
1455 | data.w = sRect.width() / dRect.width(); |
1456 | data.h = sRect.height() / dRect.height(); |
1457 | data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w; |
1458 | data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h; |
1459 | |
1460 | data.x0d = dRect.x0; |
1461 | data.x1d = dRect.x1; |
1462 | data.y0d = dRect.y0; |
1463 | data.y1d = dRect.y1; |
1464 | |
1465 | data.sWidth = source->getWidth(); |
1466 | data.sHeight = source->getHeight(); |
1467 | |
1468 | blitFunction(&data); |
1469 | |
1470 | if(isStencil) |
1471 | { |
1472 | source->unlockStencil(); |
1473 | dest->unlockStencil(); |
1474 | } |
1475 | else |
1476 | { |
1477 | source->unlock(useSourceInternal); |
1478 | dest->unlock(useDestInternal); |
1479 | } |
1480 | |
1481 | return true; |
1482 | } |
1483 | } |
1484 | |