1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
17#include "Shader/ShaderCore.hpp"
18#include "Reactor/Reactor.hpp"
19#include "Common/Memory.hpp"
20#include "Common/Debug.hpp"
21
22namespace sw
23{
24 using namespace rr;
25
26 Blitter::Blitter()
27 {
28 blitCache = new RoutineCache<State>(1024);
29 }
30
31 Blitter::~Blitter()
32 {
33 delete blitCache;
34 }
35
36 void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
37 {
38 if(fastClear(pixel, format, dest, dRect, rgbaMask))
39 {
40 return;
41 }
42
43 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
44 SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0); // Sample from the middle.
45 blit(color, sRect, dest, dRect, {rgbaMask});
46 delete color;
47 }
48
49 bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
50 {
51 if(format != FORMAT_A32B32G32R32F)
52 {
53 return false;
54 }
55
56 float *color = (float*)pixel;
57 float r = color[0];
58 float g = color[1];
59 float b = color[2];
60 float a = color[3];
61
62 uint32_t packed;
63
64 switch(dest->getFormat())
65 {
66 case FORMAT_R5G6B5:
67 if((rgbaMask & 0x7) != 0x7) return false;
68 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
69 ((uint16_t)(63 * g + 0.5f) << 5) |
70 ((uint16_t)(31 * r + 0.5f) << 11);
71 break;
72 case FORMAT_X8B8G8R8:
73 if((rgbaMask & 0x7) != 0x7) return false;
74 packed = ((uint32_t)(255) << 24) |
75 ((uint32_t)(255 * b + 0.5f) << 16) |
76 ((uint32_t)(255 * g + 0.5f) << 8) |
77 ((uint32_t)(255 * r + 0.5f) << 0);
78 break;
79 case FORMAT_A8B8G8R8:
80 if((rgbaMask & 0xF) != 0xF) return false;
81 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
82 ((uint32_t)(255 * b + 0.5f) << 16) |
83 ((uint32_t)(255 * g + 0.5f) << 8) |
84 ((uint32_t)(255 * r + 0.5f) << 0);
85 break;
86 case FORMAT_X8R8G8B8:
87 if((rgbaMask & 0x7) != 0x7) return false;
88 packed = ((uint32_t)(255) << 24) |
89 ((uint32_t)(255 * r + 0.5f) << 16) |
90 ((uint32_t)(255 * g + 0.5f) << 8) |
91 ((uint32_t)(255 * b + 0.5f) << 0);
92 break;
93 case FORMAT_A8R8G8B8:
94 if((rgbaMask & 0xF) != 0xF) return false;
95 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
96 ((uint32_t)(255 * r + 0.5f) << 16) |
97 ((uint32_t)(255 * g + 0.5f) << 8) |
98 ((uint32_t)(255 * b + 0.5f) << 0);
99 break;
100 default:
101 return false;
102 }
103
104 bool useDestInternal = !dest->isExternalDirty();
105 uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal);
106
107 for(int j = 0; j < dest->getSamples(); j++)
108 {
109 uint8_t *d = slice;
110
111 switch(Surface::bytes(dest->getFormat()))
112 {
113 case 2:
114 for(int i = dRect.y0; i < dRect.y1; i++)
115 {
116 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
117 d += dest->getPitchB(useDestInternal);
118 }
119 break;
120 case 4:
121 for(int i = dRect.y0; i < dRect.y1; i++)
122 {
123 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
124 d += dest->getPitchB(useDestInternal);
125 }
126 break;
127 default:
128 assert(false);
129 }
130
131 slice += dest->getSliceB(useDestInternal);
132 }
133
134 dest->unlock(useDestInternal);
135
136 return true;
137 }
138
139 void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
140 {
141 if(dest->getInternalFormat() == FORMAT_NULL)
142 {
143 return;
144 }
145
146 if(blitReactor(source, sourceRect, dest, destRect, options))
147 {
148 return;
149 }
150
151 SliceRectF sRect = sourceRect;
152 SliceRect dRect = destRect;
153
154 bool flipX = destRect.x0 > destRect.x1;
155 bool flipY = destRect.y0 > destRect.y1;
156
157 if(flipX)
158 {
159 swap(dRect.x0, dRect.x1);
160 swap(sRect.x0, sRect.x1);
161 }
162 if(flipY)
163 {
164 swap(dRect.y0, dRect.y1);
165 swap(sRect.y0, sRect.y1);
166 }
167
168 source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
169 dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
170
171 float w = sRect.width() / dRect.width();
172 float h = sRect.height() / dRect.height();
173
174 float xStart = sRect.x0 + (0.5f - dRect.x0) * w;
175 float yStart = sRect.y0 + (0.5f - dRect.y0) * h;
176
177 for(int j = dRect.y0; j < dRect.y1; j++)
178 {
179 float y = yStart + j * h;
180
181 for(int i = dRect.x0; i < dRect.x1; i++)
182 {
183 float x = xStart + i * w;
184
185 // FIXME: Support RGBA mask
186 dest->copyInternal(source, i, j, x, y, options.filter);
187 }
188 }
189
190 source->unlockInternal();
191 dest->unlockInternal();
192 }
193
194 void Blitter::blit3D(Surface *source, Surface *dest)
195 {
196 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
197 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
198
199 float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth());
200 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
201 float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth());
202
203 for(int k = 0; k < dest->getDepth(); k++)
204 {
205 float z = (k + 0.5f) * d;
206
207 for(int j = 0; j < dest->getHeight(); j++)
208 {
209 float y = (j + 0.5f) * h;
210
211 for(int i = 0; i < dest->getWidth(); i++)
212 {
213 float x = (i + 0.5f) * w;
214
215 dest->copyInternal(source, i, j, k, x, y, z, true);
216 }
217 }
218 }
219
220 source->unlockInternal();
221 dest->unlockInternal();
222 }
223
224 bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state)
225 {
226 c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
227
228 switch(state.sourceFormat)
229 {
230 case FORMAT_L8:
231 c.xyz = Float(Int(*Pointer<Byte>(element)));
232 c.w = float(0xFF);
233 break;
234 case FORMAT_A8:
235 c.w = Float(Int(*Pointer<Byte>(element)));
236 break;
237 case FORMAT_R8I:
238 case FORMAT_R8_SNORM:
239 c.x = Float(Int(*Pointer<SByte>(element)));
240 c.w = float(0x7F);
241 break;
242 case FORMAT_R8:
243 case FORMAT_R8UI:
244 c.x = Float(Int(*Pointer<Byte>(element)));
245 c.w = float(0xFF);
246 break;
247 case FORMAT_R16I:
248 c.x = Float(Int(*Pointer<Short>(element)));
249 c.w = float(0x7FFF);
250 break;
251 case FORMAT_R16UI:
252 c.x = Float(Int(*Pointer<UShort>(element)));
253 c.w = float(0xFFFF);
254 break;
255 case FORMAT_R32I:
256 c.x = Float(*Pointer<Int>(element));
257 c.w = float(0x7FFFFFFF);
258 break;
259 case FORMAT_R32UI:
260 c.x = Float(*Pointer<UInt>(element));
261 c.w = float(0xFFFFFFFF);
262 break;
263 case FORMAT_A8R8G8B8:
264 c = Float4(*Pointer<Byte4>(element)).zyxw;
265 break;
266 case FORMAT_A8B8G8R8I:
267 case FORMAT_A8B8G8R8_SNORM:
268 c = Float4(*Pointer<SByte4>(element));
269 break;
270 case FORMAT_A8B8G8R8:
271 case FORMAT_A8B8G8R8UI:
272 case FORMAT_SRGB8_A8:
273 c = Float4(*Pointer<Byte4>(element));
274 break;
275 case FORMAT_X8R8G8B8:
276 c = Float4(*Pointer<Byte4>(element)).zyxw;
277 c.w = float(0xFF);
278 break;
279 case FORMAT_R8G8B8:
280 c.z = Float(Int(*Pointer<Byte>(element + 0)));
281 c.y = Float(Int(*Pointer<Byte>(element + 1)));
282 c.x = Float(Int(*Pointer<Byte>(element + 2)));
283 c.w = float(0xFF);
284 break;
285 case FORMAT_B8G8R8:
286 c.x = Float(Int(*Pointer<Byte>(element + 0)));
287 c.y = Float(Int(*Pointer<Byte>(element + 1)));
288 c.z = Float(Int(*Pointer<Byte>(element + 2)));
289 c.w = float(0xFF);
290 break;
291 case FORMAT_X8B8G8R8I:
292 case FORMAT_X8B8G8R8_SNORM:
293 c = Float4(*Pointer<SByte4>(element));
294 c.w = float(0x7F);
295 break;
296 case FORMAT_X8B8G8R8:
297 case FORMAT_X8B8G8R8UI:
298 case FORMAT_SRGB8_X8:
299 c = Float4(*Pointer<Byte4>(element));
300 c.w = float(0xFF);
301 break;
302 case FORMAT_A16B16G16R16I:
303 c = Float4(*Pointer<Short4>(element));
304 break;
305 case FORMAT_A16B16G16R16:
306 case FORMAT_A16B16G16R16UI:
307 c = Float4(*Pointer<UShort4>(element));
308 break;
309 case FORMAT_X16B16G16R16I:
310 c = Float4(*Pointer<Short4>(element));
311 c.w = float(0x7FFF);
312 break;
313 case FORMAT_X16B16G16R16UI:
314 c = Float4(*Pointer<UShort4>(element));
315 c.w = float(0xFFFF);
316 break;
317 case FORMAT_A32B32G32R32I:
318 c = Float4(*Pointer<Int4>(element));
319 break;
320 case FORMAT_A32B32G32R32UI:
321 c = Float4(*Pointer<UInt4>(element));
322 break;
323 case FORMAT_X32B32G32R32I:
324 c = Float4(*Pointer<Int4>(element));
325 c.w = float(0x7FFFFFFF);
326 break;
327 case FORMAT_X32B32G32R32UI:
328 c = Float4(*Pointer<UInt4>(element));
329 c.w = float(0xFFFFFFFF);
330 break;
331 case FORMAT_G8R8I:
332 case FORMAT_G8R8_SNORM:
333 c.x = Float(Int(*Pointer<SByte>(element + 0)));
334 c.y = Float(Int(*Pointer<SByte>(element + 1)));
335 c.w = float(0x7F);
336 break;
337 case FORMAT_G8R8:
338 case FORMAT_G8R8UI:
339 c.x = Float(Int(*Pointer<Byte>(element + 0)));
340 c.y = Float(Int(*Pointer<Byte>(element + 1)));
341 c.w = float(0xFF);
342 break;
343 case FORMAT_G16R16I:
344 c.x = Float(Int(*Pointer<Short>(element + 0)));
345 c.y = Float(Int(*Pointer<Short>(element + 2)));
346 c.w = float(0x7FFF);
347 break;
348 case FORMAT_G16R16:
349 case FORMAT_G16R16UI:
350 c.x = Float(Int(*Pointer<UShort>(element + 0)));
351 c.y = Float(Int(*Pointer<UShort>(element + 2)));
352 c.w = float(0xFFFF);
353 break;
354 case FORMAT_G32R32I:
355 c.x = Float(*Pointer<Int>(element + 0));
356 c.y = Float(*Pointer<Int>(element + 4));
357 c.w = float(0x7FFFFFFF);
358 break;
359 case FORMAT_G32R32UI:
360 c.x = Float(*Pointer<UInt>(element + 0));
361 c.y = Float(*Pointer<UInt>(element + 4));
362 c.w = float(0xFFFFFFFF);
363 break;
364 case FORMAT_A32B32G32R32F:
365 c = *Pointer<Float4>(element);
366 break;
367 case FORMAT_X32B32G32R32F:
368 case FORMAT_X32B32G32R32F_UNSIGNED:
369 case FORMAT_B32G32R32F:
370 c.z = *Pointer<Float>(element + 8);
371 case FORMAT_G32R32F:
372 c.x = *Pointer<Float>(element + 0);
373 c.y = *Pointer<Float>(element + 4);
374 break;
375 case FORMAT_R32F:
376 c.x = *Pointer<Float>(element);
377 break;
378 case FORMAT_R5G6B5:
379 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
380 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
381 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
382 break;
383 case FORMAT_A2B10G10R10:
384 case FORMAT_A2B10G10R10UI:
385 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
386 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
387 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
388 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
389 break;
390 case FORMAT_D16:
391 c.x = Float(Int((*Pointer<UShort>(element))));
392 break;
393 case FORMAT_D24S8:
394 case FORMAT_D24X8:
395 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
396 break;
397 case FORMAT_D32:
398 c.x = Float(Int((*Pointer<UInt>(element))));
399 break;
400 case FORMAT_D32F_COMPLEMENTARY:
401 case FORMAT_D32FS8_COMPLEMENTARY:
402 c.x = 1.0f - *Pointer<Float>(element);
403 break;
404 case FORMAT_D32F:
405 case FORMAT_D32FS8:
406 case FORMAT_D32F_LOCKABLE:
407 case FORMAT_D32FS8_TEXTURE:
408 case FORMAT_D32F_SHADOW:
409 case FORMAT_D32FS8_SHADOW:
410 c.x = *Pointer<Float>(element);
411 break;
412 case FORMAT_S8:
413 c.x = Float(Int(*Pointer<Byte>(element)));
414 break;
415 default:
416 return false;
417 }
418
419 return true;
420 }
421
422 bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
423 {
424 bool writeR = state.writeRed;
425 bool writeG = state.writeGreen;
426 bool writeB = state.writeBlue;
427 bool writeA = state.writeAlpha;
428 bool writeRGBA = writeR && writeG && writeB && writeA;
429
430 switch(state.destFormat)
431 {
432 case FORMAT_L8:
433 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
434 break;
435 case FORMAT_A8:
436 if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
437 break;
438 case FORMAT_A8R8G8B8:
439 if(writeRGBA)
440 {
441 Short4 c0 = RoundShort4(c.zyxw);
442 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
443 }
444 else
445 {
446 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
447 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
448 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
449 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
450 }
451 break;
452 case FORMAT_A8B8G8R8:
453 case FORMAT_SRGB8_A8:
454 if(writeRGBA)
455 {
456 Short4 c0 = RoundShort4(c);
457 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
458 }
459 else
460 {
461 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
462 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
463 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
464 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
465 }
466 break;
467 case FORMAT_X8R8G8B8:
468 if(writeRGBA)
469 {
470 Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
471 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
472 }
473 else
474 {
475 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
476 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
477 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
478 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
479 }
480 break;
481 case FORMAT_X8B8G8R8:
482 case FORMAT_SRGB8_X8:
483 if(writeRGBA)
484 {
485 Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
486 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
487 }
488 else
489 {
490 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
491 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
492 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
493 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
494 }
495 break;
496 case FORMAT_R8G8B8:
497 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
498 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
499 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
500 break;
501 case FORMAT_B8G8R8:
502 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
503 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
504 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
505 break;
506 case FORMAT_A32B32G32R32F:
507 if(writeRGBA)
508 {
509 *Pointer<Float4>(element) = c;
510 }
511 else
512 {
513 if(writeR) { *Pointer<Float>(element) = c.x; }
514 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
515 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
516 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
517 }
518 break;
519 case FORMAT_X32B32G32R32F:
520 case FORMAT_X32B32G32R32F_UNSIGNED:
521 if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
522 case FORMAT_B32G32R32F:
523 if(writeR) { *Pointer<Float>(element) = c.x; }
524 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
525 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
526 break;
527 case FORMAT_G32R32F:
528 if(writeR && writeG)
529 {
530 *Pointer<Float2>(element) = Float2(c);
531 }
532 else
533 {
534 if(writeR) { *Pointer<Float>(element) = c.x; }
535 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
536 }
537 break;
538 case FORMAT_R32F:
539 if(writeR) { *Pointer<Float>(element) = c.x; }
540 break;
541 case FORMAT_A8B8G8R8I:
542 case FORMAT_A8B8G8R8_SNORM:
543 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
544 case FORMAT_X8B8G8R8I:
545 case FORMAT_X8B8G8R8_SNORM:
546 if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM))
547 {
548 *Pointer<SByte>(element + 3) = SByte(0x7F);
549 }
550 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
551 case FORMAT_G8R8I:
552 case FORMAT_G8R8_SNORM:
553 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
554 case FORMAT_R8I:
555 case FORMAT_R8_SNORM:
556 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
557 break;
558 case FORMAT_A8B8G8R8UI:
559 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
560 case FORMAT_X8B8G8R8UI:
561 if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI))
562 {
563 *Pointer<Byte>(element + 3) = Byte(0xFF);
564 }
565 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
566 case FORMAT_G8R8UI:
567 case FORMAT_G8R8:
568 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
569 case FORMAT_R8UI:
570 case FORMAT_R8:
571 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
572 break;
573 case FORMAT_A16B16G16R16I:
574 if(writeRGBA)
575 {
576 *Pointer<Short4>(element) = Short4(RoundInt(c));
577 }
578 else
579 {
580 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
581 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
582 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
583 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
584 }
585 break;
586 case FORMAT_X16B16G16R16I:
587 if(writeRGBA)
588 {
589 *Pointer<Short4>(element) = Short4(RoundInt(c));
590 }
591 else
592 {
593 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
594 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
595 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
596 }
597 if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
598 break;
599 case FORMAT_G16R16I:
600 if(writeR && writeG)
601 {
602 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
603 }
604 else
605 {
606 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
607 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
608 }
609 break;
610 case FORMAT_R16I:
611 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
612 break;
613 case FORMAT_A16B16G16R16UI:
614 case FORMAT_A16B16G16R16:
615 if(writeRGBA)
616 {
617 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
618 }
619 else
620 {
621 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
622 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
623 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
624 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
625 }
626 break;
627 case FORMAT_X16B16G16R16UI:
628 if(writeRGBA)
629 {
630 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
631 }
632 else
633 {
634 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
635 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
636 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
637 }
638 if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
639 break;
640 case FORMAT_G16R16UI:
641 case FORMAT_G16R16:
642 if(writeR && writeG)
643 {
644 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
645 }
646 else
647 {
648 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
649 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
650 }
651 break;
652 case FORMAT_R16UI:
653 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
654 break;
655 case FORMAT_A32B32G32R32I:
656 if(writeRGBA)
657 {
658 *Pointer<Int4>(element) = RoundInt(c);
659 }
660 else
661 {
662 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
663 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
664 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
665 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
666 }
667 break;
668 case FORMAT_X32B32G32R32I:
669 if(writeRGBA)
670 {
671 *Pointer<Int4>(element) = RoundInt(c);
672 }
673 else
674 {
675 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
676 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
677 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
678 }
679 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
680 break;
681 case FORMAT_G32R32I:
682 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
683 case FORMAT_R32I:
684 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
685 break;
686 case FORMAT_A32B32G32R32UI:
687 if(writeRGBA)
688 {
689 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
690 }
691 else
692 {
693 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
694 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
695 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
696 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
697 }
698 break;
699 case FORMAT_X32B32G32R32UI:
700 if(writeRGBA)
701 {
702 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
703 }
704 else
705 {
706 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
707 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
708 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
709 }
710 if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
711 break;
712 case FORMAT_G32R32UI:
713 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
714 case FORMAT_R32UI:
715 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
716 break;
717 case FORMAT_R5G6B5:
718 if(writeR && writeG && writeB)
719 {
720 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
721 (RoundInt(Float(c.y)) << Int(5)) |
722 (RoundInt(Float(c.x)) << Int(11)));
723 }
724 else
725 {
726 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
727 unsigned short unmask = ~mask;
728 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
729 (UShort(RoundInt(Float(c.z)) |
730 (RoundInt(Float(c.y)) << Int(5)) |
731 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
732 }
733 break;
734 case FORMAT_A2B10G10R10:
735 case FORMAT_A2B10G10R10UI:
736 if(writeRGBA)
737 {
738 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
739 (RoundInt(Float(c.y)) << 10) |
740 (RoundInt(Float(c.z)) << 20) |
741 (RoundInt(Float(c.w)) << 30));
742 }
743 else
744 {
745 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
746 (writeB ? 0x3FF00000 : 0x0000) |
747 (writeG ? 0x000FFC00 : 0x0000) |
748 (writeR ? 0x000003FF : 0x0000);
749 unsigned int unmask = ~mask;
750 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
751 (UInt(RoundInt(Float(c.x)) |
752 (RoundInt(Float(c.y)) << 10) |
753 (RoundInt(Float(c.z)) << 20) |
754 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
755 }
756 break;
757 case FORMAT_D16:
758 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
759 break;
760 case FORMAT_D24S8:
761 case FORMAT_D24X8:
762 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
763 break;
764 case FORMAT_D32:
765 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
766 break;
767 case FORMAT_D32F_COMPLEMENTARY:
768 case FORMAT_D32FS8_COMPLEMENTARY:
769 *Pointer<Float>(element) = 1.0f - c.x;
770 break;
771 case FORMAT_D32F:
772 case FORMAT_D32FS8:
773 case FORMAT_D32F_LOCKABLE:
774 case FORMAT_D32FS8_TEXTURE:
775 case FORMAT_D32F_SHADOW:
776 case FORMAT_D32FS8_SHADOW:
777 *Pointer<Float>(element) = c.x;
778 break;
779 case FORMAT_S8:
780 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
781 break;
782 default:
783 return false;
784 }
785 return true;
786 }
787
788 bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state)
789 {
790 c = Int4(0, 0, 0, 1);
791
792 switch(state.sourceFormat)
793 {
794 case FORMAT_A8B8G8R8I:
795 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
796 case FORMAT_X8B8G8R8I:
797 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
798 case FORMAT_G8R8I:
799 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
800 case FORMAT_R8I:
801 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
802 break;
803 case FORMAT_A8B8G8R8UI:
804 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
805 case FORMAT_X8B8G8R8UI:
806 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
807 case FORMAT_G8R8UI:
808 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
809 case FORMAT_R8UI:
810 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
811 break;
812 case FORMAT_A16B16G16R16I:
813 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
814 case FORMAT_X16B16G16R16I:
815 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
816 case FORMAT_G16R16I:
817 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
818 case FORMAT_R16I:
819 c = Insert(c, Int(*Pointer<Short>(element)), 0);
820 break;
821 case FORMAT_A16B16G16R16UI:
822 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
823 case FORMAT_X16B16G16R16UI:
824 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
825 case FORMAT_G16R16UI:
826 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
827 case FORMAT_R16UI:
828 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
829 break;
830 case FORMAT_A32B32G32R32I:
831 case FORMAT_A32B32G32R32UI:
832 c = *Pointer<Int4>(element);
833 break;
834 case FORMAT_X32B32G32R32I:
835 case FORMAT_X32B32G32R32UI:
836 c = Insert(c, *Pointer<Int>(element + 8), 2);
837 case FORMAT_G32R32I:
838 case FORMAT_G32R32UI:
839 c = Insert(c, *Pointer<Int>(element + 4), 1);
840 case FORMAT_R32I:
841 case FORMAT_R32UI:
842 c = Insert(c, *Pointer<Int>(element), 0);
843 break;
844 default:
845 return false;
846 }
847
848 return true;
849 }
850
851 bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
852 {
853 bool writeR = state.writeRed;
854 bool writeG = state.writeGreen;
855 bool writeB = state.writeBlue;
856 bool writeA = state.writeAlpha;
857 bool writeRGBA = writeR && writeG && writeB && writeA;
858
859 switch(state.destFormat)
860 {
861 case FORMAT_A8B8G8R8I:
862 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
863 case FORMAT_X8B8G8R8I:
864 if(writeA && (state.destFormat != FORMAT_A8B8G8R8I))
865 {
866 *Pointer<SByte>(element + 3) = SByte(0x7F);
867 }
868 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
869 case FORMAT_G8R8I:
870 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
871 case FORMAT_R8I:
872 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
873 break;
874 case FORMAT_A8B8G8R8UI:
875 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
876 case FORMAT_X8B8G8R8UI:
877 if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI))
878 {
879 *Pointer<Byte>(element + 3) = Byte(0xFF);
880 }
881 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
882 case FORMAT_G8R8UI:
883 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
884 case FORMAT_R8UI:
885 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
886 break;
887 case FORMAT_A16B16G16R16I:
888 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
889 case FORMAT_X16B16G16R16I:
890 if(writeA && (state.destFormat != FORMAT_A16B16G16R16I))
891 {
892 *Pointer<Short>(element + 6) = Short(0x7FFF);
893 }
894 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
895 case FORMAT_G16R16I:
896 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
897 case FORMAT_R16I:
898 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
899 break;
900 case FORMAT_A16B16G16R16UI:
901 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
902 case FORMAT_X16B16G16R16UI:
903 if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI))
904 {
905 *Pointer<UShort>(element + 6) = UShort(0xFFFF);
906 }
907 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
908 case FORMAT_G16R16UI:
909 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
910 case FORMAT_R16UI:
911 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
912 break;
913 case FORMAT_A32B32G32R32I:
914 if(writeRGBA)
915 {
916 *Pointer<Int4>(element) = c;
917 }
918 else
919 {
920 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
921 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
922 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
923 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
924 }
925 break;
926 case FORMAT_X32B32G32R32I:
927 if(writeRGBA)
928 {
929 *Pointer<Int4>(element) = c;
930 }
931 else
932 {
933 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
934 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
935 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
936 }
937 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
938 break;
939 case FORMAT_G32R32I:
940 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
941 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
942 break;
943 case FORMAT_R32I:
944 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
945 break;
946 case FORMAT_A32B32G32R32UI:
947 if(writeRGBA)
948 {
949 *Pointer<UInt4>(element) = As<UInt4>(c);
950 }
951 else
952 {
953 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
954 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
955 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
956 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
957 }
958 break;
959 case FORMAT_X32B32G32R32UI:
960 if(writeRGBA)
961 {
962 *Pointer<UInt4>(element) = As<UInt4>(c);
963 }
964 else
965 {
966 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
967 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
968 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
969 }
970 if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
971 break;
972 case FORMAT_G32R32UI:
973 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
974 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
975 break;
976 case FORMAT_R32UI:
977 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
978 break;
979 default:
980 return false;
981 }
982
983 return true;
984 }
985
986 bool Blitter::GetScale(float4 &scale, Format format)
987 {
988 switch(format)
989 {
990 case FORMAT_L8:
991 case FORMAT_A8:
992 case FORMAT_A8R8G8B8:
993 case FORMAT_X8R8G8B8:
994 case FORMAT_R8:
995 case FORMAT_G8R8:
996 case FORMAT_R8G8B8:
997 case FORMAT_B8G8R8:
998 case FORMAT_X8B8G8R8:
999 case FORMAT_A8B8G8R8:
1000 case FORMAT_SRGB8_X8:
1001 case FORMAT_SRGB8_A8:
1002 scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1003 break;
1004 case FORMAT_R8_SNORM:
1005 case FORMAT_G8R8_SNORM:
1006 case FORMAT_X8B8G8R8_SNORM:
1007 case FORMAT_A8B8G8R8_SNORM:
1008 scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1009 break;
1010 case FORMAT_A16B16G16R16:
1011 scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1012 break;
1013 case FORMAT_R8I:
1014 case FORMAT_R8UI:
1015 case FORMAT_G8R8I:
1016 case FORMAT_G8R8UI:
1017 case FORMAT_X8B8G8R8I:
1018 case FORMAT_X8B8G8R8UI:
1019 case FORMAT_A8B8G8R8I:
1020 case FORMAT_A8B8G8R8UI:
1021 case FORMAT_R16I:
1022 case FORMAT_R16UI:
1023 case FORMAT_G16R16:
1024 case FORMAT_G16R16I:
1025 case FORMAT_G16R16UI:
1026 case FORMAT_X16B16G16R16I:
1027 case FORMAT_X16B16G16R16UI:
1028 case FORMAT_A16B16G16R16I:
1029 case FORMAT_A16B16G16R16UI:
1030 case FORMAT_R32I:
1031 case FORMAT_R32UI:
1032 case FORMAT_G32R32I:
1033 case FORMAT_G32R32UI:
1034 case FORMAT_X32B32G32R32I:
1035 case FORMAT_X32B32G32R32UI:
1036 case FORMAT_A32B32G32R32I:
1037 case FORMAT_A32B32G32R32UI:
1038 case FORMAT_A32B32G32R32F:
1039 case FORMAT_X32B32G32R32F:
1040 case FORMAT_X32B32G32R32F_UNSIGNED:
1041 case FORMAT_B32G32R32F:
1042 case FORMAT_G32R32F:
1043 case FORMAT_R32F:
1044 case FORMAT_A2B10G10R10UI:
1045 scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1046 break;
1047 case FORMAT_R5G6B5:
1048 scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1049 break;
1050 case FORMAT_A2B10G10R10:
1051 scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1052 break;
1053 case FORMAT_D16:
1054 scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1055 break;
1056 case FORMAT_D24S8:
1057 case FORMAT_D24X8:
1058 scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1059 break;
1060 case FORMAT_D32:
1061 scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1062 break;
1063 case FORMAT_D32F:
1064 case FORMAT_D32FS8:
1065 case FORMAT_D32F_COMPLEMENTARY:
1066 case FORMAT_D32FS8_COMPLEMENTARY:
1067 case FORMAT_D32F_LOCKABLE:
1068 case FORMAT_D32FS8_TEXTURE:
1069 case FORMAT_D32F_SHADOW:
1070 case FORMAT_D32FS8_SHADOW:
1071 case FORMAT_S8:
1072 scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1073 break;
1074 default:
1075 return false;
1076 }
1077
1078 return true;
1079 }
1080
1081 bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1082 {
1083 float4 scale, unscale;
1084 if(state.clearOperation &&
1085 Surface::isNonNormalizedInteger(state.sourceFormat) &&
1086 !Surface::isNonNormalizedInteger(state.destFormat))
1087 {
1088 // If we're clearing a buffer from an int or uint color into a normalized color,
1089 // then the whole range of the int or uint color must be scaled between 0 and 1.
1090 switch(state.sourceFormat)
1091 {
1092 case FORMAT_A32B32G32R32I:
1093 unscale = replicate(static_cast<float>(0x7FFFFFFF));
1094 break;
1095 case FORMAT_A32B32G32R32UI:
1096 unscale = replicate(static_cast<float>(0xFFFFFFFF));
1097 break;
1098 default:
1099 return false;
1100 }
1101 }
1102 else if(!GetScale(unscale, state.sourceFormat))
1103 {
1104 return false;
1105 }
1106
1107 if(!GetScale(scale, state.destFormat))
1108 {
1109 return false;
1110 }
1111
1112 bool srcSRGB = Surface::isSRGBformat(state.sourceFormat);
1113 bool dstSRGB = Surface::isSRGBformat(state.destFormat);
1114
1115 if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
1116 {
1117 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1118 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1119 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1120 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1121 }
1122 else if(unscale != scale)
1123 {
1124 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1125 }
1126
1127 if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED)
1128 {
1129 value = Max(value, Float4(0.0f)); // TODO: Only necessary if source is signed.
1130 }
1131 else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1132 {
1133 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1134
1135 value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1136 Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1137 Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1138 Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1139 }
1140
1141 return true;
1142 }
1143
1144 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1145 {
1146 if(!quadLayout)
1147 {
1148 return y * pitchB + x * bytes;
1149 }
1150 else
1151 {
1152 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1153 return (y & Int(~1)) * pitchB +
1154 ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes;
1155 }
1156 }
1157
1158 Float4 Blitter::LinearToSRGB(Float4 &c)
1159 {
1160 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1161 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1162
1163 Float4 s = c;
1164 s.xyz = Max(lc, ec);
1165
1166 return s;
1167 }
1168
1169 Float4 Blitter::sRGBtoLinear(Float4 &c)
1170 {
1171 Float4 lc = c * Float4(1.0f / 12.92f);
1172 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1173
1174 Int4 linear = CmpLT(c, Float4(0.04045f));
1175
1176 Float4 s = c;
1177 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
1178
1179 return s;
1180 }
1181
1182 std::shared_ptr<Routine> Blitter::generate(const State &state)
1183 {
1184 Function<Void(Pointer<Byte>)> function;
1185 {
1186 Pointer<Byte> blit(function.Arg<0>());
1187
1188 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1189 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1190 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1191 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1192
1193 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1194 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1195 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1196 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1197
1198 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1199 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1200 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1201 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1202
1203 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1204 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1205
1206 bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1207 bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1208 bool intBoth = intSrc && intDst;
1209 bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1210 bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1211 int srcBytes = Surface::bytes(state.sourceFormat);
1212 int dstBytes = Surface::bytes(state.destFormat);
1213
1214 bool hasConstantColorI = false;
1215 Int4 constantColorI;
1216 bool hasConstantColorF = false;
1217 Float4 constantColorF;
1218 if(state.clearOperation)
1219 {
1220 if(intBoth) // Integer types
1221 {
1222 if(!read(constantColorI, source, state))
1223 {
1224 return nullptr;
1225 }
1226 hasConstantColorI = true;
1227 }
1228 else
1229 {
1230 if(!read(constantColorF, source, state))
1231 {
1232 return nullptr;
1233 }
1234 hasConstantColorF = true;
1235
1236 if(!ApplyScaleAndClamp(constantColorF, state))
1237 {
1238 return nullptr;
1239 }
1240 }
1241 }
1242
1243 For(Int j = y0d, j < y1d, j++)
1244 {
1245 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1246 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1247
1248 For(Int i = x0d, i < x1d, i++)
1249 {
1250 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1251 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1252
1253 if(hasConstantColorI)
1254 {
1255 if(!write(constantColorI, d, state))
1256 {
1257 return nullptr;
1258 }
1259 }
1260 else if(hasConstantColorF)
1261 {
1262 for(int s = 0; s < state.destSamples; s++)
1263 {
1264 if(!write(constantColorF, d, state))
1265 {
1266 return nullptr;
1267 }
1268
1269 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1270 }
1271 }
1272 else if(intBoth) // Integer types do not support filtering
1273 {
1274 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1275 Int X = Int(x);
1276 Int Y = Int(y);
1277
1278 if(state.clampToEdge)
1279 {
1280 X = Clamp(X, 0, sWidth - 1);
1281 Y = Clamp(Y, 0, sHeight - 1);
1282 }
1283
1284 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1285
1286 if(!read(color, s, state))
1287 {
1288 return nullptr;
1289 }
1290
1291 if(!write(color, d, state))
1292 {
1293 return nullptr;
1294 }
1295 }
1296 else
1297 {
1298 Float4 color;
1299
1300 bool preScaled = false;
1301 if(!state.filter || intSrc)
1302 {
1303 Int X = Int(x);
1304 Int Y = Int(y);
1305
1306 if(state.clampToEdge)
1307 {
1308 X = Clamp(X, 0, sWidth - 1);
1309 Y = Clamp(Y, 0, sHeight - 1);
1310 }
1311
1312 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1313
1314 if(!read(color, s, state))
1315 {
1316 return nullptr;
1317 }
1318 }
1319 else // Bilinear filtering
1320 {
1321 Float X = x;
1322 Float Y = y;
1323
1324 if(state.clampToEdge)
1325 {
1326 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1327 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1328 }
1329
1330 Float x0 = X - 0.5f;
1331 Float y0 = Y - 0.5f;
1332
1333 Int X0 = Max(Int(x0), 0);
1334 Int Y0 = Max(Int(y0), 0);
1335
1336 Int X1 = X0 + 1;
1337 Int Y1 = Y0 + 1;
1338 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1339 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1340
1341 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1342 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1343 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1344 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1345
1346 Float4 c00; if(!read(c00, s00, state)) return nullptr;
1347 Float4 c01; if(!read(c01, s01, state)) return nullptr;
1348 Float4 c10; if(!read(c10, s10, state)) return nullptr;
1349 Float4 c11; if(!read(c11, s11, state)) return nullptr;
1350
1351 if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB
1352 {
1353 if(!ApplyScaleAndClamp(c00, state)) return nullptr;
1354 if(!ApplyScaleAndClamp(c01, state)) return nullptr;
1355 if(!ApplyScaleAndClamp(c10, state)) return nullptr;
1356 if(!ApplyScaleAndClamp(c11, state)) return nullptr;
1357 preScaled = true;
1358 }
1359
1360 Float4 fx = Float4(x0 - Float(X0));
1361 Float4 fy = Float4(y0 - Float(Y0));
1362 Float4 ix = Float4(1.0f) - fx;
1363 Float4 iy = Float4(1.0f) - fy;
1364
1365 color = (c00 * ix + c01 * fx) * iy +
1366 (c10 * ix + c11 * fx) * fy;
1367 }
1368
1369 if(!ApplyScaleAndClamp(color, state, preScaled))
1370 {
1371 return nullptr;
1372 }
1373
1374 for(int s = 0; s < state.destSamples; s++)
1375 {
1376 if(!write(color, d, state))
1377 {
1378 return nullptr;
1379 }
1380
1381 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
1382 }
1383 }
1384 }
1385 }
1386 }
1387
1388 return function("BlitRoutine");
1389 }
1390
1391 bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options)
1392 {
1393 ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1394
1395 Rect dRect = destRect;
1396 RectF sRect = sourceRect;
1397 if(destRect.x0 > destRect.x1)
1398 {
1399 swap(dRect.x0, dRect.x1);
1400 swap(sRect.x0, sRect.x1);
1401 }
1402 if(destRect.y0 > destRect.y1)
1403 {
1404 swap(dRect.y0, dRect.y1);
1405 swap(sRect.y0, sRect.y1);
1406 }
1407
1408 State state(options);
1409 state.clampToEdge = (sourceRect.x0 < 0.0f) ||
1410 (sourceRect.y0 < 0.0f) ||
1411 (sourceRect.x1 > (float)source->getWidth()) ||
1412 (sourceRect.y1 > (float)source->getHeight());
1413
1414 bool useSourceInternal = !source->isExternalDirty();
1415 bool useDestInternal = !dest->isExternalDirty();
1416 bool isStencil = options.useStencil;
1417
1418 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1419 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1420 state.destSamples = dest->getSamples();
1421
1422 criticalSection.lock();
1423 auto blitRoutine = blitCache->query(state);
1424
1425 if(!blitRoutine)
1426 {
1427 blitRoutine = generate(state);
1428
1429 if(!blitRoutine)
1430 {
1431 criticalSection.unlock();
1432 return false;
1433 }
1434
1435 blitCache->add(state, blitRoutine);
1436 }
1437
1438 criticalSection.unlock();
1439
1440 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1441
1442 BlitData data;
1443
1444 bool isRGBA = options.writeMask == 0xF;
1445 bool isEntireDest = dest->isEntire(destRect);
1446
1447 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1448 source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1449 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1450 dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1451 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1452 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1453 data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal);
1454
1455 data.w = sRect.width() / dRect.width();
1456 data.h = sRect.height() / dRect.height();
1457 data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w;
1458 data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h;
1459
1460 data.x0d = dRect.x0;
1461 data.x1d = dRect.x1;
1462 data.y0d = dRect.y0;
1463 data.y1d = dRect.y1;
1464
1465 data.sWidth = source->getWidth();
1466 data.sHeight = source->getHeight();
1467
1468 blitFunction(&data);
1469
1470 if(isStencil)
1471 {
1472 source->unlockStencil();
1473 dest->unlockStencil();
1474 }
1475 else
1476 {
1477 source->unlock(useSourceInternal);
1478 dest->unlock(useDestInternal);
1479 }
1480
1481 return true;
1482 }
1483}
1484