// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Blitter.hpp"

#include "Pipeline/ShaderCore.hpp"
#include "Reactor/Reactor.hpp"
#include "System/Half.hpp"
#include "System/Memory.hpp"
#include "Vulkan/VkDebug.hpp"
#include "Vulkan/VkImage.hpp"
#include "Vulkan/VkBuffer.hpp"

#include <utility>

namespace sw
{
    Blitter::Blitter() :
        blitMutex(),
        blitCache(1024),
        cornerUpdateMutex(),
        cornerUpdateCache(64) // We only need one of these per format
    {
    }

    Blitter::~Blitter()
    {
    }

    void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
    {
        VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
        vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
        if(dstFormat == VK_FORMAT_UNDEFINED)
        {
            return;
        }

        float *pPixel = static_cast<float *>(pixel);
        if(viewFormat.isUnsignedNormalized())
        {
            pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
            pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
            pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
            pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
        }
        else if(viewFormat.isSignedNormalized())
        {
            pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
            pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
            pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
            pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
        }

        if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
        {
            return;
        }

        State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
        auto blitRoutine = getBlitRoutine(state);
        if(!blitRoutine)
        {
            return;
        }

        VkImageSubresourceLayers subresLayers =
        {
            subresourceRange.aspectMask,
            subresourceRange.baseMipLevel,
            subresourceRange.baseArrayLayer,
            1
        };

        uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
        uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);

        VkRect2D area = { { 0, 0 }, { 0, 0 } };
        if(renderArea)
        {
            ASSERT(subresourceRange.levelCount == 1);
            area = *renderArea;
        }

        for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
        {
            VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
            if(!renderArea)
            {
                area.extent.width = extent.width;
                area.extent.height = extent.height;
            }

            BlitData data =
            {
                pixel, nullptr, // source, dest

                format.bytes(), // sPitchB
                dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
                0, // sSliceB (unused in clear operations)
                dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB

                0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h

                area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
                area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d

                0, 0, // sWidth, sHeight
            };

            if(renderArea && dest->is3DSlice())
            {
                // Reinterpret layers as depth slices
                subresLayers.baseArrayLayer = 0;
                subresLayers.layerCount = 1;
                for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
                {
                    data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
                    blitRoutine(&data);
                }
            }
            else
            {
                for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
                {
                    for(uint32_t depth = 0; depth < extent.depth; depth++)
                    {
                        data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);

                        blitRoutine(&data);
                    }
                }
            }
        }
    }

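    // fastClear() handles the common case where the clear color can be packed
    // into a single 16-bit or 32-bit pattern and destination rows filled
    // directly, without going through a generated blit routine. It returns
    // false when the format combination requires the general path above.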
    bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
    {
        if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
        {
            return false;
        }

        float *color = (float*)pixel;
        float r = color[0];
        float g = color[1];
        float b = color[2];
        float a = color[3];

        uint32_t packed;

        VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
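        // Pack the normalized clear color into the destination's bit layout.
        // Each channel is scaled to its bit width and rounded to nearest by
        // adding 0.5 before truncation. For example, clearing to opaque red
        // (r = 1, g = 0, b = 0) in VK_FORMAT_R5G6B5_UNORM_PACK16 yields
        // packed = 31 << 11 = 0xF800.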
        switch(viewFormat)
        {
        case VK_FORMAT_R5G6B5_UNORM_PACK16:
            packed = ((uint16_t)(31 * b + 0.5f) << 0) |
                     ((uint16_t)(63 * g + 0.5f) << 5) |
                     ((uint16_t)(31 * r + 0.5f) << 11);
            break;
        case VK_FORMAT_B5G6R5_UNORM_PACK16:
            packed = ((uint16_t)(31 * r + 0.5f) << 0) |
                     ((uint16_t)(63 * g + 0.5f) << 5) |
                     ((uint16_t)(31 * b + 0.5f) << 11);
            break;
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        case VK_FORMAT_R8G8B8A8_UNORM:
            packed = ((uint32_t)(255 * a + 0.5f) << 24) |
                     ((uint32_t)(255 * b + 0.5f) << 16) |
                     ((uint32_t)(255 * g + 0.5f) << 8) |
                     ((uint32_t)(255 * r + 0.5f) << 0);
            break;
        case VK_FORMAT_B8G8R8A8_UNORM:
            packed = ((uint32_t)(255 * a + 0.5f) << 24) |
                     ((uint32_t)(255 * r + 0.5f) << 16) |
                     ((uint32_t)(255 * g + 0.5f) << 8) |
                     ((uint32_t)(255 * b + 0.5f) << 0);
            break;
        case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
            packed = R11G11B10F(color);
            break;
        case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
            packed = RGB9E5(color);
            break;
        default:
            return false;
        }

        VkImageSubresourceLayers subresLayers =
        {
            subresourceRange.aspectMask,
            subresourceRange.baseMipLevel,
            subresourceRange.baseArrayLayer,
            1
        };
        uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
        uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);

        VkRect2D area = { { 0, 0 }, { 0, 0 } };
        if(renderArea)
        {
            ASSERT(subresourceRange.levelCount == 1);
            area = *renderArea;
        }

        for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
        {
            int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
            int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
            VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
            if(!renderArea)
            {
                area.extent.width = extent.width;
                area.extent.height = extent.height;
            }
            if(dest->is3DSlice())
            {
                extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
            }

            for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
            {
                for(uint32_t depth = 0; depth < extent.depth; depth++)
                {
                    uint8_t *slice = (uint8_t*)dest->getTexelPointer(
                        { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);

                    for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
                    {
                        uint8_t *d = slice;

                        switch(viewFormat.bytes())
                        {
                        case 2:
                            for(uint32_t i = 0; i < area.extent.height; i++)
                            {
                                ASSERT(d < dest->end());
                                sw::clear((uint16_t*)d, static_cast<uint16_t>(packed), area.extent.width);
                                d += rowPitchBytes;
                            }
                            break;
                        case 4:
                            for(uint32_t i = 0; i < area.extent.height; i++)
                            {
                                ASSERT(d < dest->end());
                                sw::clear((uint32_t*)d, packed, area.extent.width);
                                d += rowPitchBytes;
                            }
                            break;
                        default:
                            assert(false);
                        }

                        slice += slicePitchBytes;
                    }
                }
            }
        }

        return true;
    }

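    // readFloat4() reads a single texel of the source format into a Float4.
    // Values are returned in their raw (unnormalized) range; scaling to the
    // destination's range happens later in ApplyScaleAndClamp(). For formats
    // without an alpha channel, c.w is set to the format's maximum value so
    // that it unscales to 1.0.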
    Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
    {
        Float4 c(0.0f, 0.0f, 0.0f, 1.0f);

        switch(state.sourceFormat)
        {
        case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
            c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
            c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
            c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
            c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
            break;
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8_SNORM:
            c.x = Float(Int(*Pointer<SByte>(element)));
            c.w = float(0x7F);
            break;
        case VK_FORMAT_R8_UNORM:
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8_SRGB:
            c.x = Float(Int(*Pointer<Byte>(element)));
            c.w = float(0xFF);
            break;
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16_SNORM:
            c.x = Float(Int(*Pointer<Short>(element)));
            c.w = float(0x7FFF);
            break;
        case VK_FORMAT_R16_UNORM:
        case VK_FORMAT_R16_UINT:
            c.x = Float(Int(*Pointer<UShort>(element)));
            c.w = float(0xFFFF);
            break;
        case VK_FORMAT_R32_SINT:
            c.x = Float(*Pointer<Int>(element));
            c.w = float(0x7FFFFFFF);
            break;
        case VK_FORMAT_R32_UINT:
            c.x = Float(*Pointer<UInt>(element));
            c.w = float(0xFFFFFFFF);
            break;
        case VK_FORMAT_B8G8R8A8_SRGB:
        case VK_FORMAT_B8G8R8A8_UNORM:
            c = Float4(*Pointer<Byte4>(element)).zyxw;
            break;
        case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
        case VK_FORMAT_R8G8B8A8_SNORM:
            c = Float4(*Pointer<SByte4>(element));
            break;
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        case VK_FORMAT_R8G8B8A8_UNORM:
        case VK_FORMAT_R8G8B8A8_UINT:
        case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
        case VK_FORMAT_R8G8B8A8_SRGB:
            c = Float4(*Pointer<Byte4>(element));
            break;
        case VK_FORMAT_R16G16B16A16_SINT:
            c = Float4(*Pointer<Short4>(element));
            break;
        case VK_FORMAT_R16G16B16A16_UNORM:
        case VK_FORMAT_R16G16B16A16_UINT:
            c = Float4(*Pointer<UShort4>(element));
            break;
        case VK_FORMAT_R32G32B32A32_SINT:
            c = Float4(*Pointer<Int4>(element));
            break;
        case VK_FORMAT_R32G32B32A32_UINT:
            c = Float4(*Pointer<UInt4>(element));
            break;
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8_SNORM:
            c.x = Float(Int(*Pointer<SByte>(element + 0)));
            c.y = Float(Int(*Pointer<SByte>(element + 1)));
            c.w = float(0x7F);
            break;
        case VK_FORMAT_R8G8_UNORM:
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8G8_SRGB:
            c.x = Float(Int(*Pointer<Byte>(element + 0)));
            c.y = Float(Int(*Pointer<Byte>(element + 1)));
            c.w = float(0xFF);
            break;
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16_SNORM:
            c.x = Float(Int(*Pointer<Short>(element + 0)));
            c.y = Float(Int(*Pointer<Short>(element + 2)));
            c.w = float(0x7FFF);
            break;
        case VK_FORMAT_R16G16_UNORM:
        case VK_FORMAT_R16G16_UINT:
            c.x = Float(Int(*Pointer<UShort>(element + 0)));
            c.y = Float(Int(*Pointer<UShort>(element + 2)));
            c.w = float(0xFFFF);
            break;
        case VK_FORMAT_R32G32_SINT:
            c.x = Float(*Pointer<Int>(element + 0));
            c.y = Float(*Pointer<Int>(element + 4));
            c.w = float(0x7FFFFFFF);
            break;
        case VK_FORMAT_R32G32_UINT:
            c.x = Float(*Pointer<UInt>(element + 0));
            c.y = Float(*Pointer<UInt>(element + 4));
            c.w = float(0xFFFFFFFF);
            break;
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            c = *Pointer<Float4>(element);
            break;
        case VK_FORMAT_R32G32_SFLOAT:
            c.x = *Pointer<Float>(element + 0);
            c.y = *Pointer<Float>(element + 4);
            break;
        case VK_FORMAT_R32_SFLOAT:
            c.x = *Pointer<Float>(element);
            break;
        case VK_FORMAT_R16G16B16A16_SFLOAT:
            c.w = Float(*Pointer<Half>(element + 6));
        case VK_FORMAT_R16G16B16_SFLOAT:
            c.z = Float(*Pointer<Half>(element + 4));
        case VK_FORMAT_R16G16_SFLOAT:
            c.y = Float(*Pointer<Half>(element + 2));
        case VK_FORMAT_R16_SFLOAT:
            c.x = Float(*Pointer<Half>(element));
            break;
        case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
            // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
            // Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
            // copy/pasting the bits so the exponent bits and top mantissa bits are aligned to the half format.
            // In this case, we have ('-' spans the channel's bits, 'x' the neighboring bits included in the same 16-bit read):
            //              B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R
            // 1st Short:                                  |xxxxxxxxxx---------------------|
            // 2nd Short:                  |xxxx---------------------xxxxxx|
            // 3rd Short:  |--------------------xxxxxxxxxxxx|
            // These memory reads overlap, but each of them contains an entire channel, so we can read this without
            // any int -> short conversion.
            c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4)));
            c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1)));
            c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1)));
            break;
        case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
            // This type contains a common 5 bit exponent (E) and a 9 bit mantissa for each of R, G and B.
            c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF));         // R's mantissa (bits 0-8)
            c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9);  // G's mantissa (bits 9-17)
            c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
            c *= Float4(
                // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
                Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
                // Since the 9 bit mantissa values currently stored in RGB were converted straight
                // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
                // are (1 << 9) times too high.
                // Also, the exponent has 5 bits and we compute the exponent bias of floating point
                // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15.
                // Exponent bias (15) + number of mantissa bits per component (9) = 24
                Float(1.0f / (1 << 24)));
            c.w = 1.0f;
            break;
        case VK_FORMAT_R5G6B5_UNORM_PACK16:
            c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
            c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
            c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
            break;
        case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
            c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
            c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
            c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
            c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
            break;
        case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
        case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
            c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
            c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
            c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
            break;
        case VK_FORMAT_D16_UNORM:
            c.x = Float(Int((*Pointer<UShort>(element))));
            break;
        case VK_FORMAT_X8_D24_UNORM_PACK32:
            c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
            break;
        case VK_FORMAT_D32_SFLOAT:
            c.x = *Pointer<Float>(element);
            break;
        case VK_FORMAT_S8_UINT:
            c.x = Float(Int(*Pointer<Byte>(element)));
            break;
        default:
            UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
        }

        return c;
    }

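    // write() stores a Float4 color to the destination format, honoring the
    // per-channel write masks from the State. Partial writes to packed
    // formats use a read-modify-write with a bit mask so that unwritten
    // channels are preserved.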
    void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
    {
        bool writeR = state.writeRed;
        bool writeG = state.writeGreen;
        bool writeB = state.writeBlue;
        bool writeA = state.writeAlpha;
        bool writeRGBA = writeR && writeG && writeB && writeA;

        switch(state.destFormat)
        {
        case VK_FORMAT_R4G4_UNORM_PACK8:
            if(writeR || writeG)
            {
                if(!writeR)
                {
                    *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
                                              (*Pointer<Byte>(element) & Byte(0xF0));
                }
                else if(!writeG)
                {
                    *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
                                              (Byte(RoundInt(Float(c.x))) << Byte(4));
                }
                else
                {
                    *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
                                              (Byte(RoundInt(Float(c.x))) << Byte(4));
                }
            }
            break;
        case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
            if(writeR || writeG || writeB || writeA)
            {
                *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) :
                                                      (*Pointer<UShort>(element) & UShort(0xF000))) |
                                            (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) :
                                                      (*Pointer<UShort>(element) & UShort(0x0F00))) |
                                            (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) :
                                                      (*Pointer<UShort>(element) & UShort(0x00F0))) |
                                            (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) :
                                                      (*Pointer<UShort>(element) & UShort(0x000F)));
            }
            break;
        case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
            if(writeRGBA)
            {
                *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
                                            UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
                                            UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
                                            UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
            }
            else
            {
                unsigned short mask = (writeA ? 0x000F : 0x0000) |
                                      (writeR ? 0x00F0 : 0x0000) |
                                      (writeG ? 0x0F00 : 0x0000) |
                                      (writeB ? 0xF000 : 0x0000);
                unsigned short unmask = ~mask;
                *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
                                            ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
                                              UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
                                              UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
                                              UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask));
            }
            break;
        case VK_FORMAT_B8G8R8A8_SRGB:
        case VK_FORMAT_B8G8R8A8_UNORM:
            if(writeRGBA)
            {
                Short4 c0 = RoundShort4(c.zyxw);
                *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
            }
            else
            {
                if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
                if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
                if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
                if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
            }
            break;
        case VK_FORMAT_B8G8R8_SNORM:
            if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
            if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
            if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_B8G8R8_UNORM:
        case VK_FORMAT_B8G8R8_SRGB:
            if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
            if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
            if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
        case VK_FORMAT_R8G8B8A8_UNORM:
        case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
        case VK_FORMAT_R8G8B8A8_SRGB:
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_R8G8B8A8_UINT:
        case VK_FORMAT_R8G8B8A8_USCALED:
        case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
            if(writeRGBA)
            {
                Short4 c0 = RoundShort4(c);
                *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
            }
            else
            {
                if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
                if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
                if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
            }
            break;
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            if(writeRGBA)
            {
                *Pointer<Float4>(element) = c;
            }
            else
            {
                if(writeR) { *Pointer<Float>(element) = c.x; }
                if(writeG) { *Pointer<Float>(element + 4) = c.y; }
                if(writeB) { *Pointer<Float>(element + 8) = c.z; }
                if(writeA) { *Pointer<Float>(element + 12) = c.w; }
            }
            break;
        case VK_FORMAT_R32G32B32_SFLOAT:
            if(writeR) { *Pointer<Float>(element) = c.x; }
            if(writeG) { *Pointer<Float>(element + 4) = c.y; }
            if(writeB) { *Pointer<Float>(element + 8) = c.z; }
            break;
        case VK_FORMAT_R32G32_SFLOAT:
            if(writeR && writeG)
            {
                *Pointer<Float2>(element) = Float2(c);
            }
            else
            {
                if(writeR) { *Pointer<Float>(element) = c.x; }
                if(writeG) { *Pointer<Float>(element + 4) = c.y; }
            }
            break;
        case VK_FORMAT_R32_SFLOAT:
            if(writeR) { *Pointer<Float>(element) = c.x; }
            break;
        case VK_FORMAT_R16G16B16A16_SFLOAT:
            if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
        case VK_FORMAT_R16G16B16_SFLOAT:
            if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
        case VK_FORMAT_R16G16_SFLOAT:
            if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
        case VK_FORMAT_R16_SFLOAT:
            if(writeR) { *Pointer<Half>(element) = Half(c.x); }
            break;
        case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
        {
            // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
            // Since the 16-bit half-precision float format also has a 5 bit exponent, we can extract these minifloats from them.

            // FIXME(b/138944025): Handle negative values, Inf, and NaN.
            // FIXME(b/138944025): Perform rounding before truncating the mantissa.
            UInt r = (UInt(As<UShort>(Half(c.x))) & 0x00007FF0) >> 4;
            UInt g = (UInt(As<UShort>(Half(c.y))) & 0x00007FF0) << 7;
            UInt b = (UInt(As<UShort>(Half(c.z))) & 0x00007FE0) << 17;

            UInt rgb = r | g | b;

            UInt old = *Pointer<UInt>(element);

            unsigned int mask = (writeR ? 0x000007FF : 0) |
                                (writeG ? 0x003FF800 : 0) |
                                (writeB ? 0xFFC00000 : 0);

            *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
        }
        break;
        case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
        {
            ASSERT(writeRGBA); // Can't sensibly write just part of this format.

            // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion

            constexpr int N = 9;      // number of mantissa bits per component
            constexpr int B = 15;     // exponent bias
            constexpr int E_max = 31; // maximum possible biased exponent value

            // Maximum representable value.
            constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));

            // Clamp components to valid range. NaN becomes 0.
            Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
            Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
            Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);

            // We're reducing the mantissa to 9 bits, so we must round up if the next
            // bit is 1. In other words add 0.5 to the new mantissa's position and
            // allow overflow into the exponent so we can scale correctly.
            constexpr int half = 1 << (23 - N);
            Float red_r = As<Float>(As<Int>(red_c) + half);
            Float green_r = As<Float>(As<Int>(green_c) + half);
            Float blue_r = As<Float>(As<Int>(blue_c) + half);

            // The largest component determines the shared exponent. It can't be lower
            // than 0 (after bias subtraction) so also limit to the minimum representable.
            constexpr float min_s = 0.5f / (1 << B);
            Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));

            // Obtain the reciprocal of the shared exponent by inverting the bits,
            // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
            // format has an implicit leading 1, but this shared component format does not.
            Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));

            UInt R9 = UInt(RoundInt(red_c * scale));
            UInt G9 = UInt(RoundInt(green_c * scale));
            UInt B9 = UInt(RoundInt(blue_c * scale));
            UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;

            UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;

            *Pointer<UInt>(element) = E5B9G9R9;
        }
        break;
        case VK_FORMAT_B8G8R8A8_SNORM:
            if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
            if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
            if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
            if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
            break;
        case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
        case VK_FORMAT_R8G8B8A8_SNORM:
        case VK_FORMAT_R8G8B8A8_SSCALED:
        case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
            if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
        case VK_FORMAT_R8G8B8_SINT:
        case VK_FORMAT_R8G8B8_SNORM:
        case VK_FORMAT_R8G8B8_SSCALED:
            if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8_SNORM:
        case VK_FORMAT_R8G8_SSCALED:
            if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8_SNORM:
        case VK_FORMAT_R8_SSCALED:
            if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8B8_UNORM:
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8B8_SRGB:
            if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8G8_UNORM:
        case VK_FORMAT_R8G8_USCALED:
        case VK_FORMAT_R8G8_SRGB:
            if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8_UNORM:
        case VK_FORMAT_R8_USCALED:
        case VK_FORMAT_R8_SRGB:
            if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16G16B16A16_SNORM:
        case VK_FORMAT_R16G16B16A16_SSCALED:
            if(writeRGBA)
            {
                *Pointer<Short4>(element) = Short4(RoundInt(c));
            }
            else
            {
                if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
                if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
                if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
            }
            break;
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16B16_SNORM:
        case VK_FORMAT_R16G16B16_SSCALED:
            if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
            if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
            if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
            break;
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16_SNORM:
        case VK_FORMAT_R16G16_SSCALED:
            if(writeR && writeG)
            {
                *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
            }
            else
            {
                if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
            }
            break;
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16_SNORM:
        case VK_FORMAT_R16_SSCALED:
            if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_R16G16B16A16_UINT:
        case VK_FORMAT_R16G16B16A16_UNORM:
        case VK_FORMAT_R16G16B16A16_USCALED:
            if(writeRGBA)
            {
                *Pointer<UShort4>(element) = UShort4(RoundInt(c));
            }
            else
            {
                if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
                if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
                if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
            }
            break;
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16B16_UNORM:
        case VK_FORMAT_R16G16B16_USCALED:
            if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
            if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
            if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
            break;
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16G16_UNORM:
        case VK_FORMAT_R16G16_USCALED:
            if(writeR && writeG)
            {
                *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
            }
            else
            {
                if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
            }
            break;
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16_UNORM:
        case VK_FORMAT_R16_USCALED:
            if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_R32G32B32A32_SINT:
            if(writeRGBA)
            {
                *Pointer<Int4>(element) = RoundInt(c);
            }
            else
            {
                if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
                if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
                if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
                if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
            }
            break;
        case VK_FORMAT_R32G32B32_SINT:
            if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
        case VK_FORMAT_R32G32_SINT:
            if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
        case VK_FORMAT_R32_SINT:
            if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
            break;
        case VK_FORMAT_R32G32B32A32_UINT:
            if(writeRGBA)
            {
                *Pointer<UInt4>(element) = UInt4(RoundInt(c));
            }
            else
            {
                if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
                if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
                if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
                if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
            }
            break;
        case VK_FORMAT_R32G32B32_UINT:
            if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
        case VK_FORMAT_R32G32_UINT:
            if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
        case VK_FORMAT_R32_UINT:
            if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
            break;
        case VK_FORMAT_R5G6B5_UNORM_PACK16:
            if(writeR && writeG && writeB)
            {
                *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
                                                   (RoundInt(Float(c.y)) << Int(5)) |
                                                   (RoundInt(Float(c.x)) << Int(11)));
            }
            else
            {
                unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
                unsigned short unmask = ~mask;
                *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
                                            (UShort(RoundInt(Float(c.z)) |
                                                    (RoundInt(Float(c.y)) << Int(5)) |
                                                    (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
            }
            break;
        case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
            if(writeRGBA)
            {
                *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
                                                   (RoundInt(Float(c.z)) << Int(1)) |
                                                   (RoundInt(Float(c.y)) << Int(6)) |
                                                   (RoundInt(Float(c.x)) << Int(11)));
            }
            else
            {
                unsigned short mask = (writeA ? 0x0001 : 0x0000) |
                                      (writeB ? 0x003E : 0x0000) |
                                      (writeG ? 0x07C0 : 0x0000) |
                                      (writeR ? 0xF800 : 0x0000);
                unsigned short unmask = ~mask;
                *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
                                            (UShort(RoundInt(Float(c.w)) |
                                                    (RoundInt(Float(c.z)) << Int(1)) |
                                                    (RoundInt(Float(c.y)) << Int(6)) |
                                                    (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
            }
            break;
        case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
            if(writeRGBA)
            {
                *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
                                                   (RoundInt(Float(c.x)) << Int(1)) |
                                                   (RoundInt(Float(c.y)) << Int(6)) |
                                                   (RoundInt(Float(c.z)) << Int(11)));
            }
            else
            {
                unsigned short mask = (writeA ? 0x0001 : 0x0000) |
                                      (writeR ? 0x003E : 0x0000) |
                                      (writeG ? 0x07C0 : 0x0000) |
                                      (writeB ? 0xF800 : 0x0000);
                unsigned short unmask = ~mask;
                *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
                                            (UShort(RoundInt(Float(c.w)) |
                                                    (RoundInt(Float(c.x)) << Int(1)) |
                                                    (RoundInt(Float(c.y)) << Int(6)) |
                                                    (RoundInt(Float(c.z)) << Int(11))) & UShort(mask));
            }
            break;
        case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
            if(writeRGBA)
            {
                *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
                                                   (RoundInt(Float(c.y)) << Int(5)) |
                                                   (RoundInt(Float(c.x)) << Int(10)) |
                                                   (RoundInt(Float(c.w)) << Int(15)));
            }
            else
            {
                unsigned short mask = (writeA ? 0x8000 : 0x0000) |
                                      (writeR ? 0x7C00 : 0x0000) |
                                      (writeG ? 0x03E0 : 0x0000) |
                                      (writeB ? 0x001F : 0x0000);
                unsigned short unmask = ~mask;
                *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
                                            (UShort(RoundInt(Float(c.z)) |
                                                    (RoundInt(Float(c.y)) << Int(5)) |
                                                    (RoundInt(Float(c.x)) << Int(10)) |
                                                    (RoundInt(Float(c.w)) << Int(15))) & UShort(mask));
            }
            break;
        case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
        case VK_FORMAT_A2B10G10R10_UINT_PACK32:
        case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
            if(writeRGBA)
            {
                *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
                                               (RoundInt(Float(c.y)) << 10) |
                                               (RoundInt(Float(c.z)) << 20) |
                                               (RoundInt(Float(c.w)) << 30));
            }
            else
            {
                unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
                                    (writeB ? 0x3FF00000 : 0x0000) |
                                    (writeG ? 0x000FFC00 : 0x0000) |
                                    (writeR ? 0x000003FF : 0x0000);
                unsigned int unmask = ~mask;
                *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
                                          (UInt(RoundInt(Float(c.x)) |
                                                (RoundInt(Float(c.y)) << 10) |
                                                (RoundInt(Float(c.z)) << 20) |
                                                (RoundInt(Float(c.w)) << 30)) & UInt(mask));
            }
            break;
        case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
        case VK_FORMAT_A2R10G10B10_UINT_PACK32:
        case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
            if(writeRGBA)
            {
                *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
                                               (RoundInt(Float(c.y)) << 10) |
                                               (RoundInt(Float(c.x)) << 20) |
                                               (RoundInt(Float(c.w)) << 30));
            }
            else
            {
                unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
                                    (writeR ? 0x3FF00000 : 0x0000) |
                                    (writeG ? 0x000FFC00 : 0x0000) |
                                    (writeB ? 0x000003FF : 0x0000);
                unsigned int unmask = ~mask;
                *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
                                          (UInt(RoundInt(Float(c.z)) |
                                                (RoundInt(Float(c.y)) << 10) |
                                                (RoundInt(Float(c.x)) << 20) |
                                                (RoundInt(Float(c.w)) << 30)) & UInt(mask));
            }
            break;
        case VK_FORMAT_D16_UNORM:
            *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
            break;
        case VK_FORMAT_X8_D24_UNORM_PACK32:
            *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
            break;
        case VK_FORMAT_D32_SFLOAT:
            *Pointer<Float>(element) = c.x;
            break;
        case VK_FORMAT_S8_UINT:
            *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
            break;
        default:
            UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
            break;
        }
    }

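    // readInt4() reads a single texel of an integer source format into an
    // Int4 without a round trip through floating point, preserving the full
    // 32-bit range of SINT/UINT formats.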
    Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
    {
        Int4 c(0, 0, 0, 1);

        switch(state.sourceFormat)
        {
        case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        case VK_FORMAT_R8G8B8A8_SINT:
            c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
            c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
        case VK_FORMAT_R8G8_SINT:
            c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
        case VK_FORMAT_R8_SINT:
            c = Insert(c, Int(*Pointer<SByte>(element)), 0);
            break;
        case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
            c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
            c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
            c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
            break;
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_R8G8B8A8_UINT:
            c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
            c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
        case VK_FORMAT_R8G8_UINT:
            c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_S8_UINT:
            c = Insert(c, Int(*Pointer<Byte>(element)), 0);
            break;
        case VK_FORMAT_R16G16B16A16_SINT:
            c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
            c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
        case VK_FORMAT_R16G16_SINT:
            c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
        case VK_FORMAT_R16_SINT:
            c = Insert(c, Int(*Pointer<Short>(element)), 0);
            break;
        case VK_FORMAT_R16G16B16A16_UINT:
            c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
            c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
        case VK_FORMAT_R16G16_UINT:
            c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
        case VK_FORMAT_R16_UINT:
            c = Insert(c, Int(*Pointer<UShort>(element)), 0);
            break;
        case VK_FORMAT_R32G32B32A32_SINT:
        case VK_FORMAT_R32G32B32A32_UINT:
            c = *Pointer<Int4>(element);
            break;
        case VK_FORMAT_R32G32_SINT:
        case VK_FORMAT_R32G32_UINT:
            c = Insert(c, *Pointer<Int>(element + 4), 1);
        case VK_FORMAT_R32_SINT:
        case VK_FORMAT_R32_UINT:
            c = Insert(c, *Pointer<Int>(element), 0);
            break;
        default:
            UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
        }

        return c;
    }

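    // write() for integer colors. The first switch clamps the components to
    // the destination's representable range (e.g. [0, 255] for 8-bit UINT,
    // [-128, 127] for 8-bit SINT); the second switch stores them with the
    // same channel-masking rules as the floating-point path.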
    void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
    {
        bool writeR = state.writeRed;
        bool writeG = state.writeGreen;
        bool writeB = state.writeBlue;
        bool writeA = state.writeAlpha;
        bool writeRGBA = writeR && writeG && writeB && writeA;

        switch(state.destFormat)
        {
        case VK_FORMAT_A2B10G10R10_UINT_PACK32:
            c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
            break;
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_R8G8B8A8_UINT:
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8G8B8A8_USCALED:
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8_USCALED:
        case VK_FORMAT_R8_USCALED:
        case VK_FORMAT_S8_UINT:
            c = Min(As<UInt4>(c), UInt4(0xFF));
            break;
        case VK_FORMAT_R16G16B16A16_UINT:
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16G16B16A16_USCALED:
        case VK_FORMAT_R16G16B16_USCALED:
        case VK_FORMAT_R16G16_USCALED:
        case VK_FORMAT_R16_USCALED:
            c = Min(As<UInt4>(c), UInt4(0xFFFF));
            break;
        case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8G8B8A8_SSCALED:
        case VK_FORMAT_R8G8B8_SSCALED:
        case VK_FORMAT_R8G8_SSCALED:
        case VK_FORMAT_R8_SSCALED:
            c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
            break;
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16G16B16A16_SSCALED:
        case VK_FORMAT_R16G16B16_SSCALED:
        case VK_FORMAT_R16G16_SSCALED:
        case VK_FORMAT_R16_SSCALED:
            c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
            break;
        default:
            break;
        }

        switch(state.destFormat)
        {
        case VK_FORMAT_B8G8R8A8_SINT:
        case VK_FORMAT_B8G8R8A8_SSCALED:
            if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
        case VK_FORMAT_B8G8R8_SINT:
        case VK_FORMAT_B8G8R8_SSCALED:
            if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
            if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
            if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
            break;
        case VK_FORMAT_A8B8G8R8_SINT_PACK32:
        case VK_FORMAT_R8G8B8A8_SINT:
        case VK_FORMAT_R8G8B8A8_SSCALED:
        case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
            if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
        case VK_FORMAT_R8G8B8_SINT:
        case VK_FORMAT_R8G8B8_SSCALED:
            if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
        case VK_FORMAT_R8G8_SINT:
        case VK_FORMAT_R8G8_SSCALED:
            if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
        case VK_FORMAT_R8_SINT:
        case VK_FORMAT_R8_SSCALED:
            if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
            break;
        case VK_FORMAT_A2B10G10R10_UINT_PACK32:
        case VK_FORMAT_A2B10G10R10_SINT_PACK32:
        case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
        case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
            if(writeRGBA)
            {
                *Pointer<UInt>(element) =
                    UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
            }
            else
            {
                unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
                                    (writeB ? 0x3FF00000 : 0x0000) |
                                    (writeG ? 0x000FFC00 : 0x0000) |
                                    (writeR ? 0x000003FF : 0x0000);
                unsigned int unmask = ~mask;
                *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
                                          (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
            }
            break;
        case VK_FORMAT_A2R10G10B10_UINT_PACK32:
        case VK_FORMAT_A2R10G10B10_SINT_PACK32:
        case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
        case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
            if(writeRGBA)
            {
                *Pointer<UInt>(element) =
                    UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
            }
            else
            {
                unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
                                    (writeR ? 0x3FF00000 : 0x0000) |
                                    (writeG ? 0x000FFC00 : 0x0000) |
                                    (writeB ? 0x000003FF : 0x0000);
                unsigned int unmask = ~mask;
                *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
                                          (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
            }
            break;
        case VK_FORMAT_B8G8R8A8_UINT:
        case VK_FORMAT_B8G8R8A8_USCALED:
            if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
        case VK_FORMAT_B8G8R8_UINT:
        case VK_FORMAT_B8G8R8_USCALED:
        case VK_FORMAT_B8G8R8_SRGB:
            if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
            if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
            if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
            break;
        case VK_FORMAT_A8B8G8R8_UINT_PACK32:
        case VK_FORMAT_R8G8B8A8_UINT:
        case VK_FORMAT_R8G8B8A8_USCALED:
        case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
            if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
        case VK_FORMAT_R8G8B8_UINT:
        case VK_FORMAT_R8G8B8_USCALED:
            if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
        case VK_FORMAT_R8G8_UINT:
        case VK_FORMAT_R8G8_USCALED:
            if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
        case VK_FORMAT_R8_UINT:
        case VK_FORMAT_R8_USCALED:
        case VK_FORMAT_S8_UINT:
            if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
            break;
        case VK_FORMAT_R16G16B16A16_SINT:
        case VK_FORMAT_R16G16B16A16_SSCALED:
            if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
        case VK_FORMAT_R16G16B16_SINT:
        case VK_FORMAT_R16G16B16_SSCALED:
            if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
        case VK_FORMAT_R16G16_SINT:
        case VK_FORMAT_R16G16_SSCALED:
            if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
        case VK_FORMAT_R16_SINT:
        case VK_FORMAT_R16_SSCALED:
            if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
            break;
        case VK_FORMAT_R16G16B16A16_UINT:
        case VK_FORMAT_R16G16B16A16_USCALED:
            if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
        case VK_FORMAT_R16G16B16_UINT:
        case VK_FORMAT_R16G16B16_USCALED:
            if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
        case VK_FORMAT_R16G16_UINT:
        case VK_FORMAT_R16G16_USCALED:
            if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
        case VK_FORMAT_R16_UINT:
        case VK_FORMAT_R16_USCALED:
            if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
            break;
        case VK_FORMAT_R32G32B32A32_SINT:
            if(writeRGBA)
            {
                *Pointer<Int4>(element) = c;
            }
            else
            {
                if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
                if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
                if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
                if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
            }
            break;
        case VK_FORMAT_R32G32B32_SINT:
            if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
            if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
            if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
            break;
        case VK_FORMAT_R32G32_SINT:
            if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
            if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
            break;
        case VK_FORMAT_R32_SINT:
            if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
            break;
        case VK_FORMAT_R32G32B32A32_UINT:
            if(writeRGBA)
            {
                *Pointer<UInt4>(element) = As<UInt4>(c);
            }
            else
            {
                if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
                if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
                if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
                if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
            }
            break;
        case VK_FORMAT_R32G32B32_UINT:
            if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
        case VK_FORMAT_R32G32_UINT:
            if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
        case VK_FORMAT_R32_UINT:
            if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
            break;
        default:
            UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
        }
    }

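    // ApplyScaleAndClamp() converts a color read in the source format's raw
    // range to the destination format's range, applying sRGB conversion when
    // one of the two formats is sRGB encoded, and clamping float sources to
    // the destination's representable range. preScaled indicates the value
    // was already scaled to the destination's range by an earlier call, so
    // the scale is temporarily unapplied for the sRGB conversion.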
    void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
    {
        float4 scale{}, unscale{};

        if(state.clearOperation &&
           state.sourceFormat.isNonNormalizedInteger() &&
           !state.destFormat.isNonNormalizedInteger())
        {
            // If we're clearing a buffer from an int or uint color into a normalized color,
            // then the whole range of the int or uint color must be scaled between 0 and 1.
            switch(state.sourceFormat)
            {
            case VK_FORMAT_R32G32B32A32_SINT:
                unscale = replicate(static_cast<float>(0x7FFFFFFF));
                break;
            case VK_FORMAT_R32G32B32A32_UINT:
                unscale = replicate(static_cast<float>(0xFFFFFFFF));
                break;
            default:
                UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
            }
        }
        else
        {
            unscale = state.sourceFormat.getScale();
        }

        scale = state.destFormat.getScale();

        bool srcSRGB = state.sourceFormat.isSRGBformat();
        bool dstSRGB = state.destFormat.isSRGBformat();

        if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
        {
            value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
                                 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
            value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
            value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
        }
        else if(unscale != scale)
        {
            value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
        }

        if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
        {
            value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));

            value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
                                      state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
                                      state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
                                      state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
        }
    }

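    // ComputeOffset() turns (x, y) coordinates into a byte offset. For quad
    // layout formats, texels are stored as 2x2 quads rather than linear rows.
    // Illustrative worked example (hypothetical values: 4-byte texels,
    // pitchB = 64, quad layout):
    //   (x, y) = (3, 2) -> (2 & ~1) * 64 + ((((2 & 1) + 3) << 1) - (3 & 1)) * 4
    //                    = 128 + 5 * 4 = 148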
    Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
    {
        if(!quadLayout)
        {
            return y * pitchB + x * bytes;
        }
        else
        {
            // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
            return (y & Int(~1)) * pitchB +
                   ((((y & Int(1)) + x) << 1) - (x & Int(1))) * bytes;
        }
    }

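    // Linear -> sRGB encode, per the standard sRGB transfer function:
    //   f(c) = 12.92 * c                    for c <= 0.0031308
    //   f(c) = 1.055 * c^(1/2.4) - 0.055    otherwise
    // Taking Max(lc, ec) selects the correct branch for non-negative inputs,
    // since the two curves intersect at the threshold. Alpha passes through.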
    Float4 Blitter::LinearToSRGB(Float4 &c)
    {
        Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
        Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);

        Float4 s = c;
        s.xyz = Max(lc, ec);

        return s;
    }

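    // sRGB -> linear decode, the inverse of the above:
    //   f(c) = c / 12.92                    for c <= 0.04045
    //   f(c) = ((c + 0.055) / 1.055)^2.4    otherwise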
    Float4 Blitter::sRGBtoLinear(Float4 &c)
    {
        Float4 lc = c * Float4(1.0f / 12.92f);
        Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));

        Int4 linear = CmpLT(c, Float4(0.04045f));

        Float4 s = c;
        s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()

        return s;
    }

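    // generate() emits a Reactor routine specialized for the given State. The
    // routine loops over the destination rectangle, reading (and optionally
    // filtering, resolving and scaling) source texels, then writing them out
    // in the destination format. Clear operations hoist the constant color
    // out of the loops.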
    Blitter::BlitRoutineType Blitter::generate(const State &state)
    {
        BlitFunction function;
        {
            Pointer<Byte> blit(function.Arg<0>());

            Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
            Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
            Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
            Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));

            Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
            Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
            Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
            Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));

            Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
            Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
            Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
            Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));

            Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
            Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));

            bool intSrc = state.sourceFormat.isNonNormalizedInteger();
            bool intDst = state.destFormat.isNonNormalizedInteger();
            bool intBoth = intSrc && intDst;
            bool srcQuadLayout = state.sourceFormat.hasQuadLayout();
            bool dstQuadLayout = state.destFormat.hasQuadLayout();
            int srcBytes = state.sourceFormat.bytes();
            int dstBytes = state.destFormat.bytes();

            bool hasConstantColorI = false;
            Int4 constantColorI;
            bool hasConstantColorF = false;
            Float4 constantColorF;
            if(state.clearOperation)
            {
                if(intBoth) // Integer types
                {
                    constantColorI = readInt4(source, state);
                    hasConstantColorI = true;
                }
                else
                {
                    constantColorF = readFloat4(source, state);
                    hasConstantColorF = true;

                    ApplyScaleAndClamp(constantColorF, state);
                }
            }

            For(Int j = y0d, j < y1d, j++)
            {
                Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
                Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;

                For(Int i = x0d, i < x1d, i++)
                {
                    Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
                    Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;

                    if(hasConstantColorI)
                    {
                        for(int s = 0; s < state.destSamples; s++)
                        {
                            write(constantColorI, d, state);

                            d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
                        }
                    }
                    else if(hasConstantColorF)
                    {
                        for(int s = 0; s < state.destSamples; s++)
                        {
                            write(constantColorF, d, state);

                            d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
                        }
                    }
                    else if(intBoth) // Integer types do not support filtering
                    {
                        Int X = Int(x);
                        Int Y = Int(y);

                        if(state.clampToEdge)
                        {
                            X = Clamp(X, 0, sWidth - 1);
                            Y = Clamp(Y, 0, sHeight - 1);
                        }

                        Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);

                        // When both formats are true integer types, we don't go to float to avoid losing precision
                        Int4 color = readInt4(s, state);
                        for(int sample = 0; sample < state.destSamples; sample++)
                        {
                            write(color, d, state);

                            d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
                        }
                    }
                    else
                    {
                        Float4 color;

                        bool preScaled = false;
                        if(!state.filter || intSrc)
                        {
                            Int X = Int(x);
                            Int Y = Int(y);

                            if(state.clampToEdge)
                            {
                                X = Clamp(X, 0, sWidth - 1);
                                Y = Clamp(Y, 0, sHeight - 1);
                            }

                            Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);

                            color = readFloat4(s, state);

                            if(state.srcSamples > 1) // Resolve multisampled source
                            {
                                if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
                                {
                                    ApplyScaleAndClamp(color, state);
                                    preScaled = true;
                                }
                                Float4 accum = color;
                                for(int sample = 1; sample < state.srcSamples; sample++)
                                {
                                    s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
                                    color = readFloat4(s, state);

                                    if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
                                    {
                                        ApplyScaleAndClamp(color, state);
                                        preScaled = true;
                                    }
                                    accum += color;
                                }
                                color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
                            }
                        }
                        else // Bilinear filtering
                        {
                            Float X = x;
                            Float Y = y;

                            if(state.clampToEdge)
                            {
                                X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
                                Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
                            }

                            Float x0 = X - 0.5f;
                            Float y0 = Y - 0.5f;

                            Int X0 = Max(Int(x0), 0);
                            Int Y0 = Max(Int(y0), 0);

                            Int X1 = X0 + 1;
                            Int Y1 = Y0 + 1;
                            X1 = IfThenElse(X1 >= sWidth, X0, X1);
                            Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);

                            Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
                            Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
                            Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
                            Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);

                            Float4 c00 = readFloat4(s00, state);
                            Float4 c01 = readFloat4(s01, state);
                            Float4 c10 = readFloat4(s10, state);
                            Float4 c11 = readFloat4(s11, state);

                            if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
                            {
                                ApplyScaleAndClamp(c00, state);
                                ApplyScaleAndClamp(c01, state);
                                ApplyScaleAndClamp(c10, state);
                                ApplyScaleAndClamp(c11, state);
                                preScaled = true;
                            }

                            Float4 fx = Float4(x0 - Float(X0));
                            Float4 fy = Float4(y0 - Float(Y0));
                            Float4 ix = Float4(1.0f) - fx;
                            Float4 iy = Float4(1.0f) - fy;

                            color = (c00 * ix + c01 * fx) * iy +
                                    (c10 * ix + c11 * fx) * fy;
                        }

                        ApplyScaleAndClamp(color, state, preScaled);

                        for(int s = 0; s < state.destSamples; s++)
                        {
                            write(color, d, state);

                            d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
                        }
                    }
                }
            }
        }

        return function("BlitRoutine");
    }

    Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
    {
        std::unique_lock<std::mutex> lock(blitMutex);
        auto blitRoutine = blitCache.query(state);

        if(!blitRoutine)
        {
            blitRoutine = generate(state);
            blitCache.add(state, blitRoutine);
        }

        return blitRoutine;
    }

    Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
    {
        std::unique_lock<std::mutex> lock(cornerUpdateMutex);
        auto cornerUpdateRoutine = cornerUpdateCache.query(state);

        if(!cornerUpdateRoutine)
        {
            cornerUpdateRoutine = generateCornerUpdate(state);
            cornerUpdateCache.add(state, cornerUpdateRoutine);
        }

        return cornerUpdateRoutine;
    }

1625 void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1626 {
1627 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1628 auto format = src->getFormat(aspect);
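		// Source texels are read in the image's (possibly quad) layout and written
		// out in the equivalent linear layout expected for buffer data.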
1629 State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
1630 Options{false, false});
1631
1632 auto blitRoutine = getBlitRoutine(state);
1633 if(!blitRoutine)
1634 {
1635 return;
1636 }
1637
1638 BlitData data =
1639 {
1640 nullptr, // source
1641 dst, // dest
1642 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1643 bufferRowPitch, // dPitchB
1644 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1645 bufferSlicePitch, // dSliceB
1646
			0.0f, 0.0f, 1.0f, 1.0f, // x0, y0, w, h
1648
1649 0, // y0d
1650 static_cast<int>(extent.height), // y1d
1651 0, // x0d
1652 static_cast<int>(extent.width), // x1d
1653
1654 static_cast<int>(extent.width), // sWidth
			static_cast<int>(extent.height) // sHeight
1656 };
1657
1658 VkOffset3D srcOffset = { 0, 0, offset.z };
1659
1660 VkImageSubresourceLayers srcSubresLayers = subresource;
1661 srcSubresLayers.layerCount = 1;
1662
1663 VkImageSubresourceRange srcSubresRange =
1664 {
1665 subresource.aspectMask,
1666 subresource.mipLevel,
1667 1,
1668 subresource.baseArrayLayer,
1669 subresource.layerCount
1670 };
1671
1672 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1673
1674 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
1675 {
1676 srcOffset.z = offset.z;
1677
1678 for(auto i = 0u; i < extent.depth; i++)
1679 {
1680 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1681 ASSERT(data.source < src->end());
1682 blitRoutine(&data);
1683 srcOffset.z++;
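				// Advance the destination pointer by one buffer slice per 2D blit.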
1684 data.dest = (dst += bufferSlicePitch);
1685 }
1686 }
1687 }
1688
1689 void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1690 {
1691 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1692 auto format = dst->getFormat(aspect);
1693 State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
1694 Options{false, false});
1695
1696 auto blitRoutine = getBlitRoutine(state);
1697 if(!blitRoutine)
1698 {
1699 return;
1700 }
1701
1702 BlitData data =
1703 {
1704 src, // source
1705 nullptr, // dest
1706 bufferRowPitch, // sPitchB
1707 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1708 bufferSlicePitch, // sSliceB
1709 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
1710
1711 static_cast<float>(-offset.x), // x0
1712 static_cast<float>(-offset.y), // y0
1713 1.0f, // w
1714 1.0f, // h
1715
1716 offset.y, // y0d
1717 static_cast<int>(offset.y + extent.height), // y1d
1718 offset.x, // x0d
1719 static_cast<int>(offset.x + extent.width), // x1d
1720
1721 static_cast<int>(extent.width), // sWidth
			static_cast<int>(extent.height) // sHeight
1723 };
1724
1725 VkOffset3D dstOffset = { 0, 0, offset.z };
1726
1727 VkImageSubresourceLayers dstSubresLayers = subresource;
1728 dstSubresLayers.layerCount = 1;
1729
1730 VkImageSubresourceRange dstSubresRange =
1731 {
1732 subresource.aspectMask,
1733 subresource.mipLevel,
1734 1,
1735 subresource.baseArrayLayer,
1736 subresource.layerCount
1737 };
1738
1739 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1740
1741 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1742 {
1743 dstOffset.z = offset.z;
1744
1745 for(auto i = 0u; i < extent.depth; i++)
1746 {
1747 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1748 ASSERT(data.dest < dst->end());
1749 blitRoutine(&data);
1750 dstOffset.z++;
1751 data.source = (src += bufferSlicePitch);
1752 }
1753 }
1754 }
1755
1756 void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1757 {
1758 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1759 {
1760 return;
1761 }
1762
		if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
		   (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
		{
			UNIMPLEMENTED("region: srcSubresource and dstSubresource must have matching layerCount and aspectMask");
		}
1768
1769 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1770 {
1771 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1772 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1773 }
1774
1775 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1776 {
1777 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1778 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1779 }
1780
1781 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1782 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1783 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1784
1785 int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
1786 ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));
1787
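		// Compute the affine mapping from destination pixel coordinates to source
		// coordinates; destination pixel centers map onto their corresponding
		// source positions.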
1788 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1789 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1790 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1791 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
1792 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1793 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
1794
1795 auto srcFormat = src->getFormat(srcAspect);
1796 auto dstFormat = dst->getFormat(dstAspect);
1797
1798 bool doFilter = (filter != VK_FILTER_NEAREST);
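		// Conversion to linear space is permitted when filtering or resolving, and
		// when the source and destination formats differ in sRGB encoding.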
1799 bool allowSRGBConversion =
1800 doFilter ||
1801 (src->getSampleCountFlagBits() > 1) ||
1802 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
1803
		State state(srcFormat, dstFormat, src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
					Options{ doFilter, allowSRGBConversion });
1806 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1807 (region.srcOffsets[0].y < 0) ||
1808 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1809 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1810 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
1811
1812 auto blitRoutine = getBlitRoutine(state);
1813 if(!blitRoutine)
1814 {
1815 return;
1816 }
1817
1818 BlitData data =
1819 {
1820 nullptr, // source
1821 nullptr, // dest
1822 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1823 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1824 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1825 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
1826
1827 x0,
1828 y0,
1829 widthRatio,
1830 heightRatio,
1831
1832 region.dstOffsets[0].y, // y0d
1833 region.dstOffsets[1].y, // y1d
1834 region.dstOffsets[0].x, // x0d
1835 region.dstOffsets[1].x, // x1d
1836
1837 static_cast<int>(srcExtent.width), // sWidth
			static_cast<int>(srcExtent.height) // sHeight
1839 };
1840
1841 VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
1842 VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
1843
1844 VkImageSubresourceLayers srcSubresLayers =
1845 {
1846 region.srcSubresource.aspectMask,
1847 region.srcSubresource.mipLevel,
1848 region.srcSubresource.baseArrayLayer,
1849 1
1850 };
1851
1852 VkImageSubresourceLayers dstSubresLayers =
1853 {
1854 region.dstSubresource.aspectMask,
1855 region.dstSubresource.mipLevel,
1856 region.dstSubresource.baseArrayLayer,
1857 1
1858 };
1859
1860 VkImageSubresourceRange srcSubresRange =
1861 {
1862 region.srcSubresource.aspectMask,
1863 region.srcSubresource.mipLevel,
1864 1,
1865 region.srcSubresource.baseArrayLayer,
1866 region.srcSubresource.layerCount
1867 };
1868
1869 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1870
1871 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1872 {
1873 srcOffset.z = region.srcOffsets[0].z;
1874 dstOffset.z = region.dstOffsets[0].z;
1875
1876 for(int i = 0; i < numSlices; i++)
1877 {
1878 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1879 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1880
1881 ASSERT(data.source < src->end());
1882 ASSERT(data.dest < dst->end());
1883
1884 blitRoutine(&data);
1885 srcOffset.z++;
1886 dstOffset.z++;
1887 }
1888 }
1889 }
1890
1891 void Blitter::computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state)
1892 {
1893 int bytes = state.sourceFormat.bytes();
1894 bool quadLayout = state.sourceFormat.hasQuadLayout();
1895
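		// Average the three texels surrounding the corner (the two adjacent border
		// texels and the corner pixel itself) and write the result to the corner
		// border texel at (x0, y0).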
1896 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes, quadLayout), state) +
1897 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes, quadLayout), state) +
1898 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes, quadLayout), state);
1899
1900 c *= Float4(1.0f / 3.0f);
1901
1902 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state);
1903 }
1904
1905 Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State& state)
1906 {
1907 // Reading and writing from/to the same image
1908 ASSERT(state.sourceFormat == state.destFormat);
1909 ASSERT(state.srcSamples == state.destSamples);
1910
1911 if(state.srcSamples != 1)
1912 {
1913 UNIMPLEMENTED("state.srcSamples %d", state.srcSamples);
1914 }
1915
1916 CornerUpdateFunction function;
1917 {
1918 Pointer<Byte> blit(function.Arg<0>());
1919
1920 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1921 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
			UInt layerSize = *Pointer<UInt>(blit + OFFSET(CubeBorderData, layerSize));
			UInt dim = *Pointer<UInt>(blit + OFFSET(CubeBorderData, dim));
1924
1925 // Low Border, Low Pixel, High Border, High Pixel
1926 Int LB(-1), LP(0), HB(dim), HP(dim-1);
1927
1928 for(int face = 0; face < 6; face++)
1929 {
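				// Update the four corner border texels of this face.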
1930 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1931 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1932 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1933 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1934 layers = layers + layerSize;
1935 }
1936 }
1937
		return function("CornerUpdateRoutine");
1939 }
1940
1941 void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers)
1942 {
1943 if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6))
1944 {
			UNIMPLEMENTED("image->getArrayLayers() %u, baseArrayLayer %u",
			              image->getArrayLayers(), subresourceLayers.baseArrayLayer);
1947 }
1948
1949 // From Vulkan 1.1 spec, section 11.5. Image Views:
1950 // "For cube and cube array image views, the layers of the image view starting
1951 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
1952 VkImageSubresourceLayers posX = subresourceLayers;
1953 posX.layerCount = 1;
1954 VkImageSubresourceLayers negX = posX;
1955 negX.baseArrayLayer++;
1956 VkImageSubresourceLayers posY = negX;
1957 posY.baseArrayLayer++;
1958 VkImageSubresourceLayers negY = posY;
1959 negY.baseArrayLayer++;
1960 VkImageSubresourceLayers posZ = negY;
1961 posZ.baseArrayLayer++;
1962 VkImageSubresourceLayers negZ = posZ;
1963 negZ.baseArrayLayer++;
1964
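		// Copy the edge texels of each adjacent face into this face's border, so
		// that filtering across a cube edge samples the neighboring face seamlessly.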
1965 // Copy top / bottom
1966 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
1967 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
1968 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
1969 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
1970 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
1971 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
1972
1973 copyCubeEdge(image, posX, TOP, posY, RIGHT);
1974 copyCubeEdge(image, posY, TOP, negZ, TOP);
1975 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
1976 copyCubeEdge(image, negX, TOP, posY, LEFT);
1977 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
1978 copyCubeEdge(image, negZ, TOP, posY, TOP);
1979
1980 // Copy left / right
1981 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
1982 copyCubeEdge(image, posY, RIGHT, posX, TOP);
1983 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
1984 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
1985 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
1986 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
1987
1988 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
1989 copyCubeEdge(image, posY, LEFT, negX, TOP);
1990 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
1991 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
1992 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
1993 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
1994
1995 // Compute corner colors
1996 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
1997 vk::Format format = image->getFormat(aspect);
1998 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
1999 State state(format, format, samples, samples, Options{ 0xF });
2000
2001 if(samples != VK_SAMPLE_COUNT_1_BIT)
2002 {
2003 UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
2004 }
2005
2006 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
2007 if(!cornerUpdateRoutine)
2008 {
2009 return;
2010 }
2011
2012 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
2013 CubeBorderData data =
2014 {
2015 image->getTexelPointer({ 0, 0, 0 }, posX),
2016 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
2017 static_cast<uint32_t>(image->getLayerSize(aspect)),
2018 extent.width
2019 };
2020 cornerUpdateRoutine(&data);
2021 }
2022
2023 void Blitter::copyCubeEdge(vk::Image* image,
2024 const VkImageSubresourceLayers& dstSubresourceLayers, Edge dstEdge,
2025 const VkImageSubresourceLayers& srcSubresourceLayers, Edge srcEdge)
2026 {
2027 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
2028 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
2029 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
2030 ASSERT(srcSubresourceLayers.layerCount == 1);
2031 ASSERT(dstSubresourceLayers.layerCount == 1);
2032
		// Determine whether the edge texels must be copied in reverse order relative
		// to one another. The copy is reversed whenever identical edges meet, or when
		// copying top <-> right or bottom <-> left. This follows from the cube layout:
		//
		//        | +y |
		// | -x | +z | +x | -z |
		//        | -y |
2040
2041 bool reverse = (srcEdge == dstEdge) ||
2042 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2043 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2044 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2045 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2046
2047 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2048 int bytes = image->getFormat(aspect).bytes();
2049 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2050
2051 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
2052 int w = extent.width;
2053 int h = extent.height;
2054 if(w != h)
2055 {
			UNSUPPORTED("Cube faces must be square: (%d, %d)", w, h);
2057 }
2058
2059 // Src is expressed in the regular [0, width-1], [0, height-1] space
2060 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2061 int srcDelta = srcHorizontal ? bytes : pitchB;
2062 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2063
2064 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2065 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2066 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2067 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2068
2069 // Don't write in the corners
2070 if(dstHorizontal)
2071 {
2072 dstOffset.x += reverse ? w : 1;
2073 }
2074 else
2075 {
2076 dstOffset.y += reverse ? h : 1;
2077 }
2078
2079 const uint8_t* src = static_cast<const uint8_t*>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2080 uint8_t *dst = static_cast<uint8_t*>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
2081 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2082 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2083
2084 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2085 {
2086 memcpy(dst, src, bytes);
2087 }
2088 }
2089}
2090