1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "Blitter.hpp" |
16 | |
17 | #include "Pipeline/ShaderCore.hpp" |
18 | #include "Reactor/Reactor.hpp" |
19 | #include "System/Half.hpp" |
20 | #include "System/Memory.hpp" |
21 | #include "Vulkan/VkDebug.hpp" |
22 | #include "Vulkan/VkImage.hpp" |
23 | #include "Vulkan/VkBuffer.hpp" |
24 | |
25 | #include <utility> |
26 | |
27 | namespace sw |
28 | { |
29 | Blitter::Blitter() : |
30 | blitMutex(), |
31 | blitCache(1024), |
32 | cornerUpdateMutex(), |
33 | cornerUpdateCache(64) // We only need one of these per format |
34 | { |
35 | } |
36 | |
37 | Blitter::~Blitter() |
38 | { |
39 | } |
40 | |
41 | void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea) |
42 | { |
43 | VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask); |
44 | vk::Format dstFormat = viewFormat.getAspectFormat(aspect); |
45 | if(dstFormat == VK_FORMAT_UNDEFINED) |
46 | { |
47 | return; |
48 | } |
49 | |
50 | float *pPixel = static_cast<float *>(pixel); |
		if(viewFormat.isUnsignedNormalized())
52 | { |
53 | pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f); |
54 | pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f); |
55 | pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f); |
56 | pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f); |
57 | } |
		else if(viewFormat.isSignedNormalized())
59 | { |
60 | pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f); |
61 | pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f); |
62 | pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f); |
63 | pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f); |
64 | } |
65 | |
66 | if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea)) |
67 | { |
68 | return; |
69 | } |
70 | |
71 | State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF }); |
72 | auto blitRoutine = getBlitRoutine(state); |
73 | if(!blitRoutine) |
74 | { |
75 | return; |
76 | } |
77 | |
78 | VkImageSubresourceLayers subresLayers = |
79 | { |
80 | subresourceRange.aspectMask, |
81 | subresourceRange.baseMipLevel, |
82 | subresourceRange.baseArrayLayer, |
83 | 1 |
84 | }; |
85 | |
86 | uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange); |
87 | uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange); |
88 | |
89 | VkRect2D area = { { 0, 0 }, { 0, 0 } }; |
90 | if(renderArea) |
91 | { |
92 | ASSERT(subresourceRange.levelCount == 1); |
93 | area = *renderArea; |
94 | } |
95 | |
96 | for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++) |
97 | { |
98 | VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel); |
99 | if(!renderArea) |
100 | { |
101 | area.extent.width = extent.width; |
102 | area.extent.height = extent.height; |
103 | } |
104 | |
105 | BlitData data = |
106 | { |
107 | pixel, nullptr, // source, dest |
108 | |
109 | format.bytes(), // sPitchB |
110 | dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB |
111 | 0, // sSliceB (unused in clear operations) |
112 | dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB |
113 | |
114 | 0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h |
115 | |
116 | area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d |
117 | area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d |
118 | |
119 | 0, 0, // sWidth, sHeight |
120 | }; |
121 | |
			if(renderArea && dest->is3DSlice())
123 | { |
124 | // Reinterpret layers as depth slices |
125 | subresLayers.baseArrayLayer = 0; |
126 | subresLayers.layerCount = 1; |
				for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
128 | { |
129 | data.dest = dest->getTexelPointer({0, 0, static_cast<int32_t>(depth)}, subresLayers); |
130 | blitRoutine(&data); |
131 | } |
132 | } |
133 | else |
134 | { |
135 | for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++) |
136 | { |
137 | for(uint32_t depth = 0; depth < extent.depth; depth++) |
138 | { |
139 | data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers); |
140 | |
141 | blitRoutine(&data); |
142 | } |
143 | } |
144 | } |
145 | } |
146 | } |
147 | |
148 | bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea) |
149 | { |
150 | if(format != VK_FORMAT_R32G32B32A32_SFLOAT) |
151 | { |
152 | return false; |
153 | } |
154 | |
155 | float *color = (float*)pixel; |
156 | float r = color[0]; |
157 | float g = color[1]; |
158 | float b = color[2]; |
159 | float a = color[3]; |
160 | |
161 | uint32_t packed; |
162 | |
163 | VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask); |
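		// Pack the normalized clear color into the destination's bit layout, scaling
		// each component to its bit width and rounding to nearest via the "+ 0.5f".
		// E.g. clearing VK_FORMAT_R5G6B5_UNORM_PACK16 to opaque red (1, 0, 0, 1)
		// yields (31 << 11) | (0 << 5) | 0 = 0xF800.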
164 | switch(viewFormat) |
165 | { |
166 | case VK_FORMAT_R5G6B5_UNORM_PACK16: |
167 | packed = ((uint16_t)(31 * b + 0.5f) << 0) | |
168 | ((uint16_t)(63 * g + 0.5f) << 5) | |
169 | ((uint16_t)(31 * r + 0.5f) << 11); |
170 | break; |
171 | case VK_FORMAT_B5G6R5_UNORM_PACK16: |
172 | packed = ((uint16_t)(31 * r + 0.5f) << 0) | |
173 | ((uint16_t)(63 * g + 0.5f) << 5) | |
174 | ((uint16_t)(31 * b + 0.5f) << 11); |
175 | break; |
176 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
177 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
178 | case VK_FORMAT_R8G8B8A8_UNORM: |
179 | packed = ((uint32_t)(255 * a + 0.5f) << 24) | |
180 | ((uint32_t)(255 * b + 0.5f) << 16) | |
181 | ((uint32_t)(255 * g + 0.5f) << 8) | |
182 | ((uint32_t)(255 * r + 0.5f) << 0); |
183 | break; |
184 | case VK_FORMAT_B8G8R8A8_UNORM: |
185 | packed = ((uint32_t)(255 * a + 0.5f) << 24) | |
186 | ((uint32_t)(255 * r + 0.5f) << 16) | |
187 | ((uint32_t)(255 * g + 0.5f) << 8) | |
188 | ((uint32_t)(255 * b + 0.5f) << 0); |
189 | break; |
190 | case VK_FORMAT_B10G11R11_UFLOAT_PACK32: |
191 | packed = R11G11B10F(color); |
192 | break; |
193 | case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
194 | packed = RGB9E5(color); |
195 | break; |
196 | default: |
197 | return false; |
198 | } |
199 | |
200 | VkImageSubresourceLayers subresLayers = |
201 | { |
202 | subresourceRange.aspectMask, |
203 | subresourceRange.baseMipLevel, |
204 | subresourceRange.baseArrayLayer, |
205 | 1 |
206 | }; |
207 | uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange); |
208 | uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange); |
209 | |
210 | VkRect2D area = { { 0, 0 }, { 0, 0 } }; |
211 | if(renderArea) |
212 | { |
213 | ASSERT(subresourceRange.levelCount == 1); |
214 | area = *renderArea; |
215 | } |
216 | |
217 | for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++) |
218 | { |
219 | int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel); |
220 | int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel); |
221 | VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel); |
222 | if(!renderArea) |
223 | { |
224 | area.extent.width = extent.width; |
225 | area.extent.height = extent.height; |
226 | } |
227 | if(dest->is3DSlice()) |
228 | { |
229 | extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers |
230 | } |
231 | |
232 | for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++) |
233 | { |
234 | for(uint32_t depth = 0; depth < extent.depth; depth++) |
235 | { |
236 | uint8_t *slice = (uint8_t*)dest->getTexelPointer( |
237 | { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers); |
238 | |
239 | for(int j = 0; j < dest->getSampleCountFlagBits(); j++) |
240 | { |
241 | uint8_t *d = slice; |
242 | |
243 | switch(viewFormat.bytes()) |
244 | { |
245 | case 2: |
246 | for(uint32_t i = 0; i < area.extent.height; i++) |
247 | { |
248 | ASSERT(d < dest->end()); |
249 | sw::clear((uint16_t*)d, static_cast<uint16_t>(packed), area.extent.width); |
250 | d += rowPitchBytes; |
251 | } |
252 | break; |
253 | case 4: |
254 | for(uint32_t i = 0; i < area.extent.height; i++) |
255 | { |
256 | ASSERT(d < dest->end()); |
257 | sw::clear((uint32_t*)d, packed, area.extent.width); |
258 | d += rowPitchBytes; |
259 | } |
260 | break; |
261 | default: |
262 | assert(false); |
263 | } |
264 | |
265 | slice += slicePitchBytes; |
266 | } |
267 | } |
268 | } |
269 | } |
270 | |
271 | return true; |
272 | } |
273 | |
274 | Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state) |
275 | { |
276 | Float4 c(0.0f, 0.0f, 0.0f, 1.0f); |
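		// Components not read below keep these (0, 0, 0, 1) defaults. For normalized
		// integer formats, w is instead set to the type's maximum raw value, so that
		// unscaling in ApplyScaleAndClamp() maps alpha back to 1.0.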
277 | |
278 | switch(state.sourceFormat) |
279 | { |
280 | case VK_FORMAT_B4G4R4A4_UNORM_PACK16: |
281 | c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF)); |
282 | c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF)); |
283 | c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF)); |
284 | c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF)); |
285 | break; |
286 | case VK_FORMAT_R8_SINT: |
287 | case VK_FORMAT_R8_SNORM: |
288 | c.x = Float(Int(*Pointer<SByte>(element))); |
289 | c.w = float(0x7F); |
290 | break; |
291 | case VK_FORMAT_R8_UNORM: |
292 | case VK_FORMAT_R8_UINT: |
293 | case VK_FORMAT_R8_SRGB: |
294 | c.x = Float(Int(*Pointer<Byte>(element))); |
295 | c.w = float(0xFF); |
296 | break; |
297 | case VK_FORMAT_R16_SINT: |
298 | case VK_FORMAT_R16_SNORM: |
299 | c.x = Float(Int(*Pointer<Short>(element))); |
300 | c.w = float(0x7FFF); |
301 | break; |
302 | case VK_FORMAT_R16_UNORM: |
303 | case VK_FORMAT_R16_UINT: |
304 | c.x = Float(Int(*Pointer<UShort>(element))); |
305 | c.w = float(0xFFFF); |
306 | break; |
307 | case VK_FORMAT_R32_SINT: |
308 | c.x = Float(*Pointer<Int>(element)); |
309 | c.w = float(0x7FFFFFFF); |
310 | break; |
311 | case VK_FORMAT_R32_UINT: |
312 | c.x = Float(*Pointer<UInt>(element)); |
313 | c.w = float(0xFFFFFFFF); |
314 | break; |
315 | case VK_FORMAT_B8G8R8A8_SRGB: |
316 | case VK_FORMAT_B8G8R8A8_UNORM: |
317 | c = Float4(*Pointer<Byte4>(element)).zyxw; |
318 | break; |
319 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
320 | case VK_FORMAT_R8G8B8A8_SINT: |
321 | case VK_FORMAT_A8B8G8R8_SNORM_PACK32: |
322 | case VK_FORMAT_R8G8B8A8_SNORM: |
323 | c = Float4(*Pointer<SByte4>(element)); |
324 | break; |
325 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
326 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
327 | case VK_FORMAT_R8G8B8A8_UNORM: |
328 | case VK_FORMAT_R8G8B8A8_UINT: |
329 | case VK_FORMAT_A8B8G8R8_SRGB_PACK32: |
330 | case VK_FORMAT_R8G8B8A8_SRGB: |
331 | c = Float4(*Pointer<Byte4>(element)); |
332 | break; |
333 | case VK_FORMAT_R16G16B16A16_SINT: |
334 | c = Float4(*Pointer<Short4>(element)); |
335 | break; |
336 | case VK_FORMAT_R16G16B16A16_UNORM: |
337 | case VK_FORMAT_R16G16B16A16_UINT: |
338 | c = Float4(*Pointer<UShort4>(element)); |
339 | break; |
340 | case VK_FORMAT_R32G32B32A32_SINT: |
341 | c = Float4(*Pointer<Int4>(element)); |
342 | break; |
343 | case VK_FORMAT_R32G32B32A32_UINT: |
344 | c = Float4(*Pointer<UInt4>(element)); |
345 | break; |
346 | case VK_FORMAT_R8G8_SINT: |
347 | case VK_FORMAT_R8G8_SNORM: |
348 | c.x = Float(Int(*Pointer<SByte>(element + 0))); |
349 | c.y = Float(Int(*Pointer<SByte>(element + 1))); |
350 | c.w = float(0x7F); |
351 | break; |
352 | case VK_FORMAT_R8G8_UNORM: |
353 | case VK_FORMAT_R8G8_UINT: |
354 | case VK_FORMAT_R8G8_SRGB: |
355 | c.x = Float(Int(*Pointer<Byte>(element + 0))); |
356 | c.y = Float(Int(*Pointer<Byte>(element + 1))); |
357 | c.w = float(0xFF); |
358 | break; |
359 | case VK_FORMAT_R16G16_SINT: |
360 | case VK_FORMAT_R16G16_SNORM: |
361 | c.x = Float(Int(*Pointer<Short>(element + 0))); |
362 | c.y = Float(Int(*Pointer<Short>(element + 2))); |
363 | c.w = float(0x7FFF); |
364 | break; |
365 | case VK_FORMAT_R16G16_UNORM: |
366 | case VK_FORMAT_R16G16_UINT: |
367 | c.x = Float(Int(*Pointer<UShort>(element + 0))); |
368 | c.y = Float(Int(*Pointer<UShort>(element + 2))); |
369 | c.w = float(0xFFFF); |
370 | break; |
371 | case VK_FORMAT_R32G32_SINT: |
372 | c.x = Float(*Pointer<Int>(element + 0)); |
373 | c.y = Float(*Pointer<Int>(element + 4)); |
374 | c.w = float(0x7FFFFFFF); |
375 | break; |
376 | case VK_FORMAT_R32G32_UINT: |
377 | c.x = Float(*Pointer<UInt>(element + 0)); |
378 | c.y = Float(*Pointer<UInt>(element + 4)); |
379 | c.w = float(0xFFFFFFFF); |
380 | break; |
381 | case VK_FORMAT_R32G32B32A32_SFLOAT: |
382 | c = *Pointer<Float4>(element); |
383 | break; |
384 | case VK_FORMAT_R32G32_SFLOAT: |
385 | c.x = *Pointer<Float>(element + 0); |
386 | c.y = *Pointer<Float>(element + 4); |
387 | break; |
388 | case VK_FORMAT_R32_SFLOAT: |
389 | c.x = *Pointer<Float>(element); |
390 | break; |
391 | case VK_FORMAT_R16G16B16A16_SFLOAT: |
392 | c.w = Float(*Pointer<Half>(element + 6)); |
393 | case VK_FORMAT_R16G16B16_SFLOAT: |
394 | c.z = Float(*Pointer<Half>(element + 4)); |
395 | case VK_FORMAT_R16G16_SFLOAT: |
396 | c.y = Float(*Pointer<Half>(element + 2)); |
397 | case VK_FORMAT_R16_SFLOAT: |
398 | c.x = Float(*Pointer<Half>(element)); |
399 | break; |
400 | case VK_FORMAT_B10G11R11_UFLOAT_PACK32: |
401 | // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. |
402 | // Since the Half float format also has a 5 bit exponent, we can convert these formats to half by |
			// copy/pasting the bits so the exponent bits and top mantissa bits are aligned to the half format.
404 | // In this case, we have: |
405 | // B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R |
406 | // 1st Short: |xxxxxxxxxx---------------------| |
407 | // 2nd Short: |xxxx---------------------xxxxxx| |
408 | // 3rd Short: |--------------------xxxxxxxxxxxx| |
409 | // These memory reads overlap, but each of them contains an entire channel, so we can read this without |
410 | // any int -> short conversion. |
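			// Both the 10/11-bit formats and half-precision use a 5 bit exponent with
			// bias 15 (2^(5-1) - 1), so aligning the exponent fields preserves each
			// value exactly, with the half's extra low mantissa bits zero-filled. E.g.
			// R occupies bits 0-10 of the first UShort; masking with 0x07FF and shifting
			// left by 4 lands its exponent in the half's exponent field (bits 10-14).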
411 | c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4))); |
412 | c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1))); |
413 | c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1))); |
414 | break; |
415 | case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
			// This type contains a common 5 bit exponent (E) and a 9 bit mantissa for R, G and B.
417 | c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8) |
418 | c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17) |
419 | c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26) |
420 | c *= Float4( |
421 | // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value |
422 | Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) * |
423 | // Since the 9 bit mantissa values currently stored in RGB were converted straight |
424 | // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they |
425 | // are (1 << 9) times too high. |
426 | // Also, the exponent has 5 bits and we compute the exponent bias of floating point |
427 | // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15 |
428 | // Exponent bias (15) + number of mantissa bits per component (9) = 24 |
429 | Float(1.0f / (1 << 24))); |
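			// Net effect: each component decodes to mantissa * 2^(E - 15 - 9)
			// = mantissa * 2^(E - 24), the shared-exponent decoding the spec defines.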
430 | c.w = 1.0f; |
431 | break; |
432 | case VK_FORMAT_R5G6B5_UNORM_PACK16: |
433 | c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11))); |
434 | c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5))); |
435 | c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); |
436 | break; |
437 | case VK_FORMAT_A1R5G5B5_UNORM_PACK16: |
438 | c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15))); |
439 | c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10))); |
440 | c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5))); |
441 | c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); |
442 | break; |
443 | case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
444 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
445 | c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF)))); |
446 | c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10)); |
447 | c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20)); |
448 | c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30)); |
449 | break; |
450 | case VK_FORMAT_D16_UNORM: |
451 | c.x = Float(Int((*Pointer<UShort>(element)))); |
452 | break; |
453 | case VK_FORMAT_X8_D24_UNORM_PACK32: |
454 | c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8)); |
455 | break; |
456 | case VK_FORMAT_D32_SFLOAT: |
457 | c.x = *Pointer<Float>(element); |
458 | break; |
459 | case VK_FORMAT_S8_UINT: |
460 | c.x = Float(Int(*Pointer<Byte>(element))); |
461 | break; |
462 | default: |
463 | UNSUPPORTED("Blitter source format %d" , (int)state.sourceFormat); |
464 | } |
465 | |
466 | return c; |
467 | } |
468 | |
469 | void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) |
470 | { |
471 | bool writeR = state.writeRed; |
472 | bool writeG = state.writeGreen; |
473 | bool writeB = state.writeBlue; |
474 | bool writeA = state.writeAlpha; |
475 | bool writeRGBA = writeR && writeG && writeB && writeA; |
476 | |
477 | switch(state.destFormat) |
478 | { |
479 | case VK_FORMAT_R4G4_UNORM_PACK8: |
			if(writeR || writeG)
481 | { |
482 | if(!writeR) |
483 | { |
484 | *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) | |
485 | (*Pointer<Byte>(element) & Byte(0xF0)); |
486 | } |
487 | else if(!writeG) |
488 | { |
489 | *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) | |
490 | (Byte(RoundInt(Float(c.x))) << Byte(4)); |
491 | } |
492 | else |
493 | { |
494 | *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) | |
495 | (Byte(RoundInt(Float(c.x))) << Byte(4)); |
496 | } |
497 | } |
498 | break; |
499 | case VK_FORMAT_R4G4B4A4_UNORM_PACK16: |
500 | if(writeR || writeG || writeB || writeA) |
501 | { |
				// Preserve the existing bits of any channel that isn't written.
				*Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) :
				                                      (*Pointer<UShort>(element) & UShort(0xF000))) |
				                            (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) :
				                                      (*Pointer<UShort>(element) & UShort(0x0F00))) |
				                            (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) :
				                                      (*Pointer<UShort>(element) & UShort(0x00F0))) |
				                            (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) :
				                                      (*Pointer<UShort>(element) & UShort(0x000F)));
510 | } |
511 | break; |
512 | case VK_FORMAT_B4G4R4A4_UNORM_PACK16: |
513 | if(writeRGBA) |
514 | { |
515 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) | |
516 | UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) | |
517 | UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) | |
518 | UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12); |
519 | } |
520 | else |
521 | { |
522 | unsigned short mask = (writeA ? 0x000F : 0x0000) | |
523 | (writeR ? 0x00F0 : 0x0000) | |
524 | (writeG ? 0x0F00 : 0x0000) | |
525 | (writeB ? 0xF000 : 0x0000); |
526 | unsigned short unmask = ~mask; |
527 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
528 | ((UShort(RoundInt(Float(c.w)) & Int(0xF)) | |
529 | UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) | |
530 | UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) | |
531 | UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask)); |
532 | } |
533 | break; |
534 | case VK_FORMAT_B8G8R8A8_SRGB: |
535 | case VK_FORMAT_B8G8R8A8_UNORM: |
536 | if(writeRGBA) |
537 | { |
538 | Short4 c0 = RoundShort4(c.zyxw); |
539 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
540 | } |
541 | else |
542 | { |
543 | if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } |
544 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
545 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } |
546 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } |
547 | } |
548 | break; |
549 | case VK_FORMAT_B8G8R8_SNORM: |
550 | if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); } |
551 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } |
552 | if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); } |
553 | break; |
554 | case VK_FORMAT_B8G8R8_UNORM: |
555 | case VK_FORMAT_B8G8R8_SRGB: |
556 | if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } |
557 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
558 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } |
559 | break; |
560 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
561 | case VK_FORMAT_R8G8B8A8_UNORM: |
562 | case VK_FORMAT_A8B8G8R8_SRGB_PACK32: |
563 | case VK_FORMAT_R8G8B8A8_SRGB: |
564 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
565 | case VK_FORMAT_R8G8B8A8_UINT: |
566 | case VK_FORMAT_R8G8B8A8_USCALED: |
567 | case VK_FORMAT_A8B8G8R8_USCALED_PACK32: |
568 | if(writeRGBA) |
569 | { |
570 | Short4 c0 = RoundShort4(c); |
571 | *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); |
572 | } |
573 | else |
574 | { |
575 | if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } |
576 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
577 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
578 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } |
579 | } |
580 | break; |
581 | case VK_FORMAT_R32G32B32A32_SFLOAT: |
582 | if(writeRGBA) |
583 | { |
584 | *Pointer<Float4>(element) = c; |
585 | } |
586 | else |
587 | { |
588 | if(writeR) { *Pointer<Float>(element) = c.x; } |
589 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
590 | if(writeB) { *Pointer<Float>(element + 8) = c.z; } |
591 | if(writeA) { *Pointer<Float>(element + 12) = c.w; } |
592 | } |
593 | break; |
594 | case VK_FORMAT_R32G32B32_SFLOAT: |
595 | if(writeR) { *Pointer<Float>(element) = c.x; } |
596 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
597 | if(writeB) { *Pointer<Float>(element + 8) = c.z; } |
598 | break; |
599 | case VK_FORMAT_R32G32_SFLOAT: |
600 | if(writeR && writeG) |
601 | { |
602 | *Pointer<Float2>(element) = Float2(c); |
603 | } |
604 | else |
605 | { |
606 | if(writeR) { *Pointer<Float>(element) = c.x; } |
607 | if(writeG) { *Pointer<Float>(element + 4) = c.y; } |
608 | } |
609 | break; |
610 | case VK_FORMAT_R32_SFLOAT: |
611 | if(writeR) { *Pointer<Float>(element) = c.x; } |
612 | break; |
613 | case VK_FORMAT_R16G16B16A16_SFLOAT: |
614 | if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); } |
615 | case VK_FORMAT_R16G16B16_SFLOAT: |
616 | if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); } |
617 | case VK_FORMAT_R16G16_SFLOAT: |
618 | if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); } |
619 | case VK_FORMAT_R16_SFLOAT: |
620 | if(writeR) { *Pointer<Half>(element) = Half(c.x); } |
621 | break; |
622 | case VK_FORMAT_B10G11R11_UFLOAT_PACK32: |
623 | { |
624 | // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa. |
			// Since the 16-bit half-precision float format also has a 5 bit exponent, we can extract these minifloats from half-precision values by truncating the mantissa.
626 | |
627 | // FIXME(b/138944025): Handle negative values, Inf, and NaN. |
628 | // FIXME(b/138944025): Perform rounding before truncating the mantissa. |
629 | UInt r = (UInt(As<UShort>(Half(c.x))) & 0x00007FF0) >> 4; |
630 | UInt g = (UInt(As<UShort>(Half(c.y))) & 0x00007FF0) << 7; |
631 | UInt b = (UInt(As<UShort>(Half(c.z))) & 0x00007FE0) << 17; |
632 | |
633 | UInt rgb = r | g | b; |
634 | |
635 | UInt old = *Pointer<UInt>(element); |
636 | |
637 | unsigned int mask = (writeR ? 0x000007FF : 0) | |
638 | (writeG ? 0x003FF800 : 0) | |
639 | (writeB ? 0xFFC00000 : 0); |
640 | |
641 | *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask); |
642 | } |
643 | break; |
644 | case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32: |
645 | { |
646 | ASSERT(writeRGBA); // Can't sensibly write just part of this format. |
647 | |
648 | // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion |
649 | |
650 | constexpr int N = 9; // number of mantissa bits per component |
651 | constexpr int B = 15; // exponent bias |
652 | constexpr int E_max = 31; // maximum possible biased exponent value |
653 | |
654 | // Maximum representable value. |
655 | constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B)); |
656 | |
657 | // Clamp components to valid range. NaN becomes 0. |
658 | Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max); |
659 | Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max); |
660 | Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max); |
661 | |
662 | // We're reducing the mantissa to 9 bits, so we must round up if the next |
663 | // bit is 1. In other words add 0.5 to the new mantissa's position and |
664 | // allow overflow into the exponent so we can scale correctly. |
665 | constexpr int half = 1 << (23 - N); |
666 | Float red_r = As<Float>(As<Int>(red_c) + half); |
667 | Float green_r = As<Float>(As<Int>(green_c) + half); |
668 | Float blue_r = As<Float>(As<Int>(blue_c) + half); |
669 | |
670 | // The largest component determines the shared exponent. It can't be lower |
				// than 0 (after bias subtraction) so also limit to the minimum representable.
672 | constexpr float min_s = 0.5f / (1 << B); |
673 | Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s)); |
674 | |
675 | // Obtain the reciprocal of the shared exponent by inverting the bits, |
676 | // and scale by the new mantissa's size. Note that the IEEE-754 single-precision |
677 | // format has an implicit leading 1, but this shared component format does not. |
678 | Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2)); |
679 | |
				UInt R9 = UInt(RoundInt(red_c * scale));
681 | UInt G9 = UInt(RoundInt(green_c * scale)); |
682 | UInt B9 = UInt(RoundInt(blue_c * scale)); |
683 | UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1; |
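				// Sanity check: with e the biased exponent of max_s, scale = 2^(135 - e)
				// and E5 = e - 111, so decoding as R9 * 2^(E5 - B - N) recovers
				// red_c * 2^(135 - e) * 2^(e - 135) = red_c.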
684 | |
685 | UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9; |
686 | |
687 | *Pointer<UInt>(element) = E5B9G9R9; |
688 | } |
689 | break; |
690 | case VK_FORMAT_B8G8R8A8_SNORM: |
691 | if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); } |
692 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } |
693 | if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); } |
694 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } |
695 | break; |
696 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
697 | case VK_FORMAT_R8G8B8A8_SINT: |
698 | case VK_FORMAT_A8B8G8R8_SNORM_PACK32: |
699 | case VK_FORMAT_R8G8B8A8_SNORM: |
700 | case VK_FORMAT_R8G8B8A8_SSCALED: |
701 | case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: |
702 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } |
703 | case VK_FORMAT_R8G8B8_SINT: |
704 | case VK_FORMAT_R8G8B8_SNORM: |
705 | case VK_FORMAT_R8G8B8_SSCALED: |
706 | if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } |
707 | case VK_FORMAT_R8G8_SINT: |
708 | case VK_FORMAT_R8G8_SNORM: |
709 | case VK_FORMAT_R8G8_SSCALED: |
710 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } |
711 | case VK_FORMAT_R8_SINT: |
712 | case VK_FORMAT_R8_SNORM: |
713 | case VK_FORMAT_R8_SSCALED: |
714 | if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); } |
715 | break; |
716 | case VK_FORMAT_R8G8B8_UINT: |
717 | case VK_FORMAT_R8G8B8_UNORM: |
718 | case VK_FORMAT_R8G8B8_USCALED: |
719 | case VK_FORMAT_R8G8B8_SRGB: |
720 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } |
721 | case VK_FORMAT_R8G8_UINT: |
722 | case VK_FORMAT_R8G8_UNORM: |
723 | case VK_FORMAT_R8G8_USCALED: |
724 | case VK_FORMAT_R8G8_SRGB: |
725 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } |
726 | case VK_FORMAT_R8_UINT: |
727 | case VK_FORMAT_R8_UNORM: |
728 | case VK_FORMAT_R8_USCALED: |
729 | case VK_FORMAT_R8_SRGB: |
730 | if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); } |
731 | break; |
732 | case VK_FORMAT_R16G16B16A16_SINT: |
733 | case VK_FORMAT_R16G16B16A16_SNORM: |
734 | case VK_FORMAT_R16G16B16A16_SSCALED: |
735 | if(writeRGBA) |
736 | { |
737 | *Pointer<Short4>(element) = Short4(RoundInt(c)); |
738 | } |
739 | else |
740 | { |
741 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
742 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
743 | if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } |
744 | if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); } |
745 | } |
746 | break; |
747 | case VK_FORMAT_R16G16B16_SINT: |
748 | case VK_FORMAT_R16G16B16_SNORM: |
749 | case VK_FORMAT_R16G16B16_SSCALED: |
750 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
751 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
752 | if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } |
753 | break; |
754 | case VK_FORMAT_R16G16_SINT: |
755 | case VK_FORMAT_R16G16_SNORM: |
756 | case VK_FORMAT_R16G16_SSCALED: |
757 | if(writeR && writeG) |
758 | { |
759 | *Pointer<Short2>(element) = Short2(Short4(RoundInt(c))); |
760 | } |
761 | else |
762 | { |
763 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
764 | if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } |
765 | } |
766 | break; |
767 | case VK_FORMAT_R16_SINT: |
768 | case VK_FORMAT_R16_SNORM: |
769 | case VK_FORMAT_R16_SSCALED: |
770 | if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } |
771 | break; |
772 | case VK_FORMAT_R16G16B16A16_UINT: |
773 | case VK_FORMAT_R16G16B16A16_UNORM: |
774 | case VK_FORMAT_R16G16B16A16_USCALED: |
775 | if(writeRGBA) |
776 | { |
777 | *Pointer<UShort4>(element) = UShort4(RoundInt(c)); |
778 | } |
779 | else |
780 | { |
781 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
782 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
783 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } |
784 | if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); } |
785 | } |
786 | break; |
787 | case VK_FORMAT_R16G16B16_UINT: |
788 | case VK_FORMAT_R16G16B16_UNORM: |
789 | case VK_FORMAT_R16G16B16_USCALED: |
790 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
791 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
792 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } |
793 | break; |
794 | case VK_FORMAT_R16G16_UINT: |
795 | case VK_FORMAT_R16G16_UNORM: |
796 | case VK_FORMAT_R16G16_USCALED: |
797 | if(writeR && writeG) |
798 | { |
799 | *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c))); |
800 | } |
801 | else |
802 | { |
803 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
804 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } |
805 | } |
806 | break; |
807 | case VK_FORMAT_R16_UINT: |
808 | case VK_FORMAT_R16_UNORM: |
809 | case VK_FORMAT_R16_USCALED: |
810 | if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } |
811 | break; |
812 | case VK_FORMAT_R32G32B32A32_SINT: |
813 | if(writeRGBA) |
814 | { |
815 | *Pointer<Int4>(element) = RoundInt(c); |
816 | } |
817 | else |
818 | { |
819 | if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } |
820 | if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } |
821 | if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } |
822 | if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); } |
823 | } |
824 | break; |
825 | case VK_FORMAT_R32G32B32_SINT: |
826 | if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } |
827 | case VK_FORMAT_R32G32_SINT: |
828 | if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } |
829 | case VK_FORMAT_R32_SINT: |
830 | if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } |
831 | break; |
832 | case VK_FORMAT_R32G32B32A32_UINT: |
833 | if(writeRGBA) |
834 | { |
835 | *Pointer<UInt4>(element) = UInt4(RoundInt(c)); |
836 | } |
837 | else |
838 | { |
839 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } |
840 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } |
841 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } |
842 | if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); } |
843 | } |
844 | break; |
845 | case VK_FORMAT_R32G32B32_UINT: |
846 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } |
847 | case VK_FORMAT_R32G32_UINT: |
848 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } |
849 | case VK_FORMAT_R32_UINT: |
850 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } |
851 | break; |
852 | case VK_FORMAT_R5G6B5_UNORM_PACK16: |
853 | if(writeR && writeG && writeB) |
854 | { |
855 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | |
856 | (RoundInt(Float(c.y)) << Int(5)) | |
857 | (RoundInt(Float(c.x)) << Int(11))); |
858 | } |
859 | else |
860 | { |
861 | unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000); |
862 | unsigned short unmask = ~mask; |
863 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
864 | (UShort(RoundInt(Float(c.z)) | |
865 | (RoundInt(Float(c.y)) << Int(5)) | |
866 | (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); |
867 | } |
868 | break; |
869 | case VK_FORMAT_R5G5B5A1_UNORM_PACK16: |
870 | if(writeRGBA) |
871 | { |
872 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) | |
873 | (RoundInt(Float(c.z)) << Int(1)) | |
874 | (RoundInt(Float(c.y)) << Int(6)) | |
875 | (RoundInt(Float(c.x)) << Int(11))); |
876 | } |
877 | else |
878 | { |
				// Bit layout: A in bit 0, B in bits 1-5, G in bits 6-10, R in bits 11-15.
				unsigned short mask = (writeA ? 0x0001 : 0x0000) |
				                      (writeR ? 0xF800 : 0x0000) |
				                      (writeG ? 0x07C0 : 0x0000) |
				                      (writeB ? 0x003E : 0x0000);
883 | unsigned short unmask = ~mask; |
884 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
885 | (UShort(RoundInt(Float(c.w)) | |
886 | (RoundInt(Float(c.z)) << Int(1)) | |
887 | (RoundInt(Float(c.y)) << Int(6)) | |
888 | (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); |
889 | } |
890 | break; |
891 | case VK_FORMAT_B5G5R5A1_UNORM_PACK16: |
892 | if(writeRGBA) |
893 | { |
894 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) | |
895 | (RoundInt(Float(c.x)) << Int(1)) | |
896 | (RoundInt(Float(c.y)) << Int(6)) | |
897 | (RoundInt(Float(c.z)) << Int(11))); |
898 | } |
899 | else |
900 | { |
				// Bit layout: A in bit 0, R in bits 1-5, G in bits 6-10, B in bits 11-15.
				unsigned short mask = (writeA ? 0x0001 : 0x0000) |
				                      (writeR ? 0x003E : 0x0000) |
				                      (writeG ? 0x07C0 : 0x0000) |
				                      (writeB ? 0xF800 : 0x0000);
905 | unsigned short unmask = ~mask; |
906 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
907 | (UShort(RoundInt(Float(c.w)) | |
908 | (RoundInt(Float(c.x)) << Int(1)) | |
909 | (RoundInt(Float(c.y)) << Int(6)) | |
910 | (RoundInt(Float(c.z)) << Int(11))) & UShort(mask)); |
911 | } |
912 | break; |
913 | case VK_FORMAT_A1R5G5B5_UNORM_PACK16: |
914 | if(writeRGBA) |
915 | { |
916 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | |
917 | (RoundInt(Float(c.y)) << Int(5)) | |
918 | (RoundInt(Float(c.x)) << Int(10)) | |
919 | (RoundInt(Float(c.w)) << Int(15))); |
920 | } |
921 | else |
922 | { |
923 | unsigned short mask = (writeA ? 0x8000 : 0x0000) | |
924 | (writeR ? 0x7C00 : 0x0000) | |
925 | (writeG ? 0x03E0 : 0x0000) | |
926 | (writeB ? 0x001F : 0x0000); |
927 | unsigned short unmask = ~mask; |
928 | *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | |
929 | (UShort(RoundInt(Float(c.z)) | |
930 | (RoundInt(Float(c.y)) << Int(5)) | |
931 | (RoundInt(Float(c.x)) << Int(10)) | |
932 | (RoundInt(Float(c.w)) << Int(15))) & UShort(mask)); |
933 | } |
934 | break; |
935 | case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
936 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
937 | case VK_FORMAT_A2B10G10R10_SNORM_PACK32: |
938 | if(writeRGBA) |
939 | { |
940 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) | |
941 | (RoundInt(Float(c.y)) << 10) | |
942 | (RoundInt(Float(c.z)) << 20) | |
943 | (RoundInt(Float(c.w)) << 30)); |
944 | } |
945 | else |
946 | { |
947 | unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | |
948 | (writeB ? 0x3FF00000 : 0x0000) | |
949 | (writeG ? 0x000FFC00 : 0x0000) | |
950 | (writeR ? 0x000003FF : 0x0000); |
951 | unsigned int unmask = ~mask; |
952 | *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | |
953 | (UInt(RoundInt(Float(c.x)) | |
954 | (RoundInt(Float(c.y)) << 10) | |
955 | (RoundInt(Float(c.z)) << 20) | |
956 | (RoundInt(Float(c.w)) << 30)) & UInt(mask)); |
957 | } |
958 | break; |
959 | case VK_FORMAT_A2R10G10B10_UNORM_PACK32: |
960 | case VK_FORMAT_A2R10G10B10_UINT_PACK32: |
961 | case VK_FORMAT_A2R10G10B10_SNORM_PACK32: |
962 | if(writeRGBA) |
963 | { |
964 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) | |
965 | (RoundInt(Float(c.y)) << 10) | |
966 | (RoundInt(Float(c.x)) << 20) | |
967 | (RoundInt(Float(c.w)) << 30)); |
968 | } |
969 | else |
970 | { |
971 | unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | |
972 | (writeR ? 0x3FF00000 : 0x0000) | |
973 | (writeG ? 0x000FFC00 : 0x0000) | |
974 | (writeB ? 0x000003FF : 0x0000); |
975 | unsigned int unmask = ~mask; |
976 | *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | |
977 | (UInt(RoundInt(Float(c.z)) | |
978 | (RoundInt(Float(c.y)) << 10) | |
979 | (RoundInt(Float(c.x)) << 20) | |
980 | (RoundInt(Float(c.w)) << 30)) & UInt(mask)); |
981 | } |
982 | break; |
983 | case VK_FORMAT_D16_UNORM: |
984 | *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); |
985 | break; |
986 | case VK_FORMAT_X8_D24_UNORM_PACK32: |
987 | *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8); |
988 | break; |
989 | case VK_FORMAT_D32_SFLOAT: |
990 | *Pointer<Float>(element) = c.x; |
991 | break; |
992 | case VK_FORMAT_S8_UINT: |
993 | *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); |
994 | break; |
995 | default: |
996 | UNSUPPORTED("Blitter destination format %d" , (int)state.destFormat); |
997 | break; |
998 | } |
999 | } |
1000 | |
1001 | Int4 Blitter::readInt4(Pointer<Byte> element, const State &state) |
1002 | { |
1003 | Int4 c(0, 0, 0, 1); |
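		// As in readFloat4(), unread components keep the (0, 0, 0, 1) defaults;
		// the case fall-throughs read progressively narrower formats of each type.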
1004 | |
1005 | switch(state.sourceFormat) |
1006 | { |
1007 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
1008 | case VK_FORMAT_R8G8B8A8_SINT: |
1009 | c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); |
1010 | c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); |
1011 | case VK_FORMAT_R8G8_SINT: |
1012 | c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); |
1013 | case VK_FORMAT_R8_SINT: |
1014 | c = Insert(c, Int(*Pointer<SByte>(element)), 0); |
1015 | break; |
1016 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
1017 | c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0); |
1018 | c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1); |
1019 | c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2); |
1020 | c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3); |
1021 | break; |
1022 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
1023 | case VK_FORMAT_R8G8B8A8_UINT: |
1024 | c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); |
1025 | c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); |
1026 | case VK_FORMAT_R8G8_UINT: |
1027 | c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); |
1028 | case VK_FORMAT_R8_UINT: |
1029 | case VK_FORMAT_S8_UINT: |
1030 | c = Insert(c, Int(*Pointer<Byte>(element)), 0); |
1031 | break; |
1032 | case VK_FORMAT_R16G16B16A16_SINT: |
1033 | c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); |
1034 | c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); |
1035 | case VK_FORMAT_R16G16_SINT: |
1036 | c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); |
1037 | case VK_FORMAT_R16_SINT: |
1038 | c = Insert(c, Int(*Pointer<Short>(element)), 0); |
1039 | break; |
1040 | case VK_FORMAT_R16G16B16A16_UINT: |
1041 | c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); |
1042 | c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); |
1043 | case VK_FORMAT_R16G16_UINT: |
1044 | c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); |
1045 | case VK_FORMAT_R16_UINT: |
1046 | c = Insert(c, Int(*Pointer<UShort>(element)), 0); |
1047 | break; |
1048 | case VK_FORMAT_R32G32B32A32_SINT: |
1049 | case VK_FORMAT_R32G32B32A32_UINT: |
1050 | c = *Pointer<Int4>(element); |
1051 | break; |
1052 | case VK_FORMAT_R32G32_SINT: |
1053 | case VK_FORMAT_R32G32_UINT: |
1054 | c = Insert(c, *Pointer<Int>(element + 4), 1); |
1055 | case VK_FORMAT_R32_SINT: |
1056 | case VK_FORMAT_R32_UINT: |
1057 | c = Insert(c, *Pointer<Int>(element), 0); |
1058 | break; |
1059 | default: |
1060 | UNSUPPORTED("Blitter source format %d" , (int)state.sourceFormat); |
1061 | } |
1062 | |
1063 | return c; |
1064 | } |
1065 | |
1066 | void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) |
1067 | { |
1068 | bool writeR = state.writeRed; |
1069 | bool writeG = state.writeGreen; |
1070 | bool writeB = state.writeBlue; |
1071 | bool writeA = state.writeAlpha; |
1072 | bool writeRGBA = writeR && writeG && writeB && writeA; |
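
		// The first switch saturates the color to the destination's representable
		// range (unsigned formats via unsigned Min, signed formats to the two's
		// complement range of the component width); the second switch writes it out.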
1073 | |
1074 | switch(state.destFormat) |
1075 | { |
1076 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
1077 | c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003)); |
1078 | break; |
1079 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
1080 | case VK_FORMAT_R8G8B8A8_UINT: |
1081 | case VK_FORMAT_R8G8B8_UINT: |
1082 | case VK_FORMAT_R8G8_UINT: |
1083 | case VK_FORMAT_R8_UINT: |
1084 | case VK_FORMAT_R8G8B8A8_USCALED: |
1085 | case VK_FORMAT_R8G8B8_USCALED: |
1086 | case VK_FORMAT_R8G8_USCALED: |
1087 | case VK_FORMAT_R8_USCALED: |
1088 | case VK_FORMAT_S8_UINT: |
1089 | c = Min(As<UInt4>(c), UInt4(0xFF)); |
1090 | break; |
1091 | case VK_FORMAT_R16G16B16A16_UINT: |
1092 | case VK_FORMAT_R16G16B16_UINT: |
1093 | case VK_FORMAT_R16G16_UINT: |
1094 | case VK_FORMAT_R16_UINT: |
1095 | case VK_FORMAT_R16G16B16A16_USCALED: |
1096 | case VK_FORMAT_R16G16B16_USCALED: |
1097 | case VK_FORMAT_R16G16_USCALED: |
1098 | case VK_FORMAT_R16_USCALED: |
1099 | c = Min(As<UInt4>(c), UInt4(0xFFFF)); |
1100 | break; |
1101 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
1102 | case VK_FORMAT_R8G8B8A8_SINT: |
1103 | case VK_FORMAT_R8G8_SINT: |
1104 | case VK_FORMAT_R8_SINT: |
1105 | case VK_FORMAT_R8G8B8A8_SSCALED: |
1106 | case VK_FORMAT_R8G8B8_SSCALED: |
1107 | case VK_FORMAT_R8G8_SSCALED: |
1108 | case VK_FORMAT_R8_SSCALED: |
1109 | c = Min(Max(c, Int4(-0x80)), Int4(0x7F)); |
1110 | break; |
1111 | case VK_FORMAT_R16G16B16A16_SINT: |
1112 | case VK_FORMAT_R16G16B16_SINT: |
1113 | case VK_FORMAT_R16G16_SINT: |
1114 | case VK_FORMAT_R16_SINT: |
1115 | case VK_FORMAT_R16G16B16A16_SSCALED: |
1116 | case VK_FORMAT_R16G16B16_SSCALED: |
1117 | case VK_FORMAT_R16G16_SSCALED: |
1118 | case VK_FORMAT_R16_SSCALED: |
1119 | c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF)); |
1120 | break; |
1121 | default: |
1122 | break; |
1123 | } |
1124 | |
1125 | switch(state.destFormat) |
1126 | { |
1127 | case VK_FORMAT_B8G8R8A8_SINT: |
1128 | case VK_FORMAT_B8G8R8A8_SSCALED: |
1129 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } |
1130 | case VK_FORMAT_B8G8R8_SINT: |
1131 | case VK_FORMAT_B8G8R8_SSCALED: |
1132 | if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); } |
1133 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } |
1134 | if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); } |
1135 | break; |
1136 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
1137 | case VK_FORMAT_R8G8B8A8_SINT: |
1138 | case VK_FORMAT_R8G8B8A8_SSCALED: |
1139 | case VK_FORMAT_A8B8G8R8_SSCALED_PACK32: |
1140 | if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } |
1141 | case VK_FORMAT_R8G8B8_SINT: |
1142 | case VK_FORMAT_R8G8B8_SSCALED: |
1143 | if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } |
1144 | case VK_FORMAT_R8G8_SINT: |
1145 | case VK_FORMAT_R8G8_SSCALED: |
1146 | if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } |
1147 | case VK_FORMAT_R8_SINT: |
1148 | case VK_FORMAT_R8_SSCALED: |
1149 | if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } |
1150 | break; |
1151 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
1152 | case VK_FORMAT_A2B10G10R10_SINT_PACK32: |
1153 | case VK_FORMAT_A2B10G10R10_USCALED_PACK32: |
1154 | case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: |
1155 | if(writeRGBA) |
1156 | { |
1157 | *Pointer<UInt>(element) = |
1158 | UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)); |
1159 | } |
1160 | else |
1161 | { |
1162 | unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | |
1163 | (writeB ? 0x3FF00000 : 0x0000) | |
1164 | (writeG ? 0x000FFC00 : 0x0000) | |
1165 | (writeR ? 0x000003FF : 0x0000); |
1166 | unsigned int unmask = ~mask; |
1167 | *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | |
1168 | (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask)); |
1169 | } |
1170 | break; |
1171 | case VK_FORMAT_A2R10G10B10_UINT_PACK32: |
1172 | case VK_FORMAT_A2R10G10B10_SINT_PACK32: |
1173 | case VK_FORMAT_A2R10G10B10_USCALED_PACK32: |
1174 | case VK_FORMAT_A2R10G10B10_SSCALED_PACK32: |
1175 | if(writeRGBA) |
1176 | { |
1177 | *Pointer<UInt>(element) = |
1178 | UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)); |
1179 | } |
1180 | else |
1181 | { |
1182 | unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | |
1183 | (writeR ? 0x3FF00000 : 0x0000) | |
1184 | (writeG ? 0x000FFC00 : 0x0000) | |
1185 | (writeB ? 0x000003FF : 0x0000); |
1186 | unsigned int unmask = ~mask; |
1187 | *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | |
1188 | (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask)); |
1189 | } |
1190 | break; |
1191 | case VK_FORMAT_B8G8R8A8_UINT: |
1192 | case VK_FORMAT_B8G8R8A8_USCALED: |
1193 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } |
1194 | case VK_FORMAT_B8G8R8_UINT: |
1195 | case VK_FORMAT_B8G8R8_USCALED: |
1196 | case VK_FORMAT_B8G8R8_SRGB: |
1197 | if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); } |
1198 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } |
1199 | if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); } |
1200 | break; |
1201 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
1202 | case VK_FORMAT_R8G8B8A8_UINT: |
1203 | case VK_FORMAT_R8G8B8A8_USCALED: |
1204 | case VK_FORMAT_A8B8G8R8_USCALED_PACK32: |
1205 | if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } |
1206 | case VK_FORMAT_R8G8B8_UINT: |
1207 | case VK_FORMAT_R8G8B8_USCALED: |
1208 | if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } |
1209 | case VK_FORMAT_R8G8_UINT: |
1210 | case VK_FORMAT_R8G8_USCALED: |
1211 | if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } |
1212 | case VK_FORMAT_R8_UINT: |
1213 | case VK_FORMAT_R8_USCALED: |
1214 | case VK_FORMAT_S8_UINT: |
1215 | if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); } |
1216 | break; |
1217 | case VK_FORMAT_R16G16B16A16_SINT: |
1218 | case VK_FORMAT_R16G16B16A16_SSCALED: |
1219 | if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } |
1220 | case VK_FORMAT_R16G16B16_SINT: |
1221 | case VK_FORMAT_R16G16B16_SSCALED: |
1222 | if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } |
1223 | case VK_FORMAT_R16G16_SINT: |
1224 | case VK_FORMAT_R16G16_SSCALED: |
1225 | if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } |
1226 | case VK_FORMAT_R16_SINT: |
1227 | case VK_FORMAT_R16_SSCALED: |
1228 | if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } |
1229 | break; |
1230 | case VK_FORMAT_R16G16B16A16_UINT: |
1231 | case VK_FORMAT_R16G16B16A16_USCALED: |
1232 | if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } |
1233 | case VK_FORMAT_R16G16B16_UINT: |
1234 | case VK_FORMAT_R16G16B16_USCALED: |
1235 | if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } |
1236 | case VK_FORMAT_R16G16_UINT: |
1237 | case VK_FORMAT_R16G16_USCALED: |
1238 | if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } |
1239 | case VK_FORMAT_R16_UINT: |
1240 | case VK_FORMAT_R16_USCALED: |
1241 | if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } |
1242 | break; |
1243 | case VK_FORMAT_R32G32B32A32_SINT: |
1244 | if(writeRGBA) |
1245 | { |
1246 | *Pointer<Int4>(element) = c; |
1247 | } |
1248 | else |
1249 | { |
1250 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
1251 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
1252 | if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } |
1253 | if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); } |
1254 | } |
1255 | break; |
1256 | case VK_FORMAT_R32G32B32_SINT: |
1257 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
1258 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
1259 | if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } |
1260 | break; |
1261 | case VK_FORMAT_R32G32_SINT: |
1262 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
1263 | if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } |
1264 | break; |
1265 | case VK_FORMAT_R32_SINT: |
1266 | if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } |
1267 | break; |
1268 | case VK_FORMAT_R32G32B32A32_UINT: |
1269 | if(writeRGBA) |
1270 | { |
1271 | *Pointer<UInt4>(element) = As<UInt4>(c); |
1272 | } |
1273 | else |
1274 | { |
1275 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
1276 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } |
1277 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } |
1278 | if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); } |
1279 | } |
1280 | break; |
1281 | case VK_FORMAT_R32G32B32_UINT: |
1282 | if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } |
1283 | case VK_FORMAT_R32G32_UINT: |
1284 | if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } |
1285 | case VK_FORMAT_R32_UINT: |
1286 | if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } |
1287 | break; |
1288 | default: |
1289 | UNSUPPORTED("Blitter destination format %d" , (int)state.destFormat); |
1290 | } |
1291 | } |
1292 | |
1293 | void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled) |
1294 | { |
1295 | float4 scale{}, unscale{}; |
1296 | |
1297 | if(state.clearOperation && |
1298 | state.sourceFormat.isNonNormalizedInteger() && |
1299 | !state.destFormat.isNonNormalizedInteger()) |
1300 | { |
			// If we're clearing with an int or uint color into a normalized-format
			// destination, the whole range of the int or uint color must be scaled
			// to [0, 1].
1303 | switch(state.sourceFormat) |
1304 | { |
1305 | case VK_FORMAT_R32G32B32A32_SINT: |
1306 | unscale = replicate(static_cast<float>(0x7FFFFFFF)); |
1307 | break; |
1308 | case VK_FORMAT_R32G32B32A32_UINT: |
1309 | unscale = replicate(static_cast<float>(0xFFFFFFFF)); |
1310 | break; |
1311 | default: |
1312 | UNSUPPORTED("Blitter source format %d" , (int)state.sourceFormat); |
1313 | } |
1314 | } |
1315 | else |
1316 | { |
1317 | unscale = state.sourceFormat.getScale(); |
1318 | } |
1319 | |
1320 | scale = state.destFormat.getScale(); |
1321 | |
1322 | bool srcSRGB = state.sourceFormat.isSRGBformat(); |
1323 | bool dstSRGB = state.destFormat.isSRGBformat(); |
1324 | |
1325 | if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded. |
1326 | { |
1327 | value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale |
1328 | Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale |
1329 | value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value); |
1330 | value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale |
1331 | } |
1332 | else if(unscale != scale) |
1333 | { |
1334 | value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w); |
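			// E.g. blitting R8_UNORM to R16_UNORM: readFloat4() yields raw values
			// in [0, 255] (unscale = 255) while the destination expects [0, 65535]
			// (scale = 0xFFFF), so each component is multiplied by 65535 / 255 = 257.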
1335 | } |
1336 | |
1337 | if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat()) |
1338 | { |
1339 | value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w)); |
1340 | |
1341 | value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x, |
1342 | state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y, |
1343 | state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z, |
1344 | state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w)); |
1345 | } |
1346 | } |
1347 | |
1348 | Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout) |
1349 | { |
1350 | if(!quadLayout) |
1351 | { |
1352 | return y * pitchB + x * bytes; |
1353 | } |
1354 | else |
1355 | { |
1356 | // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1) |
1357 | return (y & Int(~1)) * pitchB + |
1358 | ((((y & Int(1)) + x) << 1) - (x & Int(1))) * bytes; |
1359 | } |
1360 | } |
1361 | |
1362 | Float4 Blitter::LinearToSRGB(Float4 &c) |
1363 | { |
1364 | Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f); |
1365 | Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f); |
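		// The Min/Max pair selects the correct piecewise sRGB segment without
		// branching: under the 0.0031308 threshold the linear segment exceeds the
		// power curve, while above it lc is pinned at 0.0031308 * 12.92 ~= 0.04045
		// and ec takes over.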
1366 | |
1367 | Float4 s = c; |
1368 | s.xyz = Max(lc, ec); |
1369 | |
1370 | return s; |
1371 | } |
1372 | |
1373 | Float4 Blitter::sRGBtoLinear(Float4 &c) |
1374 | { |
1375 | Float4 lc = c * Float4(1.0f / 12.92f); |
1376 | Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f)); |
1377 | |
1378 | Int4 linear = CmpLT(c, Float4(0.04045f)); |
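		// 'linear' is a per-component all-ones mask where c is below the 0.04045 sRGB
		// threshold; the bitwise blend below uses it to select lc there and ec elsewhere.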
1379 | |
1380 | Float4 s = c; |
1381 | s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse() |
1382 | |
1383 | return s; |
1384 | } |
1385 | |
1386 | Blitter::BlitRoutineType Blitter::generate(const State &state) |
1387 | { |
1388 | BlitFunction function; |
1389 | { |
1390 | Pointer<Byte> blit(function.Arg<0>()); |
1391 | |
1392 | Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source)); |
1393 | Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest)); |
1394 | Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB)); |
1395 | Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB)); |
1396 | |
1397 | Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0)); |
1398 | Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0)); |
1399 | Float w = *Pointer<Float>(blit + OFFSET(BlitData,w)); |
1400 | Float h = *Pointer<Float>(blit + OFFSET(BlitData,h)); |
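			// (x0, y0) and (w, h) define the linear mapping from destination pixel
			// indices to source coordinates: x = x0 + i * w, y = y0 + j * h. Clears
			// pass x0 = y0 = 0.5 with w = h = 0 (see clear()), so every destination
			// pixel reads the same constant source value.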
1401 | |
1402 | Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d)); |
1403 | Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d)); |
1404 | Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d)); |
1405 | Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d)); |
1406 | |
1407 | Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth)); |
1408 | Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight)); |
1409 | |
1410 | bool intSrc = state.sourceFormat.isNonNormalizedInteger(); |
1411 | bool intDst = state.destFormat.isNonNormalizedInteger(); |
1412 | bool intBoth = intSrc && intDst; |
1413 | bool srcQuadLayout = state.sourceFormat.hasQuadLayout(); |
1414 | bool dstQuadLayout = state.destFormat.hasQuadLayout(); |
1415 | int srcBytes = state.sourceFormat.bytes(); |
1416 | int dstBytes = state.destFormat.bytes(); |
1417 | |
1418 | bool hasConstantColorI = false; |
1419 | Int4 constantColorI; |
1420 | bool hasConstantColorF = false; |
1421 | Float4 constantColorF; |
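			// Clear operations read a single constant texel, so it is read (and converted)
			// once here, outside the pixel loops. Integer clears stay in Int4 to avoid
			// losing precision in a round-trip through float.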
1422 | if(state.clearOperation) |
1423 | { |
1424 | if(intBoth) // Integer types |
1425 | { |
1426 | constantColorI = readInt4(source, state); |
1427 | hasConstantColorI = true; |
1428 | } |
1429 | else |
1430 | { |
1431 | constantColorF = readFloat4(source, state); |
1432 | hasConstantColorF = true; |
1433 | |
1434 | ApplyScaleAndClamp(constantColorF, state); |
1435 | } |
1436 | } |
1437 | |
1438 | For(Int j = y0d, j < y1d, j++) |
1439 | { |
1440 | Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h; |
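				// Quad-layout destinations interleave two rows into quads of four texels,
				// mirroring the addressing in ComputeOffset().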
1441 | Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB; |
1442 | |
1443 | For(Int i = x0d, i < x1d, i++) |
1444 | { |
1445 | Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w; |
1446 | Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes; |
1447 | |
1448 | if(hasConstantColorI) |
1449 | { |
1450 | for(int s = 0; s < state.destSamples; s++) |
1451 | { |
1452 | write(constantColorI, d, state); |
1453 | |
1454 | d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); |
1455 | } |
1456 | } |
1457 | else if(hasConstantColorF) |
1458 | { |
1459 | for(int s = 0; s < state.destSamples; s++) |
1460 | { |
1461 | write(constantColorF, d, state); |
1462 | |
1463 | d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); |
1464 | } |
1465 | } |
1466 | else if(intBoth) // Integer types do not support filtering |
1467 | { |
1468 | Int X = Int(x); |
1469 | Int Y = Int(y); |
1470 | |
1471 | if(state.clampToEdge) |
1472 | { |
1473 | X = Clamp(X, 0, sWidth - 1); |
1474 | Y = Clamp(Y, 0, sHeight - 1); |
1475 | } |
1476 | |
1477 | Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); |
1478 | |
1479 | // When both formats are true integer types, we don't go to float to avoid losing precision |
1480 | Int4 color = readInt4(s, state); |
						for(int sample = 0; sample < state.destSamples; sample++)
1482 | { |
1483 | write(color, d, state); |
1484 | |
1485 | d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); |
1486 | } |
1487 | } |
1488 | else |
1489 | { |
1490 | Float4 color; |
1491 | |
1492 | bool preScaled = false; |
1493 | if(!state.filter || intSrc) |
1494 | { |
1495 | Int X = Int(x); |
1496 | Int Y = Int(y); |
1497 | |
1498 | if(state.clampToEdge) |
1499 | { |
1500 | X = Clamp(X, 0, sWidth - 1); |
1501 | Y = Clamp(Y, 0, sHeight - 1); |
1502 | } |
1503 | |
1504 | Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); |
1505 | |
1506 | color = readFloat4(s, state); |
1507 | |
1508 | if(state.srcSamples > 1) // Resolve multisampled source |
1509 | { |
1510 | if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB |
1511 | { |
1512 | ApplyScaleAndClamp(color, state); |
1513 | preScaled = true; |
1514 | } |
1515 | Float4 accum = color; |
1516 | for(int sample = 1; sample < state.srcSamples; sample++) |
1517 | { |
1518 | s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB)); |
1519 | color = readFloat4(s, state); |
1520 | |
1521 | if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB |
1522 | { |
1523 | ApplyScaleAndClamp(color, state); |
1524 | preScaled = true; |
1525 | } |
1526 | accum += color; |
1527 | } |
1528 | color = accum * Float4(1.0f / static_cast<float>(state.srcSamples)); |
1529 | } |
1530 | } |
1531 | else // Bilinear filtering |
1532 | { |
1533 | Float X = x; |
1534 | Float Y = y; |
1535 | |
1536 | if(state.clampToEdge) |
1537 | { |
1538 | X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f); |
1539 | Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f); |
1540 | } |
1541 | |
1542 | Float x0 = X - 0.5f; |
1543 | Float y0 = Y - 0.5f; |
1544 | |
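							// Clamp the 2x2 filter footprint to the source bounds;
							// out-of-range taps reuse the nearest edge texel.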
1545 | Int X0 = Max(Int(x0), 0); |
1546 | Int Y0 = Max(Int(y0), 0); |
1547 | |
1548 | Int X1 = X0 + 1; |
1549 | Int Y1 = Y0 + 1; |
1550 | X1 = IfThenElse(X1 >= sWidth, X0, X1); |
1551 | Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1); |
1552 | |
1553 | Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout); |
1554 | Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout); |
1555 | Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout); |
1556 | Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout); |
1557 | |
1558 | Float4 c00 = readFloat4(s00, state); |
1559 | Float4 c01 = readFloat4(s01, state); |
1560 | Float4 c10 = readFloat4(s10, state); |
1561 | Float4 c11 = readFloat4(s11, state); |
1562 | |
1563 | if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB |
1564 | { |
1565 | ApplyScaleAndClamp(c00, state); |
1566 | ApplyScaleAndClamp(c01, state); |
1567 | ApplyScaleAndClamp(c10, state); |
1568 | ApplyScaleAndClamp(c11, state); |
1569 | preScaled = true; |
1570 | } |
1571 | |
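							// Standard bilinear blend: fx/fy are the fractional positions of the
							// sample point within the 2x2 footprint, ix/iy their complements.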
1572 | Float4 fx = Float4(x0 - Float(X0)); |
1573 | Float4 fy = Float4(y0 - Float(Y0)); |
1574 | Float4 ix = Float4(1.0f) - fx; |
1575 | Float4 iy = Float4(1.0f) - fy; |
1576 | |
1577 | color = (c00 * ix + c01 * fx) * iy + |
1578 | (c10 * ix + c11 * fx) * fy; |
1579 | } |
1580 | |
1581 | ApplyScaleAndClamp(color, state, preScaled); |
1582 | |
1583 | for(int s = 0; s < state.destSamples; s++) |
1584 | { |
1585 | write(color, d, state); |
1586 | |
1587 | d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); |
1588 | } |
1589 | } |
1590 | } |
1591 | } |
1592 | } |
1593 | |
		return function("BlitRoutine");
1595 | } |
1596 | |
1597 | Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state) |
1598 | { |
1599 | std::unique_lock<std::mutex> lock(blitMutex); |
1600 | auto blitRoutine = blitCache.query(state); |
1601 | |
1602 | if(!blitRoutine) |
1603 | { |
1604 | blitRoutine = generate(state); |
1605 | blitCache.add(state, blitRoutine); |
1606 | } |
1607 | |
1608 | return blitRoutine; |
1609 | } |
1610 | |
1611 | Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state) |
1612 | { |
1613 | std::unique_lock<std::mutex> lock(cornerUpdateMutex); |
1614 | auto cornerUpdateRoutine = cornerUpdateCache.query(state); |
1615 | |
1616 | if(!cornerUpdateRoutine) |
1617 | { |
1618 | cornerUpdateRoutine = generateCornerUpdate(state); |
1619 | cornerUpdateCache.add(state, cornerUpdateRoutine); |
1620 | } |
1621 | |
1622 | return cornerUpdateRoutine; |
1623 | } |
1624 | |
1625 | void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch) |
1626 | { |
1627 | auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask); |
1628 | auto format = src->getFormat(aspect); |
1629 | State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, |
1630 | Options{false, false}); |
1631 | |
1632 | auto blitRoutine = getBlitRoutine(state); |
1633 | if(!blitRoutine) |
1634 | { |
1635 | return; |
1636 | } |
1637 | |
1638 | BlitData data = |
1639 | { |
1640 | nullptr, // source |
1641 | dst, // dest |
1642 | src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB |
1643 | bufferRowPitch, // dPitchB |
1644 | src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB |
1645 | bufferSlicePitch, // dSliceB |
1646 | |
1647 | 0, 0, 1, 1, |
1648 | |
1649 | 0, // y0d |
1650 | static_cast<int>(extent.height), // y1d |
1651 | 0, // x0d |
1652 | static_cast<int>(extent.width), // x1d |
1653 | |
1654 | static_cast<int>(extent.width), // sWidth |
			static_cast<int>(extent.height) // sHeight
1656 | }; |
1657 | |
1658 | VkOffset3D srcOffset = { 0, 0, offset.z }; |
1659 | |
1660 | VkImageSubresourceLayers srcSubresLayers = subresource; |
1661 | srcSubresLayers.layerCount = 1; |
1662 | |
1663 | VkImageSubresourceRange srcSubresRange = |
1664 | { |
1665 | subresource.aspectMask, |
1666 | subresource.mipLevel, |
1667 | 1, |
1668 | subresource.baseArrayLayer, |
1669 | subresource.layerCount |
1670 | }; |
1671 | |
1672 | uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange); |
1673 | |
1674 | for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++) |
1675 | { |
1676 | srcOffset.z = offset.z; |
1677 | |
1678 | for(auto i = 0u; i < extent.depth; i++) |
1679 | { |
1680 | data.source = src->getTexelPointer(srcOffset, srcSubresLayers); |
1681 | ASSERT(data.source < src->end()); |
1682 | blitRoutine(&data); |
1683 | srcOffset.z++; |
1684 | data.dest = (dst += bufferSlicePitch); |
1685 | } |
1686 | } |
1687 | } |
1688 | |
1689 | void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch) |
1690 | { |
1691 | auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask); |
1692 | auto format = dst->getFormat(aspect); |
1693 | State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, |
1694 | Options{false, false}); |
1695 | |
1696 | auto blitRoutine = getBlitRoutine(state); |
1697 | if(!blitRoutine) |
1698 | { |
1699 | return; |
1700 | } |
1701 | |
1702 | BlitData data = |
1703 | { |
1704 | src, // source |
1705 | nullptr, // dest |
1706 | bufferRowPitch, // sPitchB |
1707 | dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB |
1708 | bufferSlicePitch, // sSliceB |
1709 | dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB |
1710 | |
			static_cast<float>(-offset.x), // x0: with w == 1, destination pixel i samples buffer texel i - offset.x
			static_cast<float>(-offset.y), // y0: likewise for rows
			1.0f, // w
			1.0f, // h
1715 | |
1716 | offset.y, // y0d |
1717 | static_cast<int>(offset.y + extent.height), // y1d |
1718 | offset.x, // x0d |
1719 | static_cast<int>(offset.x + extent.width), // x1d |
1720 | |
1721 | static_cast<int>(extent.width), // sWidth |
			static_cast<int>(extent.height) // sHeight
1723 | }; |
1724 | |
1725 | VkOffset3D dstOffset = { 0, 0, offset.z }; |
1726 | |
1727 | VkImageSubresourceLayers dstSubresLayers = subresource; |
1728 | dstSubresLayers.layerCount = 1; |
1729 | |
1730 | VkImageSubresourceRange dstSubresRange = |
1731 | { |
1732 | subresource.aspectMask, |
1733 | subresource.mipLevel, |
1734 | 1, |
1735 | subresource.baseArrayLayer, |
1736 | subresource.layerCount |
1737 | }; |
1738 | |
1739 | uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange); |
1740 | |
1741 | for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++) |
1742 | { |
1743 | dstOffset.z = offset.z; |
1744 | |
1745 | for(auto i = 0u; i < extent.depth; i++) |
1746 | { |
1747 | data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers); |
1748 | ASSERT(data.dest < dst->end()); |
1749 | blitRoutine(&data); |
1750 | dstOffset.z++; |
1751 | data.source = (src += bufferSlicePitch); |
1752 | } |
1753 | } |
1754 | } |
1755 | |
1756 | void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter) |
1757 | { |
1758 | if(dst->getFormat() == VK_FORMAT_UNDEFINED) |
1759 | { |
1760 | return; |
1761 | } |
1762 | |
1763 | if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) || |
1764 | (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask)) |
1765 | { |
1766 | UNIMPLEMENTED("region" ); |
1767 | } |
1768 | |
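		// Normalize mirrored blits: if the destination rectangle is inverted, swap the
		// destination bounds into increasing order and swap the source bounds in tandem,
		// so the loops always walk forward while the flip is preserved in source space.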
1769 | if(region.dstOffsets[0].x > region.dstOffsets[1].x) |
1770 | { |
1771 | std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x); |
1772 | std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x); |
1773 | } |
1774 | |
1775 | if(region.dstOffsets[0].y > region.dstOffsets[1].y) |
1776 | { |
1777 | std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y); |
1778 | std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y); |
1779 | } |
1780 | |
1781 | VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask); |
1782 | VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask); |
1783 | VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel); |
1784 | |
1785 | int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z); |
1786 | ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z)); |
1787 | |
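		// widthRatio/heightRatio are the source-per-destination step sizes; x0/y0 place
		// the center (the +0.5 offset) of the first destination pixel in source coordinates.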
1788 | float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) / |
1789 | static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x); |
1790 | float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) / |
1791 | static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y); |
1792 | float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio; |
1793 | float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio; |
1794 | |
1795 | auto srcFormat = src->getFormat(srcAspect); |
1796 | auto dstFormat = dst->getFormat(dstAspect); |
1797 | |
1798 | bool doFilter = (filter != VK_FILTER_NEAREST); |
1799 | bool allowSRGBConversion = |
1800 | doFilter || |
1801 | (src->getSampleCountFlagBits() > 1) || |
1802 | (srcFormat.isSRGBformat() != dstFormat.isSRGBformat()); |
1803 | |
		State state(srcFormat, dstFormat, src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
		            Options{ doFilter, allowSRGBConversion });
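		// Sampling must clamp to the source edge when the blit region reaches outside the
		// source image, or when a filter tap at the top-left corner would (x0/y0 < 0.5).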
1806 | state.clampToEdge = (region.srcOffsets[0].x < 0) || |
1807 | (region.srcOffsets[0].y < 0) || |
1808 | (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) || |
1809 | (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) || |
1810 | (doFilter && ((x0 < 0.5f) || (y0 < 0.5f))); |
1811 | |
1812 | auto blitRoutine = getBlitRoutine(state); |
1813 | if(!blitRoutine) |
1814 | { |
1815 | return; |
1816 | } |
1817 | |
1818 | BlitData data = |
1819 | { |
1820 | nullptr, // source |
1821 | nullptr, // dest |
1822 | src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB |
1823 | dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB |
1824 | src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB |
1825 | dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB |
1826 | |
1827 | x0, |
1828 | y0, |
1829 | widthRatio, |
1830 | heightRatio, |
1831 | |
1832 | region.dstOffsets[0].y, // y0d |
1833 | region.dstOffsets[1].y, // y1d |
1834 | region.dstOffsets[0].x, // x0d |
1835 | region.dstOffsets[1].x, // x1d |
1836 | |
1837 | static_cast<int>(srcExtent.width), // sWidth |
			static_cast<int>(srcExtent.height) // sHeight
1839 | }; |
1840 | |
1841 | VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z }; |
1842 | VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z }; |
1843 | |
1844 | VkImageSubresourceLayers srcSubresLayers = |
1845 | { |
1846 | region.srcSubresource.aspectMask, |
1847 | region.srcSubresource.mipLevel, |
1848 | region.srcSubresource.baseArrayLayer, |
1849 | 1 |
1850 | }; |
1851 | |
1852 | VkImageSubresourceLayers dstSubresLayers = |
1853 | { |
1854 | region.dstSubresource.aspectMask, |
1855 | region.dstSubresource.mipLevel, |
1856 | region.dstSubresource.baseArrayLayer, |
1857 | 1 |
1858 | }; |
1859 | |
1860 | VkImageSubresourceRange srcSubresRange = |
1861 | { |
1862 | region.srcSubresource.aspectMask, |
1863 | region.srcSubresource.mipLevel, |
1864 | 1, |
1865 | region.srcSubresource.baseArrayLayer, |
1866 | region.srcSubresource.layerCount |
1867 | }; |
1868 | |
1869 | uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange); |
1870 | |
1871 | for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++) |
1872 | { |
1873 | srcOffset.z = region.srcOffsets[0].z; |
1874 | dstOffset.z = region.dstOffsets[0].z; |
1875 | |
1876 | for(int i = 0; i < numSlices; i++) |
1877 | { |
1878 | data.source = src->getTexelPointer(srcOffset, srcSubresLayers); |
1879 | data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers); |
1880 | |
1881 | ASSERT(data.source < src->end()); |
1882 | ASSERT(data.dest < dst->end()); |
1883 | |
1884 | blitRoutine(&data); |
1885 | srcOffset.z++; |
1886 | dstOffset.z++; |
1887 | } |
1888 | } |
1889 | } |
1890 | |
1891 | void Blitter::computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state) |
1892 | { |
1893 | int bytes = state.sourceFormat.bytes(); |
1894 | bool quadLayout = state.sourceFormat.hasQuadLayout(); |
1895 | |
1896 | Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes, quadLayout), state) + |
1897 | readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes, quadLayout), state) + |
1898 | readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes, quadLayout), state); |
1899 | |
1900 | c *= Float4(1.0f / 3.0f); |
1901 | |
1902 | write(c, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state); |
1903 | } |
1904 | |
1905 | Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State& state) |
1906 | { |
1907 | // Reading and writing from/to the same image |
1908 | ASSERT(state.sourceFormat == state.destFormat); |
1909 | ASSERT(state.srcSamples == state.destSamples); |
1910 | |
1911 | if(state.srcSamples != 1) |
1912 | { |
1913 | UNIMPLEMENTED("state.srcSamples %d" , state.srcSamples); |
1914 | } |
1915 | |
1916 | CornerUpdateFunction function; |
1917 | { |
1918 | Pointer<Byte> blit(function.Arg<0>()); |
1919 | |
1920 | Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers)); |
1921 | Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB)); |
			UInt layerSize = *Pointer<UInt>(blit + OFFSET(CubeBorderData, layerSize));
			UInt dim = *Pointer<UInt>(blit + OFFSET(CubeBorderData, dim));
1924 | |
1925 | // Low Border, Low Pixel, High Border, High Pixel |
1926 | Int LB(-1), LP(0), HB(dim), HP(dim-1); |
1927 | |
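			// The six cube faces are stored as consecutive layers; update the four
			// corner border texels of each face.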
1928 | for(int face = 0; face < 6; face++) |
1929 | { |
1930 | computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state); |
1931 | computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state); |
1932 | computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state); |
1933 | computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state); |
1934 | layers = layers + layerSize; |
1935 | } |
1936 | } |
1937 | |
		return function("CornerUpdateRoutine");
1939 | } |
1940 | |
1941 | void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers) |
1942 | { |
1943 | if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6)) |
1944 | { |
1945 | UNIMPLEMENTED("image->getArrayLayers() %d, baseArrayLayer %d" , |
1946 | image->getArrayLayers(), subresourceLayers.baseArrayLayer); |
1947 | } |
1948 | |
1949 | // From Vulkan 1.1 spec, section 11.5. Image Views: |
1950 | // "For cube and cube array image views, the layers of the image view starting |
1951 | // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z." |
1952 | VkImageSubresourceLayers posX = subresourceLayers; |
1953 | posX.layerCount = 1; |
1954 | VkImageSubresourceLayers negX = posX; |
1955 | negX.baseArrayLayer++; |
1956 | VkImageSubresourceLayers posY = negX; |
1957 | posY.baseArrayLayer++; |
1958 | VkImageSubresourceLayers negY = posY; |
1959 | negY.baseArrayLayer++; |
1960 | VkImageSubresourceLayers posZ = negY; |
1961 | posZ.baseArrayLayer++; |
1962 | VkImageSubresourceLayers negZ = posZ; |
1963 | negZ.baseArrayLayer++; |
1964 | |
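		// Fill each face's one-texel border with texels from the adjacent face, so that
		// filtering across a cube edge samples the neighboring face seamlessly.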
1965 | // Copy top / bottom |
1966 | copyCubeEdge(image, posX, BOTTOM, negY, RIGHT); |
1967 | copyCubeEdge(image, posY, BOTTOM, posZ, TOP); |
1968 | copyCubeEdge(image, posZ, BOTTOM, negY, TOP); |
1969 | copyCubeEdge(image, negX, BOTTOM, negY, LEFT); |
1970 | copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM); |
1971 | copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM); |
1972 | |
1973 | copyCubeEdge(image, posX, TOP, posY, RIGHT); |
1974 | copyCubeEdge(image, posY, TOP, negZ, TOP); |
1975 | copyCubeEdge(image, posZ, TOP, posY, BOTTOM); |
1976 | copyCubeEdge(image, negX, TOP, posY, LEFT); |
1977 | copyCubeEdge(image, negY, TOP, posZ, BOTTOM); |
1978 | copyCubeEdge(image, negZ, TOP, posY, TOP); |
1979 | |
1980 | // Copy left / right |
1981 | copyCubeEdge(image, posX, RIGHT, negZ, LEFT); |
1982 | copyCubeEdge(image, posY, RIGHT, posX, TOP); |
1983 | copyCubeEdge(image, posZ, RIGHT, posX, LEFT); |
1984 | copyCubeEdge(image, negX, RIGHT, posZ, LEFT); |
1985 | copyCubeEdge(image, negY, RIGHT, posX, BOTTOM); |
1986 | copyCubeEdge(image, negZ, RIGHT, negX, LEFT); |
1987 | |
1988 | copyCubeEdge(image, posX, LEFT, posZ, RIGHT); |
1989 | copyCubeEdge(image, posY, LEFT, negX, TOP); |
1990 | copyCubeEdge(image, posZ, LEFT, negX, RIGHT); |
1991 | copyCubeEdge(image, negX, LEFT, negZ, RIGHT); |
1992 | copyCubeEdge(image, negY, LEFT, negX, BOTTOM); |
1993 | copyCubeEdge(image, negZ, LEFT, posX, RIGHT); |
1994 | |
1995 | // Compute corner colors |
1996 | VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask); |
1997 | vk::Format format = image->getFormat(aspect); |
1998 | VkSampleCountFlagBits samples = image->getSampleCountFlagBits(); |
1999 | State state(format, format, samples, samples, Options{ 0xF }); |
2000 | |
2001 | if(samples != VK_SAMPLE_COUNT_1_BIT) |
2002 | { |
2003 | UNIMPLEMENTED("Multi-sampled cube: %d samples" , static_cast<int>(samples)); |
2004 | } |
2005 | |
2006 | auto cornerUpdateRoutine = getCornerUpdateRoutine(state); |
2007 | if(!cornerUpdateRoutine) |
2008 | { |
2009 | return; |
2010 | } |
2011 | |
2012 | VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel); |
2013 | CubeBorderData data = |
2014 | { |
2015 | image->getTexelPointer({ 0, 0, 0 }, posX), |
2016 | image->rowPitchBytes(aspect, subresourceLayers.mipLevel), |
2017 | static_cast<uint32_t>(image->getLayerSize(aspect)), |
2018 | extent.width |
2019 | }; |
2020 | cornerUpdateRoutine(&data); |
2021 | } |
2022 | |
2023 | void Blitter::copyCubeEdge(vk::Image* image, |
2024 | const VkImageSubresourceLayers& dstSubresourceLayers, Edge dstEdge, |
2025 | const VkImageSubresourceLayers& srcSubresourceLayers, Edge srcEdge) |
2026 | { |
2027 | ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask); |
2028 | ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel); |
2029 | ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer); |
2030 | ASSERT(srcSubresourceLayers.layerCount == 1); |
2031 | ASSERT(dstSubresourceLayers.layerCount == 1); |
2032 | |
		// Figure out whether the edges are to be copied in reverse order relative to
		// one another. The copy should be reversed whenever the same edges are
		// contiguous, or when copying top <-> right or bottom <-> left. This follows
		// from the cube layout:
2036 | // |
2037 | // | +y | |
2038 | // | -x | +z | +x | -z | |
2039 | // | -y | |
2040 | |
2041 | bool reverse = (srcEdge == dstEdge) || |
2042 | ((srcEdge == TOP) && (dstEdge == RIGHT)) || |
2043 | ((srcEdge == RIGHT) && (dstEdge == TOP)) || |
2044 | ((srcEdge == BOTTOM) && (dstEdge == LEFT)) || |
2045 | ((srcEdge == LEFT) && (dstEdge == BOTTOM)); |
2046 | |
2047 | VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask); |
2048 | int bytes = image->getFormat(aspect).bytes(); |
2049 | int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel); |
2050 | |
2051 | VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel); |
2052 | int w = extent.width; |
2053 | int h = extent.height; |
2054 | if(w != h) |
2055 | { |
2056 | UNSUPPORTED("Cube doesn't have square faces : (%d, %d)" , w, h); |
2057 | } |
2058 | |
2059 | // Src is expressed in the regular [0, width-1], [0, height-1] space |
2060 | bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM)); |
2061 | int srcDelta = srcHorizontal ? bytes : pitchB; |
2062 | VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 }; |
2063 | |
2064 | // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space |
2065 | bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM)); |
2066 | int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1); |
2067 | VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 }; |
2068 | |
2069 | // Don't write in the corners |
2070 | if(dstHorizontal) |
2071 | { |
2072 | dstOffset.x += reverse ? w : 1; |
2073 | } |
2074 | else |
2075 | { |
2076 | dstOffset.y += reverse ? h : 1; |
2077 | } |
2078 | |
2079 | const uint8_t* src = static_cast<const uint8_t*>(image->getTexelPointer(srcOffset, srcSubresourceLayers)); |
2080 | uint8_t *dst = static_cast<uint8_t*>(image->getTexelPointer(dstOffset, dstSubresourceLayers)); |
2081 | ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end())); |
2082 | ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end())); |
2083 | |
2084 | for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta) |
2085 | { |
2086 | memcpy(dst, src, bytes); |
2087 | } |
2088 | } |
2089 | } |
2090 | |