1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "PixelProgram.hpp" |
16 | |
17 | #include "SamplerCore.hpp" |
18 | #include "Device/Primitive.hpp" |
19 | #include "Device/Renderer.hpp" |
20 | |
21 | namespace sw |
22 | { |
23 | // Union all cMask and return it as 4 booleans |
24 | Int4 PixelProgram::maskAny(Int cMask[4]) const |
25 | { |
26 | // See if at least 1 sample is used |
27 | Int maskUnion = cMask[0]; |
28 | for(auto i = 1u; i < state.multiSample; i++) |
29 | { |
30 | maskUnion |= cMask[i]; |
31 | } |
32 | |
33 | // Convert to 4 booleans |
34 | Int4 laneBits = Int4(1, 2, 4, 8); |
35 | Int4 laneShiftsToMSB = Int4(31, 30, 29, 28); |
36 | Int4 mask(maskUnion); |
37 | mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31); |
38 | return mask; |
39 | } |
40 | |
41 | // Union all cMask/sMask/zMask and return it as 4 booleans |
42 | Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const |
43 | { |
44 | // See if at least 1 sample is used |
45 | Int maskUnion = cMask[0] & sMask[0] & zMask[0]; |
46 | for(auto i = 1u; i < state.multiSample; i++) |
47 | { |
48 | maskUnion |= (cMask[i] & sMask[i] & zMask[i]); |
49 | } |
50 | |
51 | // Convert to 4 booleans |
52 | Int4 laneBits = Int4(1, 2, 4, 8); |
53 | Int4 laneShiftsToMSB = Int4(31, 30, 29, 28); |
54 | Int4 mask(maskUnion); |
55 | mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31); |
56 | return mask; |
57 | } |
58 | |
59 | void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4]) |
60 | { |
61 | routine.setImmutableInputBuiltins(spirvShader); |
62 | |
63 | routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
64 | { |
65 | assert(builtin.SizeInComponents == 1); |
66 | value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID))))); |
67 | }); |
68 | |
69 | routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
70 | { |
71 | assert(builtin.SizeInComponents == 4); |
72 | value[builtin.FirstComponent+0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f); |
73 | value[builtin.FirstComponent+1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f); |
74 | value[builtin.FirstComponent+2] = z[0]; // sample 0 |
75 | value[builtin.FirstComponent+3] = w; |
76 | }); |
77 | |
78 | routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
79 | { |
80 | assert(builtin.SizeInComponents == 2); |
81 | value[builtin.FirstComponent+0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) + |
82 | SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX)))); |
83 | value[builtin.FirstComponent+1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) + |
84 | SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY)))); |
85 | }); |
86 | |
87 | routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
88 | { |
89 | assert(builtin.SizeInComponents == 1); |
90 | value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width)); |
91 | }); |
92 | |
93 | routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value) |
94 | { |
95 | assert(builtin.SizeInComponents == 1); |
96 | value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask)); |
97 | }); |
98 | |
99 | routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1); |
100 | routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1); |
101 | routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID)); |
102 | } |
103 | |
104 | void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4]) |
105 | { |
106 | routine.descriptorSets = data + OFFSET(DrawData, descriptorSets); |
107 | routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets); |
108 | routine.pushConstants = data + OFFSET(DrawData, pushConstants); |
109 | routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants)); |
110 | |
111 | auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing); |
112 | if (it != spirvShader->inputBuiltins.end()) |
113 | { |
114 | ASSERT(it->second.SizeInComponents == 1); |
115 | auto frontFacing = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask))); |
116 | routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(frontFacing); |
117 | } |
118 | |
119 | it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask); |
120 | if (it != spirvShader->inputBuiltins.end()) |
121 | { |
122 | static_assert(SIMD::Width == 4, "Expects SIMD width to be 4" ); |
123 | Int4 laneBits = Int4(1, 2, 4, 8); |
124 | |
125 | Int4 inputSampleMask = Int4(1) & CmpNEQ(Int4(cMask[0]) & laneBits, Int4(0)); |
126 | for (auto i = 1u; i < state.multiSample; i++) |
127 | { |
128 | inputSampleMask |= Int4(1 << i) & CmpNEQ(Int4(cMask[i]) & laneBits, Int4(0)); |
129 | } |
130 | |
131 | routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask); |
132 | // Sample mask input is an array, as the spec contemplates MSAA levels higher than 32. |
133 | // Fill any non-zero indices with 0. |
134 | for (auto i = 1u; i < it->second.SizeInComponents; i++) |
135 | routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0); |
136 | } |
137 | |
138 | // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is |
139 | // handled separately, through the cMask. |
140 | auto activeLaneMask = SIMD::Int(0xFFFFFFFF); |
141 | auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask); |
142 | routine.killMask = 0; |
143 | |
144 | spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets); |
145 | spirvShader->emitEpilog(&routine); |
146 | |
147 | for(int i = 0; i < RENDERTARGETS; i++) |
148 | { |
149 | c[i].x = routine.outputs[i * 4]; |
150 | c[i].y = routine.outputs[i * 4 + 1]; |
151 | c[i].z = routine.outputs[i * 4 + 2]; |
152 | c[i].w = routine.outputs[i * 4 + 3]; |
153 | } |
154 | |
155 | clampColor(c); |
156 | |
157 | if(spirvShader->getModes().ContainsKill) |
158 | { |
159 | for (auto i = 0u; i < state.multiSample; i++) |
160 | { |
161 | cMask[i] &= ~routine.killMask; |
162 | } |
163 | } |
164 | |
165 | it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask); |
166 | if (it != spirvShader->outputBuiltins.end()) |
167 | { |
168 | auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]); |
169 | |
170 | for (auto i = 0u; i < state.multiSample; i++) |
171 | { |
172 | cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0))); |
173 | } |
174 | } |
175 | |
176 | it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth); |
177 | if (it != spirvShader->outputBuiltins.end()) |
178 | { |
179 | oDepth = Min(Max(routine.getVariable(it->second.Id)[it->second.FirstComponent], Float4(0.0f)), Float4(1.0f)); |
180 | } |
181 | } |
182 | |
183 | Bool PixelProgram::alphaTest(Int cMask[4]) |
184 | { |
185 | if(!state.alphaToCoverage) |
186 | { |
187 | return true; |
188 | } |
189 | |
190 | alphaToCoverage(cMask, c[0].w); |
191 | |
192 | Int pass = cMask[0]; |
193 | |
194 | for(unsigned int q = 1; q < state.multiSample; q++) |
195 | { |
196 | pass = pass | cMask[q]; |
197 | } |
198 | |
199 | return pass != 0x0; |
200 | } |
201 | |
202 | void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) |
203 | { |
204 | for(int index = 0; index < RENDERTARGETS; index++) |
205 | { |
206 | if(!state.colorWriteActive(index)) |
207 | { |
208 | continue; |
209 | } |
210 | |
211 | auto format = state.targetFormat[index]; |
212 | switch(format) |
213 | { |
214 | case VK_FORMAT_A1R5G5B5_UNORM_PACK16: |
215 | case VK_FORMAT_R5G6B5_UNORM_PACK16: |
216 | case VK_FORMAT_B8G8R8A8_UNORM: |
217 | case VK_FORMAT_B8G8R8A8_SRGB: |
218 | case VK_FORMAT_R8G8B8A8_UNORM: |
219 | case VK_FORMAT_R8G8B8A8_SRGB: |
220 | case VK_FORMAT_R8G8_UNORM: |
221 | case VK_FORMAT_R8_UNORM: |
222 | case VK_FORMAT_R16G16_UNORM: |
223 | case VK_FORMAT_R16G16B16A16_UNORM: |
224 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
225 | case VK_FORMAT_A8B8G8R8_SRGB_PACK32: |
226 | case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
227 | for(unsigned int q = 0; q < state.multiSample; q++) |
228 | { |
229 | if(state.multiSampleMask & (1 << q)) |
230 | { |
231 | Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); |
232 | Vector4s color; |
233 | |
234 | color.x = convertFixed16(c[index].x, false); |
235 | color.y = convertFixed16(c[index].y, false); |
236 | color.z = convertFixed16(c[index].z, false); |
237 | color.w = convertFixed16(c[index].w, false); |
238 | |
239 | alphaBlend(index, buffer, color, x); |
240 | writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); |
241 | } |
242 | } |
243 | break; |
244 | case VK_FORMAT_R16_SFLOAT: |
245 | case VK_FORMAT_R16G16_SFLOAT: |
246 | case VK_FORMAT_R16G16B16A16_SFLOAT: |
247 | case VK_FORMAT_R32_SFLOAT: |
248 | case VK_FORMAT_R32G32_SFLOAT: |
249 | case VK_FORMAT_R32G32B32A32_SFLOAT: |
250 | case VK_FORMAT_R32_SINT: |
251 | case VK_FORMAT_R32G32_SINT: |
252 | case VK_FORMAT_R32G32B32A32_SINT: |
253 | case VK_FORMAT_R32_UINT: |
254 | case VK_FORMAT_R32G32_UINT: |
255 | case VK_FORMAT_R32G32B32A32_UINT: |
256 | case VK_FORMAT_R16_SINT: |
257 | case VK_FORMAT_R16G16_SINT: |
258 | case VK_FORMAT_R16G16B16A16_SINT: |
259 | case VK_FORMAT_R16_UINT: |
260 | case VK_FORMAT_R16G16_UINT: |
261 | case VK_FORMAT_R16G16B16A16_UINT: |
262 | case VK_FORMAT_R8_SINT: |
263 | case VK_FORMAT_R8G8_SINT: |
264 | case VK_FORMAT_R8G8B8A8_SINT: |
265 | case VK_FORMAT_R8_UINT: |
266 | case VK_FORMAT_R8G8_UINT: |
267 | case VK_FORMAT_R8G8B8A8_UINT: |
268 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
269 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
270 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
271 | for(unsigned int q = 0; q < state.multiSample; q++) |
272 | { |
273 | if(state.multiSampleMask & (1 << q)) |
274 | { |
275 | Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); |
276 | Vector4f color = c[index]; |
277 | |
278 | alphaBlend(index, buffer, color, x); |
279 | writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); |
280 | } |
281 | } |
282 | break; |
283 | default: |
284 | UNIMPLEMENTED("VkFormat: %d" , int(format)); |
285 | } |
286 | } |
287 | } |
288 | |
289 | void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) |
290 | { |
291 | for(int index = 0; index < RENDERTARGETS; index++) |
292 | { |
293 | if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage)) |
294 | { |
295 | continue; |
296 | } |
297 | |
298 | switch(state.targetFormat[index]) |
299 | { |
300 | case VK_FORMAT_UNDEFINED: |
301 | break; |
302 | case VK_FORMAT_A1R5G5B5_UNORM_PACK16: |
303 | case VK_FORMAT_R5G6B5_UNORM_PACK16: |
304 | case VK_FORMAT_B8G8R8A8_UNORM: |
305 | case VK_FORMAT_B8G8R8A8_SRGB: |
306 | case VK_FORMAT_R8G8B8A8_UNORM: |
307 | case VK_FORMAT_R8G8B8A8_SRGB: |
308 | case VK_FORMAT_R8G8_UNORM: |
309 | case VK_FORMAT_R8_UNORM: |
310 | case VK_FORMAT_R16G16_UNORM: |
311 | case VK_FORMAT_R16G16B16A16_UNORM: |
312 | case VK_FORMAT_A8B8G8R8_UNORM_PACK32: |
313 | case VK_FORMAT_A8B8G8R8_SRGB_PACK32: |
314 | case VK_FORMAT_A2B10G10R10_UNORM_PACK32: |
315 | oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); |
316 | oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); |
317 | oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); |
318 | oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); |
319 | break; |
320 | case VK_FORMAT_R32_SFLOAT: |
321 | case VK_FORMAT_R32G32_SFLOAT: |
322 | case VK_FORMAT_R32G32B32A32_SFLOAT: |
323 | case VK_FORMAT_R32_SINT: |
324 | case VK_FORMAT_R32G32_SINT: |
325 | case VK_FORMAT_R32G32B32A32_SINT: |
326 | case VK_FORMAT_R32_UINT: |
327 | case VK_FORMAT_R32G32_UINT: |
328 | case VK_FORMAT_R32G32B32A32_UINT: |
329 | case VK_FORMAT_R16_SFLOAT: |
330 | case VK_FORMAT_R16G16_SFLOAT: |
331 | case VK_FORMAT_R16G16B16A16_SFLOAT: |
332 | case VK_FORMAT_R16_SINT: |
333 | case VK_FORMAT_R16G16_SINT: |
334 | case VK_FORMAT_R16G16B16A16_SINT: |
335 | case VK_FORMAT_R16_UINT: |
336 | case VK_FORMAT_R16G16_UINT: |
337 | case VK_FORMAT_R16G16B16A16_UINT: |
338 | case VK_FORMAT_R8_SINT: |
339 | case VK_FORMAT_R8G8_SINT: |
340 | case VK_FORMAT_R8G8B8A8_SINT: |
341 | case VK_FORMAT_R8_UINT: |
342 | case VK_FORMAT_R8G8_UINT: |
343 | case VK_FORMAT_R8G8B8A8_UINT: |
344 | case VK_FORMAT_A8B8G8R8_UINT_PACK32: |
345 | case VK_FORMAT_A8B8G8R8_SINT_PACK32: |
346 | case VK_FORMAT_A2B10G10R10_UINT_PACK32: |
347 | break; |
348 | default: |
349 | UNIMPLEMENTED("VkFormat: %d" , int(state.targetFormat[index])); |
350 | } |
351 | } |
352 | } |
353 | |
354 | Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) |
355 | { |
356 | Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); |
357 | Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); |
358 | |
359 | return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); |
360 | } |
361 | } |
362 | |