1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "PixelProgram.hpp"
16
17#include "SamplerCore.hpp"
18#include "Device/Primitive.hpp"
19#include "Device/Renderer.hpp"
20
21namespace sw
22{
23 // Union all cMask and return it as 4 booleans
24 Int4 PixelProgram::maskAny(Int cMask[4]) const
25 {
26 // See if at least 1 sample is used
27 Int maskUnion = cMask[0];
28 for(auto i = 1u; i < state.multiSample; i++)
29 {
30 maskUnion |= cMask[i];
31 }
32
33 // Convert to 4 booleans
34 Int4 laneBits = Int4(1, 2, 4, 8);
35 Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
36 Int4 mask(maskUnion);
37 mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
38 return mask;
39 }
40
41 // Union all cMask/sMask/zMask and return it as 4 booleans
42 Int4 PixelProgram::maskAny(Int cMask[4], Int sMask[4], Int zMask[4]) const
43 {
44 // See if at least 1 sample is used
45 Int maskUnion = cMask[0] & sMask[0] & zMask[0];
46 for(auto i = 1u; i < state.multiSample; i++)
47 {
48 maskUnion |= (cMask[i] & sMask[i] & zMask[i]);
49 }
50
51 // Convert to 4 booleans
52 Int4 laneBits = Int4(1, 2, 4, 8);
53 Int4 laneShiftsToMSB = Int4(31, 30, 29, 28);
54 Int4 mask(maskUnion);
55 mask = ((mask & laneBits) << laneShiftsToMSB) >> Int4(31);
56 return mask;
57 }
58
59 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w, Int cMask[4])
60 {
61 routine.setImmutableInputBuiltins(spirvShader);
62
63 routine.setInputBuiltin(spirvShader, spv::BuiltInViewIndex, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
64 {
65 assert(builtin.SizeInComponents == 1);
66 value[builtin.FirstComponent] = As<Float4>(Int4((*Pointer<Int>(data + OFFSET(DrawData, viewID)))));
67 });
68
69 routine.setInputBuiltin(spirvShader, spv::BuiltInFragCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
70 {
71 assert(builtin.SizeInComponents == 4);
72 value[builtin.FirstComponent+0] = SIMD::Float(Float(x)) + SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f);
73 value[builtin.FirstComponent+1] = SIMD::Float(Float(y)) + SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f);
74 value[builtin.FirstComponent+2] = z[0]; // sample 0
75 value[builtin.FirstComponent+3] = w;
76 });
77
78 routine.setInputBuiltin(spirvShader, spv::BuiltInPointCoord, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
79 {
80 assert(builtin.SizeInComponents == 2);
81 value[builtin.FirstComponent+0] = SIMD::Float(0.5f, 1.5f, 0.5f, 1.5f) +
82 SIMD::Float(Float(x) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordX))));
83 value[builtin.FirstComponent+1] = SIMD::Float(0.5f, 0.5f, 1.5f, 1.5f) +
84 SIMD::Float(Float(y) - (*Pointer<Float>(primitive + OFFSET(Primitive, pointCoordY))));
85 });
86
87 routine.setInputBuiltin(spirvShader, spv::BuiltInSubgroupSize, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
88 {
89 assert(builtin.SizeInComponents == 1);
90 value[builtin.FirstComponent] = As<SIMD::Float>(SIMD::Int(SIMD::Width));
91 });
92
93 routine.setInputBuiltin(spirvShader, spv::BuiltInHelperInvocation, [&](const SpirvShader::BuiltinMapping& builtin, Array<SIMD::Float>& value)
94 {
95 assert(builtin.SizeInComponents == 1);
96 value[builtin.FirstComponent] = As<SIMD::Float>(~maskAny(cMask));
97 });
98
99 routine.windowSpacePosition[0] = x + SIMD::Int(0,1,0,1);
100 routine.windowSpacePosition[1] = y + SIMD::Int(0,0,1,1);
101 routine.viewID = *Pointer<Int>(data + OFFSET(DrawData, viewID));
102 }
103
104 void PixelProgram::applyShader(Int cMask[4], Int sMask[4], Int zMask[4])
105 {
106 routine.descriptorSets = data + OFFSET(DrawData, descriptorSets);
107 routine.descriptorDynamicOffsets = data + OFFSET(DrawData, descriptorDynamicOffsets);
108 routine.pushConstants = data + OFFSET(DrawData, pushConstants);
109 routine.constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, constants));
110
111 auto it = spirvShader->inputBuiltins.find(spv::BuiltInFrontFacing);
112 if (it != spirvShader->inputBuiltins.end())
113 {
114 ASSERT(it->second.SizeInComponents == 1);
115 auto frontFacing = Int4(*Pointer<Int>(primitive + OFFSET(Primitive, clockwiseMask)));
116 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(frontFacing);
117 }
118
119 it = spirvShader->inputBuiltins.find(spv::BuiltInSampleMask);
120 if (it != spirvShader->inputBuiltins.end())
121 {
122 static_assert(SIMD::Width == 4, "Expects SIMD width to be 4");
123 Int4 laneBits = Int4(1, 2, 4, 8);
124
125 Int4 inputSampleMask = Int4(1) & CmpNEQ(Int4(cMask[0]) & laneBits, Int4(0));
126 for (auto i = 1u; i < state.multiSample; i++)
127 {
128 inputSampleMask |= Int4(1 << i) & CmpNEQ(Int4(cMask[i]) & laneBits, Int4(0));
129 }
130
131 routine.getVariable(it->second.Id)[it->second.FirstComponent] = As<Float4>(inputSampleMask);
132 // Sample mask input is an array, as the spec contemplates MSAA levels higher than 32.
133 // Fill any non-zero indices with 0.
134 for (auto i = 1u; i < it->second.SizeInComponents; i++)
135 routine.getVariable(it->second.Id)[it->second.FirstComponent + i] = Float4(0);
136 }
137
138 // Note: all lanes initially active to facilitate derivatives etc. Actual coverage is
139 // handled separately, through the cMask.
140 auto activeLaneMask = SIMD::Int(0xFFFFFFFF);
141 auto storesAndAtomicsMask = maskAny(cMask, sMask, zMask);
142 routine.killMask = 0;
143
144 spirvShader->emit(&routine, activeLaneMask, storesAndAtomicsMask, descriptorSets);
145 spirvShader->emitEpilog(&routine);
146
147 for(int i = 0; i < RENDERTARGETS; i++)
148 {
149 c[i].x = routine.outputs[i * 4];
150 c[i].y = routine.outputs[i * 4 + 1];
151 c[i].z = routine.outputs[i * 4 + 2];
152 c[i].w = routine.outputs[i * 4 + 3];
153 }
154
155 clampColor(c);
156
157 if(spirvShader->getModes().ContainsKill)
158 {
159 for (auto i = 0u; i < state.multiSample; i++)
160 {
161 cMask[i] &= ~routine.killMask;
162 }
163 }
164
165 it = spirvShader->outputBuiltins.find(spv::BuiltInSampleMask);
166 if (it != spirvShader->outputBuiltins.end())
167 {
168 auto outputSampleMask = As<SIMD::Int>(routine.getVariable(it->second.Id)[it->second.FirstComponent]);
169
170 for (auto i = 0u; i < state.multiSample; i++)
171 {
172 cMask[i] &= SignMask(CmpNEQ(outputSampleMask & SIMD::Int(1<<i), SIMD::Int(0)));
173 }
174 }
175
176 it = spirvShader->outputBuiltins.find(spv::BuiltInFragDepth);
177 if (it != spirvShader->outputBuiltins.end())
178 {
179 oDepth = Min(Max(routine.getVariable(it->second.Id)[it->second.FirstComponent], Float4(0.0f)), Float4(1.0f));
180 }
181 }
182
183 Bool PixelProgram::alphaTest(Int cMask[4])
184 {
185 if(!state.alphaToCoverage)
186 {
187 return true;
188 }
189
190 alphaToCoverage(cMask, c[0].w);
191
192 Int pass = cMask[0];
193
194 for(unsigned int q = 1; q < state.multiSample; q++)
195 {
196 pass = pass | cMask[q];
197 }
198
199 return pass != 0x0;
200 }
201
202 void PixelProgram::rasterOperation(Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
203 {
204 for(int index = 0; index < RENDERTARGETS; index++)
205 {
206 if(!state.colorWriteActive(index))
207 {
208 continue;
209 }
210
211 auto format = state.targetFormat[index];
212 switch(format)
213 {
214 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
215 case VK_FORMAT_R5G6B5_UNORM_PACK16:
216 case VK_FORMAT_B8G8R8A8_UNORM:
217 case VK_FORMAT_B8G8R8A8_SRGB:
218 case VK_FORMAT_R8G8B8A8_UNORM:
219 case VK_FORMAT_R8G8B8A8_SRGB:
220 case VK_FORMAT_R8G8_UNORM:
221 case VK_FORMAT_R8_UNORM:
222 case VK_FORMAT_R16G16_UNORM:
223 case VK_FORMAT_R16G16B16A16_UNORM:
224 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
225 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
226 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
227 for(unsigned int q = 0; q < state.multiSample; q++)
228 {
229 if(state.multiSampleMask & (1 << q))
230 {
231 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
232 Vector4s color;
233
234 color.x = convertFixed16(c[index].x, false);
235 color.y = convertFixed16(c[index].y, false);
236 color.z = convertFixed16(c[index].z, false);
237 color.w = convertFixed16(c[index].w, false);
238
239 alphaBlend(index, buffer, color, x);
240 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
241 }
242 }
243 break;
244 case VK_FORMAT_R16_SFLOAT:
245 case VK_FORMAT_R16G16_SFLOAT:
246 case VK_FORMAT_R16G16B16A16_SFLOAT:
247 case VK_FORMAT_R32_SFLOAT:
248 case VK_FORMAT_R32G32_SFLOAT:
249 case VK_FORMAT_R32G32B32A32_SFLOAT:
250 case VK_FORMAT_R32_SINT:
251 case VK_FORMAT_R32G32_SINT:
252 case VK_FORMAT_R32G32B32A32_SINT:
253 case VK_FORMAT_R32_UINT:
254 case VK_FORMAT_R32G32_UINT:
255 case VK_FORMAT_R32G32B32A32_UINT:
256 case VK_FORMAT_R16_SINT:
257 case VK_FORMAT_R16G16_SINT:
258 case VK_FORMAT_R16G16B16A16_SINT:
259 case VK_FORMAT_R16_UINT:
260 case VK_FORMAT_R16G16_UINT:
261 case VK_FORMAT_R16G16B16A16_UINT:
262 case VK_FORMAT_R8_SINT:
263 case VK_FORMAT_R8G8_SINT:
264 case VK_FORMAT_R8G8B8A8_SINT:
265 case VK_FORMAT_R8_UINT:
266 case VK_FORMAT_R8G8_UINT:
267 case VK_FORMAT_R8G8B8A8_UINT:
268 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
269 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
270 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
271 for(unsigned int q = 0; q < state.multiSample; q++)
272 {
273 if(state.multiSampleMask & (1 << q))
274 {
275 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index]));
276 Vector4f color = c[index];
277
278 alphaBlend(index, buffer, color, x);
279 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
280 }
281 }
282 break;
283 default:
284 UNIMPLEMENTED("VkFormat: %d", int(format));
285 }
286 }
287 }
288
289 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS])
290 {
291 for(int index = 0; index < RENDERTARGETS; index++)
292 {
293 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaToCoverage))
294 {
295 continue;
296 }
297
298 switch(state.targetFormat[index])
299 {
300 case VK_FORMAT_UNDEFINED:
301 break;
302 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
303 case VK_FORMAT_R5G6B5_UNORM_PACK16:
304 case VK_FORMAT_B8G8R8A8_UNORM:
305 case VK_FORMAT_B8G8R8A8_SRGB:
306 case VK_FORMAT_R8G8B8A8_UNORM:
307 case VK_FORMAT_R8G8B8A8_SRGB:
308 case VK_FORMAT_R8G8_UNORM:
309 case VK_FORMAT_R8_UNORM:
310 case VK_FORMAT_R16G16_UNORM:
311 case VK_FORMAT_R16G16B16A16_UNORM:
312 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
313 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
314 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
315 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
316 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
317 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
318 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
319 break;
320 case VK_FORMAT_R32_SFLOAT:
321 case VK_FORMAT_R32G32_SFLOAT:
322 case VK_FORMAT_R32G32B32A32_SFLOAT:
323 case VK_FORMAT_R32_SINT:
324 case VK_FORMAT_R32G32_SINT:
325 case VK_FORMAT_R32G32B32A32_SINT:
326 case VK_FORMAT_R32_UINT:
327 case VK_FORMAT_R32G32_UINT:
328 case VK_FORMAT_R32G32B32A32_UINT:
329 case VK_FORMAT_R16_SFLOAT:
330 case VK_FORMAT_R16G16_SFLOAT:
331 case VK_FORMAT_R16G16B16A16_SFLOAT:
332 case VK_FORMAT_R16_SINT:
333 case VK_FORMAT_R16G16_SINT:
334 case VK_FORMAT_R16G16B16A16_SINT:
335 case VK_FORMAT_R16_UINT:
336 case VK_FORMAT_R16G16_UINT:
337 case VK_FORMAT_R16G16B16A16_UINT:
338 case VK_FORMAT_R8_SINT:
339 case VK_FORMAT_R8G8_SINT:
340 case VK_FORMAT_R8G8B8A8_SINT:
341 case VK_FORMAT_R8_UINT:
342 case VK_FORMAT_R8G8_UINT:
343 case VK_FORMAT_R8G8B8A8_UINT:
344 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
345 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
346 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
347 break;
348 default:
349 UNIMPLEMENTED("VkFormat: %d", int(state.targetFormat[index]));
350 }
351 }
352 }
353
354 Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
355 {
356 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
357 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
358
359 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
360 }
361}
362