1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "QuadRasterizer.hpp"
16
17#include "Primitive.hpp"
18#include "Renderer.hpp"
19#include "Shader/Constants.hpp"
20#include "Common/Math.hpp"
21#include "Common/Debug.hpp"
22
23namespace sw
24{
// Globals declared here but not defined in this file.

// Cluster count global. Note that generate() and rasterize() do not read it:
// each declares a local of the same name taken from Renderer::getClusterCount().
extern int clusterCount;

// Enables the pre-pass in rasterize() that skips leading quads failing the depth test.
extern bool veryEarlyDepthTest;
// Chooses which depth comparison (CmpLE vs. CmpNLT) that pre-pass applies.
extern bool complementaryDepthBuffer;
// When set and the shader declares vPos, interpolateZ()/interpolateW() return true.
extern bool fullPixelPositionRegister;
30
31 QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader)
32 {
33 }
34
35 QuadRasterizer::~QuadRasterizer()
36 {
37 }
38
39 void QuadRasterizer::generate()
40 {
41 #if PERF_PROFILE
42 for(int i = 0; i < PERF_TIMERS; i++)
43 {
44 cycles[i] = 0;
45 }
46
47 Long pixelTime = Ticks();
48 #endif
49
50 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants));
51 occlusion = 0;
52 int clusterCount = Renderer::getClusterCount();
53
54 Do
55 {
56 yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin));
57 yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax));
58
59 Int cluster2 = cluster + cluster;
60 yMin += clusterCount * 2 - 2 - cluster2;
61 yMin &= -clusterCount * 2;
62 yMin += cluster2;
63
64 If(yMin < yMax)
65 {
66 rasterize();
67 }
68
69 primitive += sizeof(Primitive) * state.multiSample;
70 count--;
71 }
72 Until(count == 0)
73
74 if(state.occlusionEnabled)
75 {
76 UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster);
77 clusterOcclusion += occlusion;
78 *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion;
79 }
80
81 #if PERF_PROFILE
82 cycles[PERF_PIXEL] = Ticks() - pixelTime;
83
84 for(int i = 0; i < PERF_TIMERS; i++)
85 {
86 *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i];
87 }
88 #endif
89
90 Return();
91 }
92
93 void QuadRasterizer::rasterize()
94 {
95 Pointer<Byte> cBuffer[RENDERTARGETS];
96 Pointer<Byte> zBuffer;
97 Pointer<Byte> sBuffer;
98
99 for(int index = 0; index < RENDERTARGETS; index++)
100 {
101 if(state.colorWriteActive(index))
102 {
103 cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
104 }
105 }
106
107 if(state.depthTestActive)
108 {
109 zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
110 }
111
112 if(state.stencilActive)
113 {
114 sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB));
115 }
116
117 y = yMin;
118
119 Do
120 {
121 Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
122 Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
123 Int x0 = Min(x0a, x0b);
124
125 for(unsigned int q = 1; q < state.multiSample; q++)
126 {
127 x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span)));
128 x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span)));
129 x0 = Min(x0, Min(x0a, x0b));
130 }
131
132 x0 &= 0xFFFFFFFE;
133
134 Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
135 Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
136 Int x1 = Max(x1a, x1b);
137
138 for(unsigned int q = 1; q < state.multiSample; q++)
139 {
140 x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span)));
141 x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span)));
142 x1 = Max(x1, Max(x1a, x1b));
143 }
144
145 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
146
147 if(interpolateZ())
148 {
149 for(unsigned int q = 0; q < state.multiSample; q++)
150 {
151 Float4 y = yyyy;
152
153 if(state.multiSample > 1)
154 {
155 y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4));
156 }
157
158 Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16);
159 }
160 }
161
162 if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride)
163 {
164 if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == DEPTH_LESSEQUAL || state.depthCompareMode == DEPTH_LESS)) // FIXME: Both modes ok?
165 {
166 Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
167
168 Pointer<Byte> buffer;
169 Int pitch;
170
171 if(!state.quadLayoutDepthBuffer)
172 {
173 buffer = zBuffer + 4 * x0;
174 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
175 }
176 else
177 {
178 buffer = zBuffer + 8 * x0;
179 }
180
181 For(Int x = x0, x < x1, x += 2)
182 {
183 Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
184
185 Float4 zValue;
186
187 if(!state.quadLayoutDepthBuffer)
188 {
189 // FIXME: Properly optimizes?
190 zValue.xy = *Pointer<Float4>(buffer);
191 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
192 }
193 else
194 {
195 zValue = *Pointer<Float4>(buffer, 16);
196 }
197
198 Int4 zTest;
199
200 if(complementaryDepthBuffer)
201 {
202 zTest = CmpLE(zValue, z);
203 }
204 else
205 {
206 zTest = CmpNLT(zValue, z);
207 }
208
209 Int zMask = SignMask(zTest);
210
211 If(zMask == 0)
212 {
213 x0 += 2;
214 }
215 Else
216 {
217 x = x1;
218 }
219
220 xxxx += Float4(2);
221
222 if(!state.quadLayoutDepthBuffer)
223 {
224 buffer += 8;
225 }
226 else
227 {
228 buffer += 16;
229 }
230 }
231 }
232 }
233
234 If(x0 < x1)
235 {
236 if(interpolateW())
237 {
238 Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16);
239 }
240
241 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
242 {
243 for(int component = 0; component < 4; component++)
244 {
245 if(state.interpolant[interpolant].component & (1 << component))
246 {
247 Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16);
248
249 if(!(state.interpolant[interpolant].flat & (1 << component)))
250 {
251 Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16);
252 }
253 }
254 }
255 }
256
257 if(state.fog.component)
258 {
259 Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16);
260
261 if(!state.fog.flat)
262 {
263 Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16);
264 }
265 }
266
267 Short4 xLeft[4];
268 Short4 xRight[4];
269
270 for(unsigned int q = 0; q < state.multiSample; q++)
271 {
272 xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span));
273 xRight[q] = xLeft[q];
274
275 xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2);
276 xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1);
277 }
278
279 For(Int x = x0, x < x1, x += 2)
280 {
281 Short4 xxxx = Short4(x);
282 Int cMask[4];
283
284 for(unsigned int q = 0; q < state.multiSample; q++)
285 {
286 Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx);
287 cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F;
288 }
289
290 quad(cBuffer, zBuffer, sBuffer, cMask, x);
291 }
292 }
293
294 int clusterCount = Renderer::getClusterCount();
295
296 for(int index = 0; index < RENDERTARGETS; index++)
297 {
298 if(state.colorWriteActive(index))
299 {
300 cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
301 }
302 }
303
304 if(state.depthTestActive)
305 {
306 zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
307 }
308
309 if(state.stencilActive)
310 {
311 sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute
312 }
313
314 y += 2 * clusterCount;
315 }
316 Until(y >= yMax)
317 }
318
319 Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp)
320 {
321 Float4 interpolant = D;
322
323 if(!flat)
324 {
325 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16);
326
327 if(perspective)
328 {
329 interpolant *= rhw;
330 }
331 }
332
333 if(clamp)
334 {
335 interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f));
336 }
337
338 return interpolant;
339 }
340
341 bool QuadRasterizer::interpolateZ() const
342 {
343 return state.depthTestActive || state.pixelFogActive() || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
344 }
345
346 bool QuadRasterizer::interpolateW() const
347 {
348 return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister);
349 }
350}
351