1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "QuadRasterizer.hpp" |
16 | |
17 | #include "Primitive.hpp" |
18 | #include "Renderer.hpp" |
19 | #include "Shader/Constants.hpp" |
20 | #include "Common/Math.hpp" |
21 | #include "Common/Debug.hpp" |
22 | |
23 | namespace sw |
24 | { |
// Configuration variables defined in another translation unit.

// Read by interpolateZ() / interpolateW(): together with a shader that
// declares vPos, forces the corresponding gradient to be interpolated.
extern bool fullPixelPositionRegister;

// Read by rasterize(): enables the depth pre-pass that trims the left end
// of a span before any quad is shaded.
extern bool veryEarlyDepthTest;

// Read by rasterize(): selects which comparison that pre-pass uses.
extern bool complementaryDepthBuffer;

// NOTE(review): generate() and rasterize() each declare a local of the same
// name initialised from Renderer::getClusterCount(), so this global is not
// read by any function visible in this file.
extern int clusterCount;
30 | |
31 | QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader) |
32 | { |
33 | } |
34 | |
35 | QuadRasterizer::~QuadRasterizer() |
36 | { |
37 | } |
38 | |
39 | void QuadRasterizer::generate() |
40 | { |
41 | #if PERF_PROFILE |
42 | for(int i = 0; i < PERF_TIMERS; i++) |
43 | { |
44 | cycles[i] = 0; |
45 | } |
46 | |
47 | Long pixelTime = Ticks(); |
48 | #endif |
49 | |
50 | constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); |
51 | occlusion = 0; |
52 | int clusterCount = Renderer::getClusterCount(); |
53 | |
54 | Do |
55 | { |
56 | yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin)); |
57 | yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax)); |
58 | |
59 | Int cluster2 = cluster + cluster; |
60 | yMin += clusterCount * 2 - 2 - cluster2; |
61 | yMin &= -clusterCount * 2; |
62 | yMin += cluster2; |
63 | |
64 | If(yMin < yMax) |
65 | { |
66 | rasterize(); |
67 | } |
68 | |
69 | primitive += sizeof(Primitive) * state.multiSample; |
70 | count--; |
71 | } |
72 | Until(count == 0) |
73 | |
74 | if(state.occlusionEnabled) |
75 | { |
76 | UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster); |
77 | clusterOcclusion += occlusion; |
78 | *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion; |
79 | } |
80 | |
81 | #if PERF_PROFILE |
82 | cycles[PERF_PIXEL] = Ticks() - pixelTime; |
83 | |
84 | for(int i = 0; i < PERF_TIMERS; i++) |
85 | { |
86 | *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i]; |
87 | } |
88 | #endif |
89 | |
90 | Return(); |
91 | } |
92 | |
93 | void QuadRasterizer::rasterize() |
94 | { |
95 | Pointer<Byte> cBuffer[RENDERTARGETS]; |
96 | Pointer<Byte> zBuffer; |
97 | Pointer<Byte> sBuffer; |
98 | |
99 | for(int index = 0; index < RENDERTARGETS; index++) |
100 | { |
101 | if(state.colorWriteActive(index)) |
102 | { |
103 | cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); |
104 | } |
105 | } |
106 | |
107 | if(state.depthTestActive) |
108 | { |
109 | zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); |
110 | } |
111 | |
112 | if(state.stencilActive) |
113 | { |
114 | sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)); |
115 | } |
116 | |
117 | y = yMin; |
118 | |
119 | Do |
120 | { |
121 | Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
122 | Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
123 | Int x0 = Min(x0a, x0b); |
124 | |
125 | for(unsigned int q = 1; q < state.multiSample; q++) |
126 | { |
127 | x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); |
128 | x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); |
129 | x0 = Min(x0, Min(x0a, x0b)); |
130 | } |
131 | |
132 | x0 &= 0xFFFFFFFE; |
133 | |
134 | Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
135 | Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
136 | Int x1 = Max(x1a, x1b); |
137 | |
138 | for(unsigned int q = 1; q < state.multiSample; q++) |
139 | { |
140 | x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); |
141 | x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); |
142 | x1 = Max(x1, Max(x1a, x1b)); |
143 | } |
144 | |
145 | Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); |
146 | |
147 | if(interpolateZ()) |
148 | { |
149 | for(unsigned int q = 0; q < state.multiSample; q++) |
150 | { |
151 | Float4 y = yyyy; |
152 | |
153 | if(state.multiSample > 1) |
154 | { |
155 | y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4)); |
156 | } |
157 | |
158 | Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16); |
159 | } |
160 | } |
161 | |
162 | if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride) |
163 | { |
164 | if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == DEPTH_LESSEQUAL || state.depthCompareMode == DEPTH_LESS)) // FIXME: Both modes ok? |
165 | { |
166 | Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); |
167 | |
168 | Pointer<Byte> buffer; |
169 | Int pitch; |
170 | |
171 | if(!state.quadLayoutDepthBuffer) |
172 | { |
173 | buffer = zBuffer + 4 * x0; |
174 | pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); |
175 | } |
176 | else |
177 | { |
178 | buffer = zBuffer + 8 * x0; |
179 | } |
180 | |
181 | For(Int x = x0, x < x1, x += 2) |
182 | { |
183 | Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp); |
184 | |
185 | Float4 zValue; |
186 | |
187 | if(!state.quadLayoutDepthBuffer) |
188 | { |
189 | // FIXME: Properly optimizes? |
190 | zValue.xy = *Pointer<Float4>(buffer); |
191 | zValue.zw = *Pointer<Float4>(buffer + pitch - 8); |
192 | } |
193 | else |
194 | { |
195 | zValue = *Pointer<Float4>(buffer, 16); |
196 | } |
197 | |
198 | Int4 zTest; |
199 | |
200 | if(complementaryDepthBuffer) |
201 | { |
202 | zTest = CmpLE(zValue, z); |
203 | } |
204 | else |
205 | { |
206 | zTest = CmpNLT(zValue, z); |
207 | } |
208 | |
209 | Int zMask = SignMask(zTest); |
210 | |
211 | If(zMask == 0) |
212 | { |
213 | x0 += 2; |
214 | } |
215 | Else |
216 | { |
217 | x = x1; |
218 | } |
219 | |
220 | xxxx += Float4(2); |
221 | |
222 | if(!state.quadLayoutDepthBuffer) |
223 | { |
224 | buffer += 8; |
225 | } |
226 | else |
227 | { |
228 | buffer += 16; |
229 | } |
230 | } |
231 | } |
232 | } |
233 | |
234 | If(x0 < x1) |
235 | { |
236 | if(interpolateW()) |
237 | { |
238 | Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16); |
239 | } |
240 | |
241 | for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) |
242 | { |
243 | for(int component = 0; component < 4; component++) |
244 | { |
245 | if(state.interpolant[interpolant].component & (1 << component)) |
246 | { |
247 | Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16); |
248 | |
249 | if(!(state.interpolant[interpolant].flat & (1 << component))) |
250 | { |
251 | Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16); |
252 | } |
253 | } |
254 | } |
255 | } |
256 | |
257 | if(state.fog.component) |
258 | { |
259 | Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16); |
260 | |
261 | if(!state.fog.flat) |
262 | { |
263 | Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16); |
264 | } |
265 | } |
266 | |
267 | Short4 xLeft[4]; |
268 | Short4 xRight[4]; |
269 | |
270 | for(unsigned int q = 0; q < state.multiSample; q++) |
271 | { |
272 | xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span)); |
273 | xRight[q] = xLeft[q]; |
274 | |
275 | xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2); |
276 | xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1); |
277 | } |
278 | |
279 | For(Int x = x0, x < x1, x += 2) |
280 | { |
281 | Short4 xxxx = Short4(x); |
282 | Int cMask[4]; |
283 | |
284 | for(unsigned int q = 0; q < state.multiSample; q++) |
285 | { |
286 | Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); |
287 | cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F; |
288 | } |
289 | |
290 | quad(cBuffer, zBuffer, sBuffer, cMask, x); |
291 | } |
292 | } |
293 | |
294 | int clusterCount = Renderer::getClusterCount(); |
295 | |
296 | for(int index = 0; index < RENDERTARGETS; index++) |
297 | { |
298 | if(state.colorWriteActive(index)) |
299 | { |
300 | cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
301 | } |
302 | } |
303 | |
304 | if(state.depthTestActive) |
305 | { |
306 | zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
307 | } |
308 | |
309 | if(state.stencilActive) |
310 | { |
311 | sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute |
312 | } |
313 | |
314 | y += 2 * clusterCount; |
315 | } |
316 | Until(y >= yMax) |
317 | } |
318 | |
319 | Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp) |
320 | { |
321 | Float4 interpolant = D; |
322 | |
323 | if(!flat) |
324 | { |
325 | interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16); |
326 | |
327 | if(perspective) |
328 | { |
329 | interpolant *= rhw; |
330 | } |
331 | } |
332 | |
333 | if(clamp) |
334 | { |
335 | interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f)); |
336 | } |
337 | |
338 | return interpolant; |
339 | } |
340 | |
341 | bool QuadRasterizer::interpolateZ() const |
342 | { |
343 | return state.depthTestActive || state.pixelFogActive() || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); |
344 | } |
345 | |
346 | bool QuadRasterizer::interpolateW() const |
347 | { |
348 | return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); |
349 | } |
350 | } |
351 | |