1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef sw_Renderer_hpp |
16 | #define sw_Renderer_hpp |
17 | |
18 | #include "VertexProcessor.hpp" |
19 | #include "PixelProcessor.hpp" |
20 | #include "SetupProcessor.hpp" |
21 | #include "Plane.hpp" |
22 | #include "Blitter.hpp" |
23 | #include "Common/MutexLock.hpp" |
24 | #include "Common/Thread.hpp" |
25 | #include "Main/Config.hpp" |
26 | |
27 | #include <list> |
28 | |
29 | namespace sw |
30 | { |
// Forward declarations. Complete definitions are not required for the
// pointer and reference uses that appear later in this header.
struct Constants;
struct DrawCall;
struct Task;

class Clipper;
class PixelShader;
class Resource;
class SwiftConfig;
class VertexShader;
39 | |
// Precision levels, listed from APPROXIMATE to IEEE.
// The numeric values are spelled out explicitly and match the implicit
// numbering (0..4). The 2^-N annotations are carried over unchanged;
// presumably they are error bounds — confirm against the users of this enum.
enum TranscendentalPrecision
{
	APPROXIMATE = 0,
	PARTIAL     = 1,   // 2^-10
	ACCURATE    = 2,
	WHQL        = 3,   // 2^-21
	IEEE        = 4    // 2^-23
};
48 | |
49 | extern TranscendentalPrecision logPrecision; |
50 | extern TranscendentalPrecision expPrecision; |
51 | extern TranscendentalPrecision rcpPrecision; |
52 | extern TranscendentalPrecision rsqPrecision; |
53 | extern bool perspectiveCorrection; |
54 | |
// Bundle of boolean switches that differ between graphics APIs.
// The OpenGL and Direct3D presets in this header initialize the fields
// positionally, so the declaration order below must stay as it is.
struct Conventions
{
	bool halfIntegerCoordinates;
	bool symmetricNormalizedDepth;
	bool booleanFaceRegister;
	bool fullPixelPositionRegister;
	bool leadingVertexFirst;
	bool secondaryColor;
	bool colorsDefaultToZero;
};
65 | |
66 | static const Conventions OpenGL = |
67 | { |
68 | true, // halfIntegerCoordinates |
69 | true, // symmetricNormalizedDepth |
70 | true, // booleanFaceRegister |
71 | true, // fullPixelPositionRegister |
72 | false, // leadingVertexFirst |
73 | false, // secondaryColor |
74 | true, // colorsDefaultToZero |
75 | }; |
76 | |
77 | static const Conventions Direct3D = |
78 | { |
79 | false, // halfIntegerCoordinates |
80 | false, // symmetricNormalizedDepth |
81 | false, // booleanFaceRegister |
82 | false, // fullPixelPositionRegister |
83 | true, // leadingVertexFirst |
84 | true, // secondardyColor |
85 | false, // colorsDefaultToZero |
86 | }; |
87 | |
88 | struct Query |
89 | { |
90 | enum Type { FRAGMENTS_PASSED, TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN }; |
91 | |
92 | Query(Type type); |
93 | |
94 | void addRef(); |
95 | void release(); |
96 | |
97 | inline void begin() |
98 | { |
99 | building = true; |
100 | data = 0; |
101 | } |
102 | |
103 | inline void end() |
104 | { |
105 | building = false; |
106 | } |
107 | |
108 | inline bool isReady() const |
109 | { |
110 | return (reference == 1); |
111 | } |
112 | |
113 | bool building; |
114 | AtomicInt data; |
115 | |
116 | const Type type; |
117 | private: |
118 | ~Query() {} // Only delete a query within the release() function |
119 | |
120 | AtomicInt reference; |
121 | }; |
122 | |
123 | struct DrawData |
124 | { |
125 | const Constants *constants; |
126 | |
127 | const void *input[MAX_VERTEX_INPUTS]; |
128 | unsigned int stride[MAX_VERTEX_INPUTS]; |
129 | Texture mipmap[TOTAL_IMAGE_UNITS]; |
130 | const void *indices; |
131 | |
132 | struct VS |
133 | { |
134 | float4 c[VERTEX_UNIFORM_VECTORS + 1]; // One extra for indices out of range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0} |
135 | byte* u[MAX_UNIFORM_BUFFER_BINDINGS]; |
136 | byte* t[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; |
137 | unsigned int reg[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Offset used when reading from registers, in components |
138 | unsigned int row[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of rows to read |
139 | unsigned int col[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of columns to read |
140 | unsigned int str[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of components between each varying in output buffer |
141 | int4 i[16]; |
142 | bool b[16]; |
143 | }; |
144 | |
145 | struct PS |
146 | { |
147 | word4 cW[8][4]; |
148 | float4 c[FRAGMENT_UNIFORM_VECTORS]; |
149 | byte* u[MAX_UNIFORM_BUFFER_BINDINGS]; |
150 | int4 i[16]; |
151 | bool b[16]; |
152 | }; |
153 | |
154 | union |
155 | { |
156 | VS vs; |
157 | VertexProcessor::FixedFunction ff; |
158 | }; |
159 | |
160 | PS ps; |
161 | |
162 | int instanceID; |
163 | |
164 | VertexProcessor::PointSprite point; |
165 | float lineWidth; |
166 | |
167 | PixelProcessor::Stencil stencil[2]; // clockwise, counterclockwise |
168 | PixelProcessor::Fog fog; |
169 | PixelProcessor::Factor factor; |
170 | unsigned int occlusion[16]; // Number of pixels passing depth test |
171 | |
172 | #if PERF_PROFILE |
173 | int64_t cycles[PERF_TIMERS][16]; |
174 | #endif |
175 | |
176 | TextureStage::Uniforms textureStage[8]; |
177 | |
178 | float4 Wx16; |
179 | float4 Hx16; |
180 | float4 X0x16; |
181 | float4 Y0x16; |
182 | float4 XXXX; |
183 | float4 YYYY; |
184 | float4 halfPixelX; |
185 | float4 halfPixelY; |
186 | float viewportHeight; |
187 | float slopeDepthBias; |
188 | float depthRange; |
189 | float depthNear; |
190 | Plane clipPlane[6]; |
191 | |
192 | unsigned int *colorBuffer[RENDERTARGETS]; |
193 | int colorPitchB[RENDERTARGETS]; |
194 | int colorSliceB[RENDERTARGETS]; |
195 | float *depthBuffer; |
196 | int depthPitchB; |
197 | int depthSliceB; |
198 | unsigned char *stencilBuffer; |
199 | int stencilPitchB; |
200 | int stencilSliceB; |
201 | |
202 | int scissorX0; |
203 | int scissorX1; |
204 | int scissorY0; |
205 | int scissorY1; |
206 | |
207 | float4 a2c0; |
208 | float4 a2c1; |
209 | float4 a2c2; |
210 | float4 a2c3; |
211 | }; |
212 | |
// Viewport description: an origin (x0, y0), an extent (width, height)
// and a Z interval (minZ, maxZ), all stored as floats.
struct Viewport
{
	// Origin
	float x0;
	float y0;

	// Extent
	float width;
	float height;

	// Z interval
	float minZ;
	float maxZ;
};
222 | |
223 | class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor |
224 | { |
225 | struct Task |
226 | { |
227 | enum Type |
228 | { |
229 | PRIMITIVES, |
230 | PIXELS, |
231 | |
232 | RESUME, |
233 | SUSPEND |
234 | }; |
235 | |
236 | AtomicInt type; |
237 | AtomicInt primitiveUnit; |
238 | AtomicInt pixelCluster; |
239 | }; |
240 | |
241 | struct PrimitiveProgress |
242 | { |
243 | void init() |
244 | { |
245 | drawCall = 0; |
246 | firstPrimitive = 0; |
247 | primitiveCount = 0; |
248 | visible = 0; |
249 | references = 0; |
250 | } |
251 | |
252 | AtomicInt drawCall; |
253 | AtomicInt firstPrimitive; |
254 | AtomicInt primitiveCount; |
255 | AtomicInt visible; |
256 | AtomicInt references; |
257 | }; |
258 | |
259 | struct PixelProgress |
260 | { |
261 | void init() |
262 | { |
263 | drawCall = 0; |
264 | processedPrimitives = 0; |
265 | executing = false; |
266 | } |
267 | |
268 | AtomicInt drawCall; |
269 | AtomicInt processedPrimitives; |
270 | AtomicInt executing; |
271 | }; |
272 | |
273 | public: |
274 | Renderer(Context *context, Conventions conventions, bool exactColorRounding); |
275 | |
276 | virtual ~Renderer(); |
277 | |
278 | void *operator new(size_t size); |
279 | void operator delete(void * mem); |
280 | |
281 | void draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update = true); |
282 | |
283 | void clear(void *value, Format format, Surface *dest, const Rect &rect, unsigned int rgbaMask); |
284 | void blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil = false, bool sRGBconversion = true); |
285 | void blit3D(Surface *source, Surface *dest); |
286 | |
287 | void setIndexBuffer(Resource *indexBuffer); |
288 | |
289 | void setMultiSampleMask(unsigned int mask); |
290 | void setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing); |
291 | |
292 | void setTextureResource(unsigned int sampler, Resource *resource); |
293 | void setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type); |
294 | |
295 | void setTextureFilter(SamplerType type, int sampler, FilterType textureFilter); |
296 | void setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter); |
297 | void setGatherEnable(SamplerType type, int sampler, bool enable); |
298 | void setAddressingModeU(SamplerType type, int sampler, AddressingMode addressingMode); |
299 | void setAddressingModeV(SamplerType type, int sampler, AddressingMode addressingMode); |
300 | void setAddressingModeW(SamplerType type, int sampler, AddressingMode addressingMode); |
301 | void setReadSRGB(SamplerType type, int sampler, bool sRGB); |
302 | void setMipmapLOD(SamplerType type, int sampler, float bias); |
303 | void setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor); |
304 | void setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy); |
305 | void setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering); |
306 | void setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR); |
307 | void setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG); |
308 | void setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB); |
309 | void setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA); |
310 | void setCompareFunc(SamplerType type, int sampler, CompareFunc compare); |
311 | void setBaseLevel(SamplerType type, int sampler, int baseLevel); |
312 | void setMaxLevel(SamplerType type, int sampler, int maxLevel); |
313 | void setMinLod(SamplerType type, int sampler, float minLod); |
314 | void setMaxLod(SamplerType type, int sampler, float maxLod); |
315 | void setSyncRequired(SamplerType type, int sampler, bool syncRequired); |
316 | |
317 | void setPointSpriteEnable(bool pointSpriteEnable); |
318 | void setPointScaleEnable(bool pointScaleEnable); |
319 | void setLineWidth(float width); |
320 | |
321 | void setDepthBias(float bias); |
322 | void setSlopeDepthBias(float slopeBias); |
323 | |
324 | void setRasterizerDiscard(bool rasterizerDiscard); |
325 | |
326 | // Programmable pipelines |
327 | void setPixelShader(const PixelShader *shader); |
328 | void setVertexShader(const VertexShader *shader); |
329 | |
330 | void setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count = 1); |
331 | void setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count = 1); |
332 | void setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count = 1); |
333 | |
334 | void setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count = 1); |
335 | void setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count = 1); |
336 | void setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count = 1); |
337 | |
338 | // Viewport & Clipper |
339 | void setViewport(const Viewport &viewport); |
340 | void setScissor(const Rect &scissor); |
341 | void setClipFlags(int flags); |
342 | void setClipPlane(unsigned int index, const float plane[4]); |
343 | |
344 | // Partial transform |
345 | void setModelMatrix(const Matrix &M, int i = 0); |
346 | void setViewMatrix(const Matrix &V); |
347 | void setBaseMatrix(const Matrix &B); |
348 | void setProjectionMatrix(const Matrix &P); |
349 | |
350 | void addQuery(Query *query); |
351 | void removeQuery(Query *query); |
352 | |
353 | void synchronize(); |
354 | |
355 | #if PERF_HUD |
356 | // Performance timers |
357 | int getThreadCount(); |
358 | int64_t getVertexTime(int thread); |
359 | int64_t getSetupTime(int thread); |
360 | int64_t getPixelTime(int thread); |
361 | void resetTimers(); |
362 | #endif |
363 | |
364 | static int getClusterCount() { return clusterCount; } |
365 | |
366 | private: |
367 | static void threadFunction(void *parameters); |
368 | void threadLoop(int threadIndex); |
369 | void taskLoop(int threadIndex); |
370 | void findAvailableTasks(); |
371 | void scheduleTask(int threadIndex); |
372 | void executeTask(int threadIndex); |
373 | void finishRendering(Task &pixelTask); |
374 | |
375 | void processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread); |
376 | |
377 | int setupSolidTriangles(int batch, int count); |
378 | int setupWireframeTriangle(int batch, int count); |
379 | int setupVertexTriangle(int batch, int count); |
380 | int setupLines(int batch, int count); |
381 | int setupPoints(int batch, int count); |
382 | |
383 | bool setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw); |
384 | bool setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw); |
385 | |
386 | bool isReadWriteTexture(int sampler); |
387 | void updateClipper(); |
388 | void updateConfiguration(bool initialUpdate = false); |
389 | void initializeThreads(); |
390 | void terminateThreads(); |
391 | |
392 | void loadConstants(const VertexShader *vertexShader); |
393 | void loadConstants(const PixelShader *pixelShader); |
394 | |
395 | Context *context; |
396 | Clipper *clipper; |
397 | Blitter *blitter; |
398 | Viewport viewport; |
399 | Rect scissor; |
400 | int clipFlags; |
401 | |
402 | Triangle *triangleBatch[16]; |
403 | Primitive *primitiveBatch[16]; |
404 | |
405 | // User-defined clipping planes |
406 | Plane userPlane[MAX_CLIP_PLANES]; |
407 | Plane clipPlane[MAX_CLIP_PLANES]; // Tranformed to clip space |
408 | bool updateClipPlanes; |
409 | |
410 | AtomicInt exitThreads; |
411 | AtomicInt threadsAwake; |
412 | Thread *worker[16]; |
413 | Event *resume[16]; // Events for resuming threads |
414 | Event *suspend[16]; // Events for suspending threads |
415 | Event *resumeApp; // Event for resuming the application thread |
416 | |
417 | PrimitiveProgress primitiveProgress[16]; |
418 | PixelProgress pixelProgress[16]; |
419 | Task task[16]; // Current tasks for threads |
420 | |
421 | enum { |
422 | DRAW_COUNT = 16, // Number of draw calls buffered (must be power of 2) |
423 | DRAW_COUNT_BITS = DRAW_COUNT - 1, |
424 | }; |
425 | DrawCall *drawCall[DRAW_COUNT]; |
426 | DrawCall *drawList[DRAW_COUNT]; |
427 | |
428 | AtomicInt currentDraw; |
429 | AtomicInt nextDraw; |
430 | |
431 | enum { |
432 | TASK_COUNT = 32, // Size of the task queue (must be power of 2) |
433 | TASK_COUNT_BITS = TASK_COUNT - 1, |
434 | }; |
435 | Task taskQueue[TASK_COUNT]; |
436 | AtomicInt qHead; |
437 | AtomicInt qSize; |
438 | |
439 | static AtomicInt unitCount; |
440 | static AtomicInt clusterCount; |
441 | |
442 | MutexLock schedulerMutex; |
443 | |
444 | #if PERF_HUD |
445 | int64_t vertexTime[16]; |
446 | int64_t setupTime[16]; |
447 | int64_t pixelTime[16]; |
448 | #endif |
449 | |
450 | VertexTask *vertexTask[16]; |
451 | |
452 | SwiftConfig *swiftConfig; |
453 | |
454 | std::list<Query*> queries; |
455 | Resource *sync; |
456 | |
457 | VertexProcessor::State vertexState; |
458 | SetupProcessor::State setupState; |
459 | PixelProcessor::State pixelState; |
460 | |
461 | std::shared_ptr<Routine> vertexRoutine; |
462 | std::shared_ptr<Routine> setupRoutine; |
463 | std::shared_ptr<Routine> pixelRoutine; |
464 | }; |
465 | |
466 | struct DrawCall |
467 | { |
468 | DrawCall(); |
469 | |
470 | ~DrawCall(); |
471 | |
472 | AtomicInt drawType; |
473 | AtomicInt batchSize; |
474 | |
475 | std::shared_ptr<Routine> vertexRoutine; |
476 | std::shared_ptr<Routine> setupRoutine; |
477 | std::shared_ptr<Routine> pixelRoutine; |
478 | |
479 | VertexProcessor::RoutinePointer vertexPointer; |
480 | SetupProcessor::RoutinePointer setupPointer; |
481 | PixelProcessor::RoutinePointer pixelPointer; |
482 | |
483 | int (Renderer::*setupPrimitives)(int batch, int count); |
484 | SetupProcessor::State setupState; |
485 | |
486 | Resource *vertexStream[MAX_VERTEX_INPUTS]; |
487 | Resource *indexBuffer; |
488 | Surface *renderTarget[RENDERTARGETS]; |
489 | Surface *depthBuffer; |
490 | Surface *stencilBuffer; |
491 | Resource *texture[TOTAL_IMAGE_UNITS]; |
492 | Resource* pUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS]; |
493 | Resource* vUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS]; |
494 | Resource* transformFeedbackBuffers[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; |
495 | |
496 | unsigned int vsDirtyConstF; |
497 | unsigned int vsDirtyConstI; |
498 | unsigned int vsDirtyConstB; |
499 | |
500 | unsigned int psDirtyConstF; |
501 | unsigned int psDirtyConstI; |
502 | unsigned int psDirtyConstB; |
503 | |
504 | std::list<Query*> *queries; |
505 | |
506 | AtomicInt clipFlags; |
507 | |
508 | AtomicInt primitive; // Current primitive to enter pipeline |
509 | AtomicInt count; // Number of primitives to render |
510 | AtomicInt references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free |
511 | |
512 | DrawData *data; |
513 | }; |
514 | } |
515 | |
516 | #endif // sw_Renderer_hpp |
517 | |