// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef sw_Renderer_hpp
16#define sw_Renderer_hpp
17
18#include "VertexProcessor.hpp"
19#include "PixelProcessor.hpp"
20#include "SetupProcessor.hpp"
21#include "Plane.hpp"
22#include "Blitter.hpp"
23#include "Common/MutexLock.hpp"
24#include "Common/Thread.hpp"
25#include "Main/Config.hpp"
26
27#include <list>
28
29namespace sw
30{
31 class Clipper;
32 struct DrawCall;
33 class PixelShader;
34 class VertexShader;
35 class SwiftConfig;
36 struct Task;
37 class Resource;
38 struct Constants;
39
40 enum TranscendentalPrecision
41 {
42 APPROXIMATE,
43 PARTIAL, // 2^-10
44 ACCURATE,
45 WHQL, // 2^-21
46 IEEE // 2^-23
47 };
48
49 extern TranscendentalPrecision logPrecision;
50 extern TranscendentalPrecision expPrecision;
51 extern TranscendentalPrecision rcpPrecision;
52 extern TranscendentalPrecision rsqPrecision;
53 extern bool perspectiveCorrection;
54
55 struct Conventions
56 {
57 bool halfIntegerCoordinates;
58 bool symmetricNormalizedDepth;
59 bool booleanFaceRegister;
60 bool fullPixelPositionRegister;
61 bool leadingVertexFirst;
62 bool secondaryColor;
63 bool colorsDefaultToZero;
64 };
65
66 static const Conventions OpenGL =
67 {
68 true, // halfIntegerCoordinates
69 true, // symmetricNormalizedDepth
70 true, // booleanFaceRegister
71 true, // fullPixelPositionRegister
72 false, // leadingVertexFirst
73 false, // secondaryColor
74 true, // colorsDefaultToZero
75 };
76
77 static const Conventions Direct3D =
78 {
79 false, // halfIntegerCoordinates
80 false, // symmetricNormalizedDepth
81 false, // booleanFaceRegister
82 false, // fullPixelPositionRegister
83 true, // leadingVertexFirst
84 true, // secondardyColor
85 false, // colorsDefaultToZero
86 };
87
88 struct Query
89 {
90 enum Type { FRAGMENTS_PASSED, TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN };
91
92 Query(Type type);
93
94 void addRef();
95 void release();
96
97 inline void begin()
98 {
99 building = true;
100 data = 0;
101 }
102
103 inline void end()
104 {
105 building = false;
106 }
107
108 inline bool isReady() const
109 {
110 return (reference == 1);
111 }
112
113 bool building;
114 AtomicInt data;
115
116 const Type type;
117 private:
118 ~Query() {} // Only delete a query within the release() function
119
120 AtomicInt reference;
121 };
122
123 struct DrawData
124 {
125 const Constants *constants;
126
127 const void *input[MAX_VERTEX_INPUTS];
128 unsigned int stride[MAX_VERTEX_INPUTS];
129 Texture mipmap[TOTAL_IMAGE_UNITS];
130 const void *indices;
131
132 struct VS
133 {
134 float4 c[VERTEX_UNIFORM_VECTORS + 1]; // One extra for indices out of range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0}
135 byte* u[MAX_UNIFORM_BUFFER_BINDINGS];
136 byte* t[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS];
137 unsigned int reg[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Offset used when reading from registers, in components
138 unsigned int row[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of rows to read
139 unsigned int col[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of columns to read
140 unsigned int str[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS]; // Number of components between each varying in output buffer
141 int4 i[16];
142 bool b[16];
143 };
144
145 struct PS
146 {
147 word4 cW[8][4];
148 float4 c[FRAGMENT_UNIFORM_VECTORS];
149 byte* u[MAX_UNIFORM_BUFFER_BINDINGS];
150 int4 i[16];
151 bool b[16];
152 };
153
154 union
155 {
156 VS vs;
157 VertexProcessor::FixedFunction ff;
158 };
159
160 PS ps;
161
162 int instanceID;
163
164 VertexProcessor::PointSprite point;
165 float lineWidth;
166
167 PixelProcessor::Stencil stencil[2]; // clockwise, counterclockwise
168 PixelProcessor::Fog fog;
169 PixelProcessor::Factor factor;
170 unsigned int occlusion[16]; // Number of pixels passing depth test
171
172 #if PERF_PROFILE
173 int64_t cycles[PERF_TIMERS][16];
174 #endif
175
176 TextureStage::Uniforms textureStage[8];
177
178 float4 Wx16;
179 float4 Hx16;
180 float4 X0x16;
181 float4 Y0x16;
182 float4 XXXX;
183 float4 YYYY;
184 float4 halfPixelX;
185 float4 halfPixelY;
186 float viewportHeight;
187 float slopeDepthBias;
188 float depthRange;
189 float depthNear;
190 Plane clipPlane[6];
191
192 unsigned int *colorBuffer[RENDERTARGETS];
193 int colorPitchB[RENDERTARGETS];
194 int colorSliceB[RENDERTARGETS];
195 float *depthBuffer;
196 int depthPitchB;
197 int depthSliceB;
198 unsigned char *stencilBuffer;
199 int stencilPitchB;
200 int stencilSliceB;
201
202 int scissorX0;
203 int scissorX1;
204 int scissorY0;
205 int scissorY1;
206
207 float4 a2c0;
208 float4 a2c1;
209 float4 a2c2;
210 float4 a2c3;
211 };
212
213 struct Viewport
214 {
215 float x0;
216 float y0;
217 float width;
218 float height;
219 float minZ;
220 float maxZ;
221 };
222
223 class Renderer : public VertexProcessor, public PixelProcessor, public SetupProcessor
224 {
225 struct Task
226 {
227 enum Type
228 {
229 PRIMITIVES,
230 PIXELS,
231
232 RESUME,
233 SUSPEND
234 };
235
236 AtomicInt type;
237 AtomicInt primitiveUnit;
238 AtomicInt pixelCluster;
239 };
240
241 struct PrimitiveProgress
242 {
243 void init()
244 {
245 drawCall = 0;
246 firstPrimitive = 0;
247 primitiveCount = 0;
248 visible = 0;
249 references = 0;
250 }
251
252 AtomicInt drawCall;
253 AtomicInt firstPrimitive;
254 AtomicInt primitiveCount;
255 AtomicInt visible;
256 AtomicInt references;
257 };
258
259 struct PixelProgress
260 {
261 void init()
262 {
263 drawCall = 0;
264 processedPrimitives = 0;
265 executing = false;
266 }
267
268 AtomicInt drawCall;
269 AtomicInt processedPrimitives;
270 AtomicInt executing;
271 };
272
273 public:
274 Renderer(Context *context, Conventions conventions, bool exactColorRounding);
275
276 virtual ~Renderer();
277
278 void *operator new(size_t size);
279 void operator delete(void * mem);
280
281 void draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update = true);
282
283 void clear(void *value, Format format, Surface *dest, const Rect &rect, unsigned int rgbaMask);
284 void blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil = false, bool sRGBconversion = true);
285 void blit3D(Surface *source, Surface *dest);
286
287 void setIndexBuffer(Resource *indexBuffer);
288
289 void setMultiSampleMask(unsigned int mask);
290 void setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing);
291
292 void setTextureResource(unsigned int sampler, Resource *resource);
293 void setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type);
294
295 void setTextureFilter(SamplerType type, int sampler, FilterType textureFilter);
296 void setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter);
297 void setGatherEnable(SamplerType type, int sampler, bool enable);
298 void setAddressingModeU(SamplerType type, int sampler, AddressingMode addressingMode);
299 void setAddressingModeV(SamplerType type, int sampler, AddressingMode addressingMode);
300 void setAddressingModeW(SamplerType type, int sampler, AddressingMode addressingMode);
301 void setReadSRGB(SamplerType type, int sampler, bool sRGB);
302 void setMipmapLOD(SamplerType type, int sampler, float bias);
303 void setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor);
304 void setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy);
305 void setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering);
306 void setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR);
307 void setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG);
308 void setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB);
309 void setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA);
310 void setCompareFunc(SamplerType type, int sampler, CompareFunc compare);
311 void setBaseLevel(SamplerType type, int sampler, int baseLevel);
312 void setMaxLevel(SamplerType type, int sampler, int maxLevel);
313 void setMinLod(SamplerType type, int sampler, float minLod);
314 void setMaxLod(SamplerType type, int sampler, float maxLod);
315 void setSyncRequired(SamplerType type, int sampler, bool syncRequired);
316
317 void setPointSpriteEnable(bool pointSpriteEnable);
318 void setPointScaleEnable(bool pointScaleEnable);
319 void setLineWidth(float width);
320
321 void setDepthBias(float bias);
322 void setSlopeDepthBias(float slopeBias);
323
324 void setRasterizerDiscard(bool rasterizerDiscard);
325
326 // Programmable pipelines
327 void setPixelShader(const PixelShader *shader);
328 void setVertexShader(const VertexShader *shader);
329
330 void setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count = 1);
331 void setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count = 1);
332 void setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count = 1);
333
334 void setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count = 1);
335 void setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count = 1);
336 void setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count = 1);
337
338 // Viewport & Clipper
339 void setViewport(const Viewport &viewport);
340 void setScissor(const Rect &scissor);
341 void setClipFlags(int flags);
342 void setClipPlane(unsigned int index, const float plane[4]);
343
344 // Partial transform
345 void setModelMatrix(const Matrix &M, int i = 0);
346 void setViewMatrix(const Matrix &V);
347 void setBaseMatrix(const Matrix &B);
348 void setProjectionMatrix(const Matrix &P);
349
350 void addQuery(Query *query);
351 void removeQuery(Query *query);
352
353 void synchronize();
354
355 #if PERF_HUD
356 // Performance timers
357 int getThreadCount();
358 int64_t getVertexTime(int thread);
359 int64_t getSetupTime(int thread);
360 int64_t getPixelTime(int thread);
361 void resetTimers();
362 #endif
363
364 static int getClusterCount() { return clusterCount; }
365
366 private:
367 static void threadFunction(void *parameters);
368 void threadLoop(int threadIndex);
369 void taskLoop(int threadIndex);
370 void findAvailableTasks();
371 void scheduleTask(int threadIndex);
372 void executeTask(int threadIndex);
373 void finishRendering(Task &pixelTask);
374
375 void processPrimitiveVertices(int unit, unsigned int start, unsigned int count, unsigned int loop, int thread);
376
377 int setupSolidTriangles(int batch, int count);
378 int setupWireframeTriangle(int batch, int count);
379 int setupVertexTriangle(int batch, int count);
380 int setupLines(int batch, int count);
381 int setupPoints(int batch, int count);
382
383 bool setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw);
384 bool setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw);
385
386 bool isReadWriteTexture(int sampler);
387 void updateClipper();
388 void updateConfiguration(bool initialUpdate = false);
389 void initializeThreads();
390 void terminateThreads();
391
392 void loadConstants(const VertexShader *vertexShader);
393 void loadConstants(const PixelShader *pixelShader);
394
395 Context *context;
396 Clipper *clipper;
397 Blitter *blitter;
398 Viewport viewport;
399 Rect scissor;
400 int clipFlags;
401
402 Triangle *triangleBatch[16];
403 Primitive *primitiveBatch[16];
404
405 // User-defined clipping planes
406 Plane userPlane[MAX_CLIP_PLANES];
407 Plane clipPlane[MAX_CLIP_PLANES]; // Tranformed to clip space
408 bool updateClipPlanes;
409
410 AtomicInt exitThreads;
411 AtomicInt threadsAwake;
412 Thread *worker[16];
413 Event *resume[16]; // Events for resuming threads
414 Event *suspend[16]; // Events for suspending threads
415 Event *resumeApp; // Event for resuming the application thread
416
417 PrimitiveProgress primitiveProgress[16];
418 PixelProgress pixelProgress[16];
419 Task task[16]; // Current tasks for threads
420
421 enum {
422 DRAW_COUNT = 16, // Number of draw calls buffered (must be power of 2)
423 DRAW_COUNT_BITS = DRAW_COUNT - 1,
424 };
425 DrawCall *drawCall[DRAW_COUNT];
426 DrawCall *drawList[DRAW_COUNT];
427
428 AtomicInt currentDraw;
429 AtomicInt nextDraw;
430
431 enum {
432 TASK_COUNT = 32, // Size of the task queue (must be power of 2)
433 TASK_COUNT_BITS = TASK_COUNT - 1,
434 };
435 Task taskQueue[TASK_COUNT];
436 AtomicInt qHead;
437 AtomicInt qSize;
438
439 static AtomicInt unitCount;
440 static AtomicInt clusterCount;
441
442 MutexLock schedulerMutex;
443
444 #if PERF_HUD
445 int64_t vertexTime[16];
446 int64_t setupTime[16];
447 int64_t pixelTime[16];
448 #endif
449
450 VertexTask *vertexTask[16];
451
452 SwiftConfig *swiftConfig;
453
454 std::list<Query*> queries;
455 Resource *sync;
456
457 VertexProcessor::State vertexState;
458 SetupProcessor::State setupState;
459 PixelProcessor::State pixelState;
460
461 std::shared_ptr<Routine> vertexRoutine;
462 std::shared_ptr<Routine> setupRoutine;
463 std::shared_ptr<Routine> pixelRoutine;
464 };
465
466 struct DrawCall
467 {
468 DrawCall();
469
470 ~DrawCall();
471
472 AtomicInt drawType;
473 AtomicInt batchSize;
474
475 std::shared_ptr<Routine> vertexRoutine;
476 std::shared_ptr<Routine> setupRoutine;
477 std::shared_ptr<Routine> pixelRoutine;
478
479 VertexProcessor::RoutinePointer vertexPointer;
480 SetupProcessor::RoutinePointer setupPointer;
481 PixelProcessor::RoutinePointer pixelPointer;
482
483 int (Renderer::*setupPrimitives)(int batch, int count);
484 SetupProcessor::State setupState;
485
486 Resource *vertexStream[MAX_VERTEX_INPUTS];
487 Resource *indexBuffer;
488 Surface *renderTarget[RENDERTARGETS];
489 Surface *depthBuffer;
490 Surface *stencilBuffer;
491 Resource *texture[TOTAL_IMAGE_UNITS];
492 Resource* pUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS];
493 Resource* vUniformBuffers[MAX_UNIFORM_BUFFER_BINDINGS];
494 Resource* transformFeedbackBuffers[MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS];
495
496 unsigned int vsDirtyConstF;
497 unsigned int vsDirtyConstI;
498 unsigned int vsDirtyConstB;
499
500 unsigned int psDirtyConstF;
501 unsigned int psDirtyConstI;
502 unsigned int psDirtyConstB;
503
504 std::list<Query*> *queries;
505
506 AtomicInt clipFlags;
507
508 AtomicInt primitive; // Current primitive to enter pipeline
509 AtomicInt count; // Number of primitives to render
510 AtomicInt references; // Remaining references to this draw call, 0 when done drawing, -1 when resources unlocked and slot is free
511
512 DrawData *data;
513 };
514}
515
516#endif // sw_Renderer_hpp
517