1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "Renderer.hpp" |
16 | |
17 | #include "Clipper.hpp" |
18 | #include "Surface.hpp" |
19 | #include "Primitive.hpp" |
20 | #include "Polygon.hpp" |
21 | #include "Main/FrameBuffer.hpp" |
22 | #include "Main/SwiftConfig.hpp" |
23 | #include "Reactor/Reactor.hpp" |
24 | #include "Shader/Constants.hpp" |
25 | #include "Common/MutexLock.hpp" |
26 | #include "Common/CPUID.hpp" |
27 | #include "Common/Memory.hpp" |
28 | #include "Common/Resource.hpp" |
29 | #include "Common/Half.hpp" |
30 | #include "Common/Math.hpp" |
31 | #include "Common/Timer.hpp" |
32 | #include "Common/Debug.hpp" |
33 | |
34 | #undef max |
35 | |
// Passed to the SwiftConfig constructor when a Renderer is created.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only window on the primitive count accepted by Renderer::draw():
// a draw whose count is below minPrimitives or above maxPrimitives returns
// immediately without rendering.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
42 | |
43 | namespace sw |
44 | { |
// Global settings declared here and defined in another translation unit.
// The first group, plus exactColorRounding, is assigned by
// setGlobalRenderingSettings() in this file.
extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
extern bool booleanFaceRegister;
extern bool fullPixelPositionRegister;
extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
extern bool secondaryColor;             // Specular lighting is applied after texturing
extern bool colorsDefaultToZero;

extern bool forceWindowed;
extern bool complementaryDepthBuffer;   // When set, Renderer::draw() negates the depth range and uses 1 - near
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern TransparencyAntialiasing transparencyAntialiasing;
extern bool forceClearRegisters;

extern bool precacheVertex;
extern bool precacheSetup;
extern bool precachePixel;
63 | |
// Base number of primitives per batch; Renderer::draw() divides it by the
// multisample count (and uses 1 for wireframe and vertex fill modes).
static const int batchSize = 128;
AtomicInt threadCount(1);             // Compared against threadsAwake by the scheduler
AtomicInt Renderer::unitCount(1);     // Upper bound of the loops over primitiveProgress[]
AtomicInt Renderer::clusterCount(1);  // Upper bound of the loops over pixelProgress[]

// Precision settings. threadFunction() enables flush-to-zero and
// denormals-are-zero on the worker thread when logPrecision is below IEEE.
TranscendentalPrecision logPrecision = ACCURATE;
TranscendentalPrecision expPrecision = ACCURATE;
TranscendentalPrecision rcpPrecision = ACCURATE;
TranscendentalPrecision rsqPrecision = ACCURATE;
bool perspectiveCorrection = true;
74 | |
75 | static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) |
76 | { |
77 | static bool initialized = false; |
78 | |
79 | if(!initialized) |
80 | { |
81 | sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; |
82 | sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; |
83 | sw::booleanFaceRegister = conventions.booleanFaceRegister; |
84 | sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; |
85 | sw::leadingVertexFirst = conventions.leadingVertexFirst; |
86 | sw::secondaryColor = conventions.secondaryColor; |
87 | sw::colorsDefaultToZero = conventions.colorsDefaultToZero; |
88 | sw::exactColorRounding = exactColorRounding; |
89 | initialized = true; |
90 | } |
91 | } |
92 | |
// Argument bundle read by Renderer::threadFunction(), which receives it
// through a void pointer.
struct Parameters
{
	Renderer *renderer;   // Renderer whose threadLoop() the thread runs
	int threadIndex;      // Index passed to threadLoop()
};
98 | |
99 | Query::Query(Type type) : building(false), data(0), type(type), reference(1) |
100 | { |
101 | } |
102 | |
// Adds one reference to the query. Renderer::draw() calls this for every
// query it attaches to a draw call.
void Query::addRef()
{
	++reference;   // Atomic
}
107 | |
108 | void Query::release() |
109 | { |
110 | int ref = reference--; // Atomic |
111 | |
112 | ASSERT(ref >= 0); |
113 | |
114 | if(ref == 0) |
115 | { |
116 | delete this; |
117 | } |
118 | } |
119 | |
120 | DrawCall::DrawCall() |
121 | { |
122 | queries = 0; |
123 | |
124 | vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; |
125 | vsDirtyConstI = 16; |
126 | vsDirtyConstB = 16; |
127 | |
128 | psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; |
129 | psDirtyConstI = 16; |
130 | psDirtyConstB = 16; |
131 | |
132 | references = -1; |
133 | |
134 | data = (DrawData*)allocate(sizeof(DrawData)); |
135 | data->constants = &constants; |
136 | } |
137 | |
// Frees the query list (if any) and the DrawData block allocated by the constructor.
DrawCall::~DrawCall()
{
	delete queries;   // Deletes only the list object, not the Query objects it points to

	deallocate(data);   // Counterpart of allocate() in the constructor
}
144 | |
145 | Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() |
146 | { |
147 | setGlobalRenderingSettings(conventions, exactColorRounding); |
148 | |
149 | setRenderTarget(0, 0); |
150 | clipper = new Clipper(symmetricNormalizedDepth); |
151 | blitter = new Blitter; |
152 | |
153 | updateViewMatrix = true; |
154 | updateBaseMatrix = true; |
155 | updateProjectionMatrix = true; |
156 | updateClipPlanes = true; |
157 | |
158 | #if PERF_HUD |
159 | resetTimers(); |
160 | #endif |
161 | |
162 | for(int i = 0; i < 16; i++) |
163 | { |
164 | vertexTask[i] = 0; |
165 | |
166 | worker[i] = 0; |
167 | resume[i] = 0; |
168 | suspend[i] = 0; |
169 | } |
170 | |
171 | threadsAwake = 0; |
172 | resumeApp = new Event(); |
173 | |
174 | currentDraw = 0; |
175 | nextDraw = 0; |
176 | |
177 | qHead = 0; |
178 | qSize = 0; |
179 | |
180 | for(int i = 0; i < 16; i++) |
181 | { |
182 | triangleBatch[i] = 0; |
183 | primitiveBatch[i] = 0; |
184 | } |
185 | |
186 | for(int draw = 0; draw < DRAW_COUNT; draw++) |
187 | { |
188 | drawCall[draw] = new DrawCall(); |
189 | drawList[draw] = drawCall[draw]; |
190 | } |
191 | |
192 | for(int unit = 0; unit < 16; unit++) |
193 | { |
194 | primitiveProgress[unit].init(); |
195 | } |
196 | |
197 | for(int cluster = 0; cluster < 16; cluster++) |
198 | { |
199 | pixelProgress[cluster].init(); |
200 | } |
201 | |
202 | clipFlags = 0; |
203 | |
204 | swiftConfig = new SwiftConfig(disableServer); |
205 | updateConfiguration(true); |
206 | |
207 | sync = new Resource(0); |
208 | } |
209 | |
// Stops the worker threads and releases everything the constructor created.
Renderer::~Renderer()
{
	// Take the sync resource exclusively before terminating the threads.
	// NOTE(review): sync is never deleted here; presumably destruct() hands
	// its destruction to the Resource itself - confirm in Resource.
	sync->lock(EXCLUSIVE);
	sync->destruct();
	terminateThreads();
	sync->unlock();

	delete clipper;
	clipper = nullptr;

	delete blitter;
	blitter = nullptr;

	delete resumeApp;
	resumeApp = nullptr;

	// Free the draw call pool allocated in the constructor.
	for(int draw = 0; draw < DRAW_COUNT; draw++)
	{
		delete drawCall[draw];
		drawCall[draw] = nullptr;
	}

	delete swiftConfig;
	swiftConfig = nullptr;
}
235 | |
236 | // This object has to be mem aligned |
237 | void* Renderer::operator new(size_t size) |
238 | { |
239 | ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class |
240 | return sw::allocate(sizeof(Renderer), 16); |
241 | } |
242 | |
// Releases storage obtained by Renderer::operator new through sw::deallocate().
void Renderer::operator delete(void * mem)
{
	sw::deallocate(mem);
}
247 | |
248 | void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) |
249 | { |
250 | #ifndef NDEBUG |
251 | if(count < minPrimitives || count > maxPrimitives) |
252 | { |
253 | return; |
254 | } |
255 | #endif |
256 | |
257 | context->drawType = drawType; |
258 | |
259 | updateConfiguration(); |
260 | updateClipper(); |
261 | |
262 | int ss = context->getSuperSampleCount(); |
263 | int ms = context->getMultiSampleCount(); |
264 | bool requiresSync = false; |
265 | |
266 | for(int q = 0; q < ss; q++) |
267 | { |
268 | unsigned int oldMultiSampleMask = context->multiSampleMask; |
269 | context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); |
270 | |
271 | if(!context->multiSampleMask) |
272 | { |
273 | continue; |
274 | } |
275 | |
276 | sync->lock(sw::PRIVATE); |
277 | |
278 | if(update || oldMultiSampleMask != context->multiSampleMask) |
279 | { |
280 | vertexState = VertexProcessor::update(drawType); |
281 | setupState = SetupProcessor::update(); |
282 | pixelState = PixelProcessor::update(); |
283 | |
284 | vertexRoutine = VertexProcessor::routine(vertexState); |
285 | setupRoutine = SetupProcessor::routine(setupState); |
286 | pixelRoutine = PixelProcessor::routine(pixelState); |
287 | } |
288 | |
289 | int batch = batchSize / ms; |
290 | |
291 | int (Renderer::*setupPrimitives)(int batch, int count); |
292 | |
293 | if(context->isDrawTriangle()) |
294 | { |
295 | switch(context->fillMode) |
296 | { |
297 | case FILL_SOLID: |
298 | setupPrimitives = &Renderer::setupSolidTriangles; |
299 | break; |
300 | case FILL_WIREFRAME: |
301 | setupPrimitives = &Renderer::setupWireframeTriangle; |
302 | batch = 1; |
303 | break; |
304 | case FILL_VERTEX: |
305 | setupPrimitives = &Renderer::setupVertexTriangle; |
306 | batch = 1; |
307 | break; |
308 | default: |
309 | ASSERT(false); |
310 | return; |
311 | } |
312 | } |
313 | else if(context->isDrawLine()) |
314 | { |
315 | setupPrimitives = &Renderer::setupLines; |
316 | } |
317 | else // Point draw |
318 | { |
319 | setupPrimitives = &Renderer::setupPoints; |
320 | } |
321 | |
322 | DrawCall *draw = nullptr; |
323 | |
324 | do |
325 | { |
326 | for(int i = 0; i < DRAW_COUNT; i++) |
327 | { |
328 | if(drawCall[i]->references == -1) |
329 | { |
330 | draw = drawCall[i]; |
331 | drawList[nextDraw & DRAW_COUNT_BITS] = draw; |
332 | |
333 | break; |
334 | } |
335 | } |
336 | |
337 | if(!draw) |
338 | { |
339 | resumeApp->wait(); |
340 | } |
341 | } |
342 | while(!draw); |
343 | |
344 | DrawData *data = draw->data; |
345 | |
346 | if(queries.size() != 0) |
347 | { |
348 | draw->queries = new std::list<Query*>(); |
349 | bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; |
350 | for(auto &query : queries) |
351 | { |
352 | if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) |
353 | { |
354 | query->addRef(); |
355 | draw->queries->push_back(query); |
356 | } |
357 | } |
358 | } |
359 | |
360 | draw->drawType = drawType; |
361 | draw->batchSize = batch; |
362 | |
363 | draw->vertexRoutine = vertexRoutine; |
364 | draw->setupRoutine = setupRoutine; |
365 | draw->pixelRoutine = pixelRoutine; |
366 | draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); |
367 | draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); |
368 | draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); |
369 | draw->setupPrimitives = setupPrimitives; |
370 | draw->setupState = setupState; |
371 | |
372 | for(int i = 0; i < MAX_VERTEX_INPUTS; i++) |
373 | { |
374 | draw->vertexStream[i] = context->input[i].resource; |
375 | data->input[i] = context->input[i].buffer; |
376 | data->stride[i] = context->input[i].stride; |
377 | |
378 | if(draw->vertexStream[i]) |
379 | { |
380 | draw->vertexStream[i]->lock(PUBLIC, PRIVATE); |
381 | } |
382 | } |
383 | |
384 | if(context->indexBuffer) |
385 | { |
386 | data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; |
387 | } |
388 | |
389 | draw->indexBuffer = context->indexBuffer; |
390 | |
391 | for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) |
392 | { |
393 | draw->texture[sampler] = 0; |
394 | } |
395 | |
396 | for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) |
397 | { |
398 | if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) |
399 | { |
400 | draw->texture[sampler] = context->texture[sampler]; |
401 | draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets |
402 | |
403 | data->mipmap[sampler] = context->sampler[sampler].getTextureData(); |
404 | |
405 | requiresSync |= context->sampler[sampler].requiresSync(); |
406 | } |
407 | } |
408 | |
409 | if(context->pixelShader) |
410 | { |
411 | if(draw->psDirtyConstF) |
412 | { |
413 | memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); |
414 | memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); |
415 | draw->psDirtyConstF = 0; |
416 | } |
417 | |
418 | if(draw->psDirtyConstI) |
419 | { |
420 | memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); |
421 | draw->psDirtyConstI = 0; |
422 | } |
423 | |
424 | if(draw->psDirtyConstB) |
425 | { |
426 | memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); |
427 | draw->psDirtyConstB = 0; |
428 | } |
429 | |
430 | PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); |
431 | } |
432 | else |
433 | { |
434 | for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) |
435 | { |
436 | draw->pUniformBuffers[i] = nullptr; |
437 | } |
438 | } |
439 | |
440 | if(context->pixelShaderModel() <= 0x0104) |
441 | { |
442 | for(int stage = 0; stage < 8; stage++) |
443 | { |
444 | if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) |
445 | { |
446 | data->textureStage[stage] = context->textureStage[stage].uniforms; |
447 | } |
448 | else break; |
449 | } |
450 | } |
451 | |
452 | if(context->vertexShader) |
453 | { |
454 | if(context->vertexShader->getShaderModel() >= 0x0300) |
455 | { |
456 | for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) |
457 | { |
458 | if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) |
459 | { |
460 | draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; |
461 | draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); |
462 | |
463 | data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); |
464 | |
465 | requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync(); |
466 | } |
467 | } |
468 | } |
469 | |
470 | if(draw->vsDirtyConstF) |
471 | { |
472 | memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); |
473 | draw->vsDirtyConstF = 0; |
474 | } |
475 | |
476 | if(draw->vsDirtyConstI) |
477 | { |
478 | memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); |
479 | draw->vsDirtyConstI = 0; |
480 | } |
481 | |
482 | if(draw->vsDirtyConstB) |
483 | { |
484 | memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); |
485 | draw->vsDirtyConstB = 0; |
486 | } |
487 | |
488 | if(context->vertexShader->isInstanceIdDeclared()) |
489 | { |
490 | data->instanceID = context->instanceID; |
491 | } |
492 | |
493 | VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); |
494 | VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); |
495 | } |
496 | else |
497 | { |
498 | data->ff = ff; |
499 | |
500 | draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; |
501 | draw->vsDirtyConstI = 16; |
502 | draw->vsDirtyConstB = 16; |
503 | |
504 | for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) |
505 | { |
506 | draw->vUniformBuffers[i] = nullptr; |
507 | } |
508 | |
509 | for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) |
510 | { |
511 | draw->transformFeedbackBuffers[i] = nullptr; |
512 | } |
513 | } |
514 | |
515 | if(pixelState.stencilActive) |
516 | { |
517 | data->stencil[0] = stencil; |
518 | data->stencil[1] = stencilCCW; |
519 | } |
520 | |
521 | if(pixelState.fogActive) |
522 | { |
523 | data->fog = fog; |
524 | } |
525 | |
526 | if(setupState.isDrawPoint) |
527 | { |
528 | data->point = point; |
529 | } |
530 | |
531 | data->lineWidth = context->lineWidth; |
532 | |
533 | data->factor = factor; |
534 | |
535 | if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) |
536 | { |
537 | float ref = context->alphaReference * (1.0f / 255.0f); |
538 | float margin = sw::min(ref, 1.0f - ref); |
539 | |
540 | if(ms == 4) |
541 | { |
542 | data->a2c0 = replicate(ref - margin * 0.6f); |
543 | data->a2c1 = replicate(ref - margin * 0.2f); |
544 | data->a2c2 = replicate(ref + margin * 0.2f); |
545 | data->a2c3 = replicate(ref + margin * 0.6f); |
546 | } |
547 | else if(ms == 2) |
548 | { |
549 | data->a2c0 = replicate(ref - margin * 0.3f); |
550 | data->a2c1 = replicate(ref + margin * 0.3f); |
551 | } |
552 | else ASSERT(false); |
553 | } |
554 | |
555 | if(pixelState.occlusionEnabled) |
556 | { |
557 | for(int cluster = 0; cluster < clusterCount; cluster++) |
558 | { |
559 | data->occlusion[cluster] = 0; |
560 | } |
561 | } |
562 | |
563 | #if PERF_PROFILE |
564 | for(int cluster = 0; cluster < clusterCount; cluster++) |
565 | { |
566 | for(int i = 0; i < PERF_TIMERS; i++) |
567 | { |
568 | data->cycles[i][cluster] = 0; |
569 | } |
570 | } |
571 | #endif |
572 | |
573 | // Viewport |
574 | { |
575 | float W = 0.5f * viewport.width; |
576 | float H = 0.5f * viewport.height; |
577 | float X0 = viewport.x0 + W; |
578 | float Y0 = viewport.y0 + H; |
579 | float N = viewport.minZ; |
580 | float F = viewport.maxZ; |
581 | float Z = F - N; |
582 | |
583 | if(context->isDrawTriangle(false)) |
584 | { |
585 | N += context->depthBias; |
586 | } |
587 | |
588 | if(complementaryDepthBuffer) |
589 | { |
590 | Z = -Z; |
591 | N = 1 - N; |
592 | } |
593 | |
594 | static const float X[5][16] = // Fragment offsets |
595 | { |
596 | {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample |
597 | {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples |
598 | {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples |
599 | {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples |
600 | {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples |
601 | }; |
602 | |
603 | static const float Y[5][16] = // Fragment offsets |
604 | { |
605 | {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample |
606 | {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples |
607 | {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples |
608 | {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples |
609 | {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples |
610 | }; |
611 | |
612 | int s = sw::log2(ss); |
613 | |
614 | data->Wx16 = replicate(W * 16); |
615 | data->Hx16 = replicate(H * 16); |
616 | data->X0x16 = replicate(X0 * 16 - 8); |
617 | data->Y0x16 = replicate(Y0 * 16 - 8); |
618 | data->XXXX = replicate(X[s][q] / W); |
619 | data->YYYY = replicate(Y[s][q] / H); |
620 | data->halfPixelX = replicate(0.5f / W); |
621 | data->halfPixelY = replicate(0.5f / H); |
622 | data->viewportHeight = abs(viewport.height); |
623 | data->slopeDepthBias = context->slopeDepthBias; |
624 | data->depthRange = Z; |
625 | data->depthNear = N; |
626 | draw->clipFlags = clipFlags; |
627 | |
628 | if(clipFlags) |
629 | { |
630 | if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; |
631 | if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; |
632 | if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; |
633 | if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; |
634 | if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; |
635 | if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; |
636 | } |
637 | } |
638 | |
639 | // Target |
640 | { |
641 | for(int index = 0; index < RENDERTARGETS; index++) |
642 | { |
643 | draw->renderTarget[index] = context->renderTarget[index]; |
644 | |
645 | if(draw->renderTarget[index]) |
646 | { |
647 | unsigned int layer = context->renderTargetLayer[index]; |
648 | requiresSync |= context->renderTarget[index]->requiresSync(); |
649 | data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); |
650 | data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); |
651 | data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); |
652 | data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); |
653 | } |
654 | } |
655 | |
656 | draw->depthBuffer = context->depthBuffer; |
657 | draw->stencilBuffer = context->stencilBuffer; |
658 | |
659 | if(draw->depthBuffer) |
660 | { |
661 | unsigned int layer = context->depthBufferLayer; |
662 | requiresSync |= context->depthBuffer->requiresSync(); |
663 | data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); |
664 | data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); |
665 | data->depthPitchB = context->depthBuffer->getInternalPitchB(); |
666 | data->depthSliceB = context->depthBuffer->getInternalSliceB(); |
667 | } |
668 | |
669 | if(draw->stencilBuffer) |
670 | { |
671 | unsigned int layer = context->stencilBufferLayer; |
672 | requiresSync |= context->stencilBuffer->requiresSync(); |
673 | data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); |
674 | data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); |
675 | data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); |
676 | data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); |
677 | } |
678 | } |
679 | |
680 | // Scissor |
681 | { |
682 | data->scissorX0 = scissor.x0; |
683 | data->scissorX1 = scissor.x1; |
684 | data->scissorY0 = scissor.y0; |
685 | data->scissorY1 = scissor.y1; |
686 | } |
687 | |
688 | draw->primitive = 0; |
689 | draw->count = count; |
690 | |
691 | draw->references = (count + batch - 1) / batch; |
692 | |
693 | schedulerMutex.lock(); |
694 | ++nextDraw; // Atomic |
695 | schedulerMutex.unlock(); |
696 | |
697 | #ifndef NDEBUG |
698 | if(threadCount == 1) // Use main thread for draw execution |
699 | { |
700 | threadsAwake = 1; |
701 | task[0].type = Task::RESUME; |
702 | |
703 | taskLoop(0); |
704 | } |
705 | else |
706 | #endif |
707 | { |
708 | if(!threadsAwake) |
709 | { |
710 | suspend[0]->wait(); |
711 | |
712 | threadsAwake = 1; |
713 | task[0].type = Task::RESUME; |
714 | |
715 | resume[0]->signal(); |
716 | } |
717 | } |
718 | } |
719 | |
720 | // TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization. |
721 | if(requiresSync) |
722 | { |
723 | synchronize(); |
724 | } |
725 | } |
726 | |
// Forwards the clear request unchanged to the blitter.
void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
{
	blitter->clear(value, format, dest, clearRect, rgbaMask);
}
731 | |
// Forwards the blit to the blitter; the three boolean flags are passed
// together as one brace-initialized argument.
void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion)
{
	blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion});
}
736 | |
// Forwards the 3D blit from source to dest to the blitter.
void Renderer::blit3D(Surface *source, Surface *dest)
{
	blitter->blit3D(source, dest);
}
741 | |
742 | void Renderer::threadFunction(void *parameters) |
743 | { |
744 | Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; |
745 | int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; |
746 | |
747 | if(logPrecision < IEEE) |
748 | { |
749 | CPUID::setFlushToZero(true); |
750 | CPUID::setDenormalsAreZero(true); |
751 | } |
752 | |
753 | renderer->threadLoop(threadIndex); |
754 | } |
755 | |
// Body of a worker thread: runs tasks until suspended, reports the suspension,
// then blocks until resumed. Repeats until exitThreads is set.
void Renderer::threadLoop(int threadIndex)
{
	while(!exitThreads)
	{
		taskLoop(threadIndex);   // Returns once this thread's task type is SUSPEND

		// Handshake with whoever wakes this thread: the waker waits on
		// suspend[] before signaling resume[] (see scheduleTask() and draw()).
		suspend[threadIndex]->signal();
		resume[threadIndex]->wait();
	}
}
766 | |
767 | void Renderer::taskLoop(int threadIndex) |
768 | { |
769 | while(task[threadIndex].type != Task::SUSPEND) |
770 | { |
771 | scheduleTask(threadIndex); |
772 | executeTask(threadIndex); |
773 | } |
774 | } |
775 | |
// Appends runnable tasks to the task queue: first pixel tasks for idle
// clusters, then primitive tasks for free primitive units.
// The only caller visible in this file, scheduleTask(), holds schedulerMutex
// while calling this.
void Renderer::findAvailableTasks()
{
	// Find pixel tasks
	for(int cluster = 0; cluster < clusterCount; cluster++)
	{
		if(!pixelProgress[cluster].executing)
		{
			for(int unit = 0; unit < unitCount; unit++)
			{
				if(primitiveProgress[unit].references > 0)   // Contains processed primitives
				{
					// The unit's batch must belong to the draw call this cluster is working on...
					if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
					{
						// ...and must start where the cluster left off, which keeps batches in order.
						if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
						{
							Task &task = taskQueue[qHead];
							task.type = Task::PIXELS;
							task.primitiveUnit = unit;
							task.pixelCluster = cluster;

							pixelProgress[cluster].executing = true;

							// Commit to the task queue
							qHead = (qHead + 1) & TASK_COUNT_BITS;
							qSize++;

							break;   // At most one pixel task per cluster
						}
					}
				}
			}
		}
	}

	// Find primitive tasks
	if(currentDraw == nextDraw)
	{
		return;   // No more primitives to process
	}

	for(int unit = 0; unit < unitCount; unit++)
	{
		DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS];

		int primitive = draw->primitive;
		int count = draw->count;

		// All batches of the current draw have been handed out: move on to the next draw.
		if(primitive >= count)
		{
			++currentDraw;   // Atomic

			if(currentDraw == nextDraw)
			{
				return;   // No more primitives to process
			}

			draw = drawList[currentDraw & DRAW_COUNT_BITS];
		}

		if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
		{
			primitive = draw->primitive;
			count = draw->count;
			int batch = draw->batchSize;

			primitiveProgress[unit].drawCall = currentDraw;
			primitiveProgress[unit].firstPrimitive = primitive;
			primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;   // The last batch may be shorter

			// Advances by a full batch even for a short last batch; the
			// primitive >= count test above detects the end of the draw.
			draw->primitive += batch;

			Task &task = taskQueue[qHead];
			task.type = Task::PRIMITIVES;
			task.primitiveUnit = unit;

			// -1 marks the unit as busy; executeTask() sets it to clusterCount
			// once the primitives have been processed.
			primitiveProgress[unit].references = -1;

			// Commit to the task queue
			qHead = (qHead + 1) & TASK_COUNT_BITS;
			qSize++;
		}
	}
}
859 | |
// Assigns the next task to the given thread while holding schedulerMutex.
// Refills the queue when it runs low, wakes suspended threads when there is
// more queued work than awake threads, and gives the thread a SUSPEND task
// when the queue is empty.
void Renderer::scheduleTask(int threadIndex)
{
	schedulerMutex.lock();

	int curThreadsAwake = threadsAwake;

	// Look for more work when the queue holds fewer tasks than there are
	// sleeping threads plus this one.
	if((int)qSize < threadCount - curThreadsAwake + 1)
	{
		findAvailableTasks();
	}

	if(qSize != 0)
	{
		// Take the oldest queued task: qHead is the next write position and
		// qSize entries precede it.
		task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS];
		qSize--;

		if(curThreadsAwake != threadCount)
		{
			int wakeup = qSize - curThreadsAwake + 1;

			for(int i = 0; i < threadCount && wakeup > 0; i++)
			{
				if(task[i].type == Task::SUSPEND)
				{
					// threadLoop() signals suspend[i] after its task loop
					// returns; wait for that before resuming the thread.
					suspend[i]->wait();
					task[i].type = Task::RESUME;
					resume[i]->signal();

					++threadsAwake;   // Atomic
					wakeup--;
				}
			}
		}
	}
	else
	{
		// Nothing to do: taskLoop() exits when it sees the SUSPEND type.
		task[threadIndex].type = Task::SUSPEND;

		--threadsAwake;   // Atomic
	}

	schedulerMutex.unlock();
}
903 | |
904 | void Renderer::executeTask(int threadIndex) |
905 | { |
906 | #if PERF_HUD |
907 | int64_t startTick = Timer::ticks(); |
908 | #endif |
909 | |
910 | switch(task[threadIndex].type) |
911 | { |
912 | case Task::PRIMITIVES: |
913 | { |
914 | int unit = task[threadIndex].primitiveUnit; |
915 | |
916 | int input = primitiveProgress[unit].firstPrimitive; |
917 | int count = primitiveProgress[unit].primitiveCount; |
918 | DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
919 | int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; |
920 | |
921 | processPrimitiveVertices(unit, input, count, draw->count, threadIndex); |
922 | |
923 | #if PERF_HUD |
924 | int64_t time = Timer::ticks(); |
925 | vertexTime[threadIndex] += time - startTick; |
926 | startTick = time; |
927 | #endif |
928 | |
929 | int visible = 0; |
930 | |
931 | if(!draw->setupState.rasterizerDiscard) |
932 | { |
933 | visible = (this->*setupPrimitives)(unit, count); |
934 | } |
935 | |
936 | primitiveProgress[unit].visible = visible; |
937 | primitiveProgress[unit].references = clusterCount; |
938 | |
939 | #if PERF_HUD |
940 | setupTime[threadIndex] += Timer::ticks() - startTick; |
941 | #endif |
942 | } |
943 | break; |
944 | case Task::PIXELS: |
945 | { |
946 | int unit = task[threadIndex].primitiveUnit; |
947 | int visible = primitiveProgress[unit].visible; |
948 | |
949 | if(visible > 0) |
950 | { |
951 | int cluster = task[threadIndex].pixelCluster; |
952 | Primitive *primitive = primitiveBatch[unit]; |
953 | DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; |
954 | DrawData *data = draw->data; |
955 | PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; |
956 | |
957 | pixelRoutine(primitive, visible, cluster, data); |
958 | } |
959 | |
960 | finishRendering(task[threadIndex]); |
961 | |
962 | #if PERF_HUD |
963 | pixelTime[threadIndex] += Timer::ticks() - startTick; |
964 | #endif |
965 | } |
966 | break; |
967 | case Task::RESUME: |
968 | break; |
969 | case Task::SUSPEND: |
970 | break; |
971 | default: |
972 | ASSERT(false); |
973 | } |
974 | } |
975 | |
// Takes a PUBLIC lock on the sync resource and releases it immediately.
// Each queued draw holds a PRIVATE lock on the same resource (see draw()).
// NOTE(review): presumably the PUBLIC lock blocks until those PRIVATE locks
// are released, making this a wait for pending draws - confirm in Resource.
void Renderer::synchronize()
{
	sync->lock(sw::PUBLIC);
	sync->unlock();
}
981 | |
// Bookkeeping performed after a pixel task has run (it is called from the
// Task::PIXELS case of the task switch).
//
// pixelTask supplies the primitive unit (batch) and the pixel cluster that
// were just processed. The function:
//  1. records how many primitives of the draw call this cluster has handled,
//     moving the cluster on to the next draw call once draw.count is reached;
//  2. drops one reference on the primitive batch and, when that yields 0, one
//     reference on the draw call;
//  3. when the draw call's counter also yields 0, retires the draw call:
//     gathers profiling and query results, unlocks every resource the draw
//     call holds, resets its routines, unlocks 'sync' and signals 'resumeApp';
//  4. finally marks the cluster as no longer executing.
void Renderer::finishRendering(Task &pixelTask)
{
	int unit = pixelTask.primitiveUnit;
	int cluster = pixelTask.pixelCluster;

	DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS];
	DrawData &data = *draw.data;
	int primitive = primitiveProgress[unit].firstPrimitive;
	int count = primitiveProgress[unit].primitiveCount;
	// One past the last primitive of this batch, counted within the draw call
	int processedPrimitives = primitive + count;

	pixelProgress[cluster].processedPrimitives = processedPrimitives;

	// This cluster has seen every primitive of the draw call: advance it to the next one
	if(pixelProgress[cluster].processedPrimitives >= draw.count)
	{
		++pixelProgress[cluster].drawCall;   // Atomic
		pixelProgress[cluster].processedPrimitives = 0;
	}

	// NOTE(review): the '== 0' tests below only detect the last reference if the
	// counter type's postfix operator-- yields the value *after* the decrement
	// (a plain int would yield the value before it) - confirm against the
	// declaration of 'references'.
	int ref = primitiveProgress[unit].references--;   // Atomic

	if(ref == 0)
	{
		ref = draw.references--;   // Atomic

		if(ref == 0)
		{
			#if PERF_PROFILE
				// Fold the per-cluster cycle counters of this draw call into the profiler totals
				for(int cluster = 0; cluster < clusterCount; cluster++)
				{
					for(int i = 0; i < PERF_TIMERS; i++)
					{
						profiler.cycles[i] += data.cycles[i][cluster];
					}
				}
			#endif

			if(draw.queries)
			{
				for(auto &query : *(draw.queries))
				{
					switch(query->type)
					{
					case Query::FRAGMENTS_PASSED:
						// Sum the occlusion counters of all clusters
						for(int cluster = 0; cluster < clusterCount; cluster++)
						{
							query->data += data.occlusion[cluster];
						}
						break;
					case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
						// Uses the end position of the batch that dropped the last reference
						query->data += processedPrimitives;
						break;
					default:
						break;
					}

					query->release();
				}

				// The query list belongs to the draw call and is destroyed with it
				delete draw.queries;
				draw.queries = 0;
			}

			// Unlock every resource referenced by the draw call

			for(int i = 0; i < RENDERTARGETS; i++)
			{
				if(draw.renderTarget[i])
				{
					draw.renderTarget[i]->unlockInternal();
				}
			}

			if(draw.depthBuffer)
			{
				draw.depthBuffer->unlockInternal();
			}

			if(draw.stencilBuffer)
			{
				draw.stencilBuffer->unlockStencil();
			}

			for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
			{
				if(draw.texture[i])
				{
					draw.texture[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
			{
				if(draw.vertexStream[i])
				{
					draw.vertexStream[i]->unlock();
				}
			}

			if(draw.indexBuffer)
			{
				draw.indexBuffer->unlock();
			}

			// Pixel and vertex stage uniform buffers share the same binding range
			for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
			{
				if(draw.pUniformBuffers[i])
				{
					draw.pUniformBuffers[i]->unlock();
				}
				if(draw.vUniformBuffers[i])
				{
					draw.vUniformBuffers[i]->unlock();
				}
			}

			for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
			{
				if(draw.transformFeedbackBuffers[i])
				{
					draw.transformFeedbackBuffers[i]->unlock();
				}
			}

			// Drop the draw call's hold on its generated routines
			draw.vertexRoutine.reset();
			draw.setupRoutine.reset();
			draw.pixelRoutine.reset();

			sync->unlock();

			// -1 is stored only after all cleanup above; presumably it marks the
			// draw call slot as free for reuse - verify against the code that
			// allocates draw calls.
			draw.references = -1;
			resumeApp->signal();
		}
	}

	pixelProgress[cluster].executing = false;
}
1117 | |
1118 | void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) |
1119 | { |
1120 | Triangle *triangle = triangleBatch[unit]; |
1121 | int primitiveDrawCall = primitiveProgress[unit].drawCall; |
1122 | DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; |
1123 | DrawData *data = draw->data; |
1124 | VertexTask *task = vertexTask[thread]; |
1125 | |
1126 | const void *indices = data->indices; |
1127 | VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; |
1128 | |
1129 | if(task->vertexCache.drawCall != primitiveDrawCall) |
1130 | { |
1131 | task->vertexCache.clear(); |
1132 | task->vertexCache.drawCall = primitiveDrawCall; |
1133 | } |
1134 | |
1135 | unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size |
1136 | |
1137 | switch(draw->drawType) |
1138 | { |
1139 | case DRAW_POINTLIST: |
1140 | { |
1141 | unsigned int index = start; |
1142 | |
1143 | for(unsigned int i = 0; i < triangleCount; i++) |
1144 | { |
1145 | batch[i][0] = index; |
1146 | batch[i][1] = index; |
1147 | batch[i][2] = index; |
1148 | |
1149 | index += 1; |
1150 | } |
1151 | } |
1152 | break; |
1153 | case DRAW_LINELIST: |
1154 | { |
1155 | unsigned int index = 2 * start; |
1156 | |
1157 | for(unsigned int i = 0; i < triangleCount; i++) |
1158 | { |
1159 | batch[i][0] = index + 0; |
1160 | batch[i][1] = index + 1; |
1161 | batch[i][2] = index + 1; |
1162 | |
1163 | index += 2; |
1164 | } |
1165 | } |
1166 | break; |
1167 | case DRAW_LINESTRIP: |
1168 | { |
1169 | unsigned int index = start; |
1170 | |
1171 | for(unsigned int i = 0; i < triangleCount; i++) |
1172 | { |
1173 | batch[i][0] = index + 0; |
1174 | batch[i][1] = index + 1; |
1175 | batch[i][2] = index + 1; |
1176 | |
1177 | index += 1; |
1178 | } |
1179 | } |
1180 | break; |
1181 | case DRAW_LINELOOP: |
1182 | { |
1183 | unsigned int index = start; |
1184 | |
1185 | for(unsigned int i = 0; i < triangleCount; i++) |
1186 | { |
1187 | batch[i][0] = (index + 0) % loop; |
1188 | batch[i][1] = (index + 1) % loop; |
1189 | batch[i][2] = (index + 1) % loop; |
1190 | |
1191 | index += 1; |
1192 | } |
1193 | } |
1194 | break; |
1195 | case DRAW_TRIANGLELIST: |
1196 | { |
1197 | unsigned int index = 3 * start; |
1198 | |
1199 | for(unsigned int i = 0; i < triangleCount; i++) |
1200 | { |
1201 | batch[i][0] = index + 0; |
1202 | batch[i][1] = index + 1; |
1203 | batch[i][2] = index + 2; |
1204 | |
1205 | index += 3; |
1206 | } |
1207 | } |
1208 | break; |
1209 | case DRAW_TRIANGLESTRIP: |
1210 | { |
1211 | unsigned int index = start; |
1212 | |
1213 | for(unsigned int i = 0; i < triangleCount; i++) |
1214 | { |
1215 | if(leadingVertexFirst) |
1216 | { |
1217 | batch[i][0] = index + 0; |
1218 | batch[i][1] = index + (index & 1) + 1; |
1219 | batch[i][2] = index + (~index & 1) + 1; |
1220 | } |
1221 | else |
1222 | { |
1223 | batch[i][0] = index + (index & 1); |
1224 | batch[i][1] = index + (~index & 1); |
1225 | batch[i][2] = index + 2; |
1226 | } |
1227 | |
1228 | index += 1; |
1229 | } |
1230 | } |
1231 | break; |
1232 | case DRAW_TRIANGLEFAN: |
1233 | { |
1234 | unsigned int index = start; |
1235 | |
1236 | for(unsigned int i = 0; i < triangleCount; i++) |
1237 | { |
1238 | if(leadingVertexFirst) |
1239 | { |
1240 | batch[i][0] = index + 1; |
1241 | batch[i][1] = index + 2; |
1242 | batch[i][2] = 0; |
1243 | } |
1244 | else |
1245 | { |
1246 | batch[i][0] = 0; |
1247 | batch[i][1] = index + 1; |
1248 | batch[i][2] = index + 2; |
1249 | } |
1250 | |
1251 | index += 1; |
1252 | } |
1253 | } |
1254 | break; |
1255 | case DRAW_INDEXEDPOINTLIST8: |
1256 | { |
1257 | const unsigned char *index = (const unsigned char*)indices + start; |
1258 | |
1259 | for(unsigned int i = 0; i < triangleCount; i++) |
1260 | { |
1261 | batch[i][0] = *index; |
1262 | batch[i][1] = *index; |
1263 | batch[i][2] = *index; |
1264 | |
1265 | index += 1; |
1266 | } |
1267 | } |
1268 | break; |
1269 | case DRAW_INDEXEDPOINTLIST16: |
1270 | { |
1271 | const unsigned short *index = (const unsigned short*)indices + start; |
1272 | |
1273 | for(unsigned int i = 0; i < triangleCount; i++) |
1274 | { |
1275 | batch[i][0] = *index; |
1276 | batch[i][1] = *index; |
1277 | batch[i][2] = *index; |
1278 | |
1279 | index += 1; |
1280 | } |
1281 | } |
1282 | break; |
1283 | case DRAW_INDEXEDPOINTLIST32: |
1284 | { |
1285 | const unsigned int *index = (const unsigned int*)indices + start; |
1286 | |
1287 | for(unsigned int i = 0; i < triangleCount; i++) |
1288 | { |
1289 | batch[i][0] = *index; |
1290 | batch[i][1] = *index; |
1291 | batch[i][2] = *index; |
1292 | |
1293 | index += 1; |
1294 | } |
1295 | } |
1296 | break; |
1297 | case DRAW_INDEXEDLINELIST8: |
1298 | { |
1299 | const unsigned char *index = (const unsigned char*)indices + 2 * start; |
1300 | |
1301 | for(unsigned int i = 0; i < triangleCount; i++) |
1302 | { |
1303 | batch[i][0] = index[0]; |
1304 | batch[i][1] = index[1]; |
1305 | batch[i][2] = index[1]; |
1306 | |
1307 | index += 2; |
1308 | } |
1309 | } |
1310 | break; |
1311 | case DRAW_INDEXEDLINELIST16: |
1312 | { |
1313 | const unsigned short *index = (const unsigned short*)indices + 2 * start; |
1314 | |
1315 | for(unsigned int i = 0; i < triangleCount; i++) |
1316 | { |
1317 | batch[i][0] = index[0]; |
1318 | batch[i][1] = index[1]; |
1319 | batch[i][2] = index[1]; |
1320 | |
1321 | index += 2; |
1322 | } |
1323 | } |
1324 | break; |
1325 | case DRAW_INDEXEDLINELIST32: |
1326 | { |
1327 | const unsigned int *index = (const unsigned int*)indices + 2 * start; |
1328 | |
1329 | for(unsigned int i = 0; i < triangleCount; i++) |
1330 | { |
1331 | batch[i][0] = index[0]; |
1332 | batch[i][1] = index[1]; |
1333 | batch[i][2] = index[1]; |
1334 | |
1335 | index += 2; |
1336 | } |
1337 | } |
1338 | break; |
1339 | case DRAW_INDEXEDLINESTRIP8: |
1340 | { |
1341 | const unsigned char *index = (const unsigned char*)indices + start; |
1342 | |
1343 | for(unsigned int i = 0; i < triangleCount; i++) |
1344 | { |
1345 | batch[i][0] = index[0]; |
1346 | batch[i][1] = index[1]; |
1347 | batch[i][2] = index[1]; |
1348 | |
1349 | index += 1; |
1350 | } |
1351 | } |
1352 | break; |
1353 | case DRAW_INDEXEDLINESTRIP16: |
1354 | { |
1355 | const unsigned short *index = (const unsigned short*)indices + start; |
1356 | |
1357 | for(unsigned int i = 0; i < triangleCount; i++) |
1358 | { |
1359 | batch[i][0] = index[0]; |
1360 | batch[i][1] = index[1]; |
1361 | batch[i][2] = index[1]; |
1362 | |
1363 | index += 1; |
1364 | } |
1365 | } |
1366 | break; |
1367 | case DRAW_INDEXEDLINESTRIP32: |
1368 | { |
1369 | const unsigned int *index = (const unsigned int*)indices + start; |
1370 | |
1371 | for(unsigned int i = 0; i < triangleCount; i++) |
1372 | { |
1373 | batch[i][0] = index[0]; |
1374 | batch[i][1] = index[1]; |
1375 | batch[i][2] = index[1]; |
1376 | |
1377 | index += 1; |
1378 | } |
1379 | } |
1380 | break; |
1381 | case DRAW_INDEXEDLINELOOP8: |
1382 | { |
1383 | const unsigned char *index = (const unsigned char*)indices; |
1384 | |
1385 | for(unsigned int i = 0; i < triangleCount; i++) |
1386 | { |
1387 | batch[i][0] = index[(start + i + 0) % loop]; |
1388 | batch[i][1] = index[(start + i + 1) % loop]; |
1389 | batch[i][2] = index[(start + i + 1) % loop]; |
1390 | } |
1391 | } |
1392 | break; |
1393 | case DRAW_INDEXEDLINELOOP16: |
1394 | { |
1395 | const unsigned short *index = (const unsigned short*)indices; |
1396 | |
1397 | for(unsigned int i = 0; i < triangleCount; i++) |
1398 | { |
1399 | batch[i][0] = index[(start + i + 0) % loop]; |
1400 | batch[i][1] = index[(start + i + 1) % loop]; |
1401 | batch[i][2] = index[(start + i + 1) % loop]; |
1402 | } |
1403 | } |
1404 | break; |
1405 | case DRAW_INDEXEDLINELOOP32: |
1406 | { |
1407 | const unsigned int *index = (const unsigned int*)indices; |
1408 | |
1409 | for(unsigned int i = 0; i < triangleCount; i++) |
1410 | { |
1411 | batch[i][0] = index[(start + i + 0) % loop]; |
1412 | batch[i][1] = index[(start + i + 1) % loop]; |
1413 | batch[i][2] = index[(start + i + 1) % loop]; |
1414 | } |
1415 | } |
1416 | break; |
1417 | case DRAW_INDEXEDTRIANGLELIST8: |
1418 | { |
1419 | const unsigned char *index = (const unsigned char*)indices + 3 * start; |
1420 | |
1421 | for(unsigned int i = 0; i < triangleCount; i++) |
1422 | { |
1423 | batch[i][0] = index[0]; |
1424 | batch[i][1] = index[1]; |
1425 | batch[i][2] = index[2]; |
1426 | |
1427 | index += 3; |
1428 | } |
1429 | } |
1430 | break; |
1431 | case DRAW_INDEXEDTRIANGLELIST16: |
1432 | { |
1433 | const unsigned short *index = (const unsigned short*)indices + 3 * start; |
1434 | |
1435 | for(unsigned int i = 0; i < triangleCount; i++) |
1436 | { |
1437 | batch[i][0] = index[0]; |
1438 | batch[i][1] = index[1]; |
1439 | batch[i][2] = index[2]; |
1440 | |
1441 | index += 3; |
1442 | } |
1443 | } |
1444 | break; |
1445 | case DRAW_INDEXEDTRIANGLELIST32: |
1446 | { |
1447 | const unsigned int *index = (const unsigned int*)indices + 3 * start; |
1448 | |
1449 | for(unsigned int i = 0; i < triangleCount; i++) |
1450 | { |
1451 | batch[i][0] = index[0]; |
1452 | batch[i][1] = index[1]; |
1453 | batch[i][2] = index[2]; |
1454 | |
1455 | index += 3; |
1456 | } |
1457 | } |
1458 | break; |
1459 | case DRAW_INDEXEDTRIANGLESTRIP8: |
1460 | { |
1461 | const unsigned char *index = (const unsigned char*)indices + start; |
1462 | |
1463 | for(unsigned int i = 0; i < triangleCount; i++) |
1464 | { |
1465 | batch[i][0] = index[0]; |
1466 | batch[i][1] = index[((start + i) & 1) + 1]; |
1467 | batch[i][2] = index[(~(start + i) & 1) + 1]; |
1468 | |
1469 | index += 1; |
1470 | } |
1471 | } |
1472 | break; |
1473 | case DRAW_INDEXEDTRIANGLESTRIP16: |
1474 | { |
1475 | const unsigned short *index = (const unsigned short*)indices + start; |
1476 | |
1477 | for(unsigned int i = 0; i < triangleCount; i++) |
1478 | { |
1479 | batch[i][0] = index[0]; |
1480 | batch[i][1] = index[((start + i) & 1) + 1]; |
1481 | batch[i][2] = index[(~(start + i) & 1) + 1]; |
1482 | |
1483 | index += 1; |
1484 | } |
1485 | } |
1486 | break; |
1487 | case DRAW_INDEXEDTRIANGLESTRIP32: |
1488 | { |
1489 | const unsigned int *index = (const unsigned int*)indices + start; |
1490 | |
1491 | for(unsigned int i = 0; i < triangleCount; i++) |
1492 | { |
1493 | batch[i][0] = index[0]; |
1494 | batch[i][1] = index[((start + i) & 1) + 1]; |
1495 | batch[i][2] = index[(~(start + i) & 1) + 1]; |
1496 | |
1497 | index += 1; |
1498 | } |
1499 | } |
1500 | break; |
1501 | case DRAW_INDEXEDTRIANGLEFAN8: |
1502 | { |
1503 | const unsigned char *index = (const unsigned char*)indices; |
1504 | |
1505 | for(unsigned int i = 0; i < triangleCount; i++) |
1506 | { |
1507 | batch[i][0] = index[start + i + 1]; |
1508 | batch[i][1] = index[start + i + 2]; |
1509 | batch[i][2] = index[0]; |
1510 | } |
1511 | } |
1512 | break; |
1513 | case DRAW_INDEXEDTRIANGLEFAN16: |
1514 | { |
1515 | const unsigned short *index = (const unsigned short*)indices; |
1516 | |
1517 | for(unsigned int i = 0; i < triangleCount; i++) |
1518 | { |
1519 | batch[i][0] = index[start + i + 1]; |
1520 | batch[i][1] = index[start + i + 2]; |
1521 | batch[i][2] = index[0]; |
1522 | } |
1523 | } |
1524 | break; |
1525 | case DRAW_INDEXEDTRIANGLEFAN32: |
1526 | { |
1527 | const unsigned int *index = (const unsigned int*)indices; |
1528 | |
1529 | for(unsigned int i = 0; i < triangleCount; i++) |
1530 | { |
1531 | batch[i][0] = index[start + i + 1]; |
1532 | batch[i][1] = index[start + i + 2]; |
1533 | batch[i][2] = index[0]; |
1534 | } |
1535 | } |
1536 | break; |
1537 | case DRAW_QUADLIST: |
1538 | { |
1539 | unsigned int index = 4 * start / 2; |
1540 | |
1541 | for(unsigned int i = 0; i < triangleCount; i += 2) |
1542 | { |
1543 | batch[i+0][0] = index + 0; |
1544 | batch[i+0][1] = index + 1; |
1545 | batch[i+0][2] = index + 2; |
1546 | |
1547 | batch[i+1][0] = index + 0; |
1548 | batch[i+1][1] = index + 2; |
1549 | batch[i+1][2] = index + 3; |
1550 | |
1551 | index += 4; |
1552 | } |
1553 | } |
1554 | break; |
1555 | default: |
1556 | ASSERT(false); |
1557 | return; |
1558 | } |
1559 | |
1560 | task->primitiveStart = start; |
1561 | task->vertexCount = triangleCount * 3; |
1562 | vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); |
1563 | } |
1564 | |
1565 | int Renderer::setupSolidTriangles(int unit, int count) |
1566 | { |
1567 | Triangle *triangle = triangleBatch[unit]; |
1568 | Primitive *primitive = primitiveBatch[unit]; |
1569 | |
1570 | DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
1571 | SetupProcessor::State &state = draw.setupState; |
1572 | const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; |
1573 | |
1574 | int ms = state.multiSample; |
1575 | int pos = state.positionRegister; |
1576 | const DrawData *data = draw.data; |
1577 | int visible = 0; |
1578 | |
1579 | for(int i = 0; i < count; i++, triangle++) |
1580 | { |
1581 | Vertex &v0 = triangle->v0; |
1582 | Vertex &v1 = triangle->v1; |
1583 | Vertex &v2 = triangle->v2; |
1584 | |
1585 | if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) |
1586 | { |
1587 | Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); |
1588 | |
1589 | int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; |
1590 | |
1591 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
1592 | { |
1593 | if(!clipper->clip(polygon, clipFlagsOr, draw)) |
1594 | { |
1595 | continue; |
1596 | } |
1597 | } |
1598 | |
1599 | if(setupRoutine(primitive, triangle, &polygon, data)) |
1600 | { |
1601 | primitive += ms; |
1602 | visible++; |
1603 | } |
1604 | } |
1605 | } |
1606 | |
1607 | return visible; |
1608 | } |
1609 | |
1610 | int Renderer::setupWireframeTriangle(int unit, int count) |
1611 | { |
1612 | Triangle *triangle = triangleBatch[unit]; |
1613 | Primitive *primitive = primitiveBatch[unit]; |
1614 | int visible = 0; |
1615 | |
1616 | DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
1617 | SetupProcessor::State &state = draw.setupState; |
1618 | |
1619 | const Vertex &v0 = triangle[0].v0; |
1620 | const Vertex &v1 = triangle[0].v1; |
1621 | const Vertex &v2 = triangle[0].v2; |
1622 | |
1623 | float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; |
1624 | |
1625 | if(state.cullMode == CULL_CLOCKWISE) |
1626 | { |
1627 | if(d >= 0) return 0; |
1628 | } |
1629 | else if(state.cullMode == CULL_COUNTERCLOCKWISE) |
1630 | { |
1631 | if(d <= 0) return 0; |
1632 | } |
1633 | |
1634 | // Copy attributes |
1635 | triangle[1].v0 = v1; |
1636 | triangle[1].v1 = v2; |
1637 | triangle[2].v0 = v2; |
1638 | triangle[2].v1 = v0; |
1639 | |
1640 | if(state.color[0][0].flat) // FIXME |
1641 | { |
1642 | for(int i = 0; i < 2; i++) |
1643 | { |
1644 | triangle[1].v0.C[i] = triangle[0].v0.C[i]; |
1645 | triangle[1].v1.C[i] = triangle[0].v0.C[i]; |
1646 | triangle[2].v0.C[i] = triangle[0].v0.C[i]; |
1647 | triangle[2].v1.C[i] = triangle[0].v0.C[i]; |
1648 | } |
1649 | } |
1650 | |
1651 | for(int i = 0; i < 3; i++) |
1652 | { |
1653 | if(setupLine(*primitive, *triangle, draw)) |
1654 | { |
1655 | primitive->area = 0.5f * d; |
1656 | |
1657 | primitive++; |
1658 | visible++; |
1659 | } |
1660 | |
1661 | triangle++; |
1662 | } |
1663 | |
1664 | return visible; |
1665 | } |
1666 | |
1667 | int Renderer::setupVertexTriangle(int unit, int count) |
1668 | { |
1669 | Triangle *triangle = triangleBatch[unit]; |
1670 | Primitive *primitive = primitiveBatch[unit]; |
1671 | int visible = 0; |
1672 | |
1673 | DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
1674 | SetupProcessor::State &state = draw.setupState; |
1675 | |
1676 | const Vertex &v0 = triangle[0].v0; |
1677 | const Vertex &v1 = triangle[0].v1; |
1678 | const Vertex &v2 = triangle[0].v2; |
1679 | |
1680 | float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; |
1681 | |
1682 | if(state.cullMode == CULL_CLOCKWISE) |
1683 | { |
1684 | if(d >= 0) return 0; |
1685 | } |
1686 | else if(state.cullMode == CULL_COUNTERCLOCKWISE) |
1687 | { |
1688 | if(d <= 0) return 0; |
1689 | } |
1690 | |
1691 | // Copy attributes |
1692 | triangle[1].v0 = v1; |
1693 | triangle[2].v0 = v2; |
1694 | |
1695 | for(int i = 0; i < 3; i++) |
1696 | { |
1697 | if(setupPoint(*primitive, *triangle, draw)) |
1698 | { |
1699 | primitive->area = 0.5f * d; |
1700 | |
1701 | primitive++; |
1702 | visible++; |
1703 | } |
1704 | |
1705 | triangle++; |
1706 | } |
1707 | |
1708 | return visible; |
1709 | } |
1710 | |
1711 | int Renderer::setupLines(int unit, int count) |
1712 | { |
1713 | Triangle *triangle = triangleBatch[unit]; |
1714 | Primitive *primitive = primitiveBatch[unit]; |
1715 | int visible = 0; |
1716 | |
1717 | DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
1718 | SetupProcessor::State &state = draw.setupState; |
1719 | |
1720 | int ms = state.multiSample; |
1721 | |
1722 | for(int i = 0; i < count; i++) |
1723 | { |
1724 | if(setupLine(*primitive, *triangle, draw)) |
1725 | { |
1726 | primitive += ms; |
1727 | visible++; |
1728 | } |
1729 | |
1730 | triangle++; |
1731 | } |
1732 | |
1733 | return visible; |
1734 | } |
1735 | |
1736 | int Renderer::setupPoints(int unit, int count) |
1737 | { |
1738 | Triangle *triangle = triangleBatch[unit]; |
1739 | Primitive *primitive = primitiveBatch[unit]; |
1740 | int visible = 0; |
1741 | |
1742 | DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; |
1743 | SetupProcessor::State &state = draw.setupState; |
1744 | |
1745 | int ms = state.multiSample; |
1746 | |
1747 | for(int i = 0; i < count; i++) |
1748 | { |
1749 | if(setupPoint(*primitive, *triangle, draw)) |
1750 | { |
1751 | primitive += ms; |
1752 | visible++; |
1753 | } |
1754 | |
1755 | triangle++; |
1756 | } |
1757 | |
1758 | return visible; |
1759 | } |
1760 | |
1761 | bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
1762 | { |
1763 | const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; |
1764 | const SetupProcessor::State &state = draw.setupState; |
1765 | const DrawData &data = *draw.data; |
1766 | |
1767 | float lineWidth = data.lineWidth; |
1768 | |
1769 | Vertex &v0 = triangle.v0; |
1770 | Vertex &v1 = triangle.v1; |
1771 | |
1772 | int pos = state.positionRegister; |
1773 | |
1774 | const float4 &P0 = v0.v[pos]; |
1775 | const float4 &P1 = v1.v[pos]; |
1776 | |
1777 | if(P0.w <= 0 && P1.w <= 0) |
1778 | { |
1779 | return false; |
1780 | } |
1781 | |
1782 | const float W = data.Wx16[0] * (1.0f / 16.0f); |
1783 | const float H = data.Hx16[0] * (1.0f / 16.0f); |
1784 | |
1785 | float dx = W * (P1.x / P1.w - P0.x / P0.w); |
1786 | float dy = H * (P1.y / P1.w - P0.y / P0.w); |
1787 | |
1788 | if(dx == 0 && dy == 0) |
1789 | { |
1790 | return false; |
1791 | } |
1792 | |
1793 | if(state.multiSample > 1) |
1794 | { |
1795 | // Rectangle centered on the line segment |
1796 | |
1797 | float4 P[4]; |
1798 | int C[4]; |
1799 | |
1800 | P[0] = P0; |
1801 | P[1] = P1; |
1802 | P[2] = P1; |
1803 | P[3] = P0; |
1804 | |
1805 | float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); |
1806 | |
1807 | dx *= scale; |
1808 | dy *= scale; |
1809 | |
1810 | float dx0h = dx * P0.w / H; |
1811 | float dy0w = dy * P0.w / W; |
1812 | |
1813 | float dx1h = dx * P1.w / H; |
1814 | float dy1w = dy * P1.w / W; |
1815 | |
1816 | P[0].x += -dy0w; |
1817 | P[0].y += +dx0h; |
1818 | C[0] = clipper->computeClipFlags(P[0]); |
1819 | |
1820 | P[1].x += -dy1w; |
1821 | P[1].y += +dx1h; |
1822 | C[1] = clipper->computeClipFlags(P[1]); |
1823 | |
1824 | P[2].x += +dy1w; |
1825 | P[2].y += -dx1h; |
1826 | C[2] = clipper->computeClipFlags(P[2]); |
1827 | |
1828 | P[3].x += +dy0w; |
1829 | P[3].y += -dx0h; |
1830 | C[3] = clipper->computeClipFlags(P[3]); |
1831 | |
1832 | if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) |
1833 | { |
1834 | Polygon polygon(P, 4); |
1835 | |
1836 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; |
1837 | |
1838 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
1839 | { |
1840 | if(!clipper->clip(polygon, clipFlagsOr, draw)) |
1841 | { |
1842 | return false; |
1843 | } |
1844 | } |
1845 | |
1846 | return setupRoutine(&primitive, &triangle, &polygon, &data); |
1847 | } |
1848 | } |
1849 | else if(true) |
1850 | { |
1851 | // Connecting diamonds polygon |
1852 | // This shape satisfies the diamond test convention, except for the exit rule part. |
1853 | // Line segments with overlapping endpoints have duplicate fragments. |
1854 | // The ideal algorithm requires half-open line rasterization (b/80135519). |
1855 | |
1856 | float4 P[8]; |
1857 | int C[8]; |
1858 | |
1859 | P[0] = P0; |
1860 | P[1] = P0; |
1861 | P[2] = P0; |
1862 | P[3] = P0; |
1863 | P[4] = P1; |
1864 | P[5] = P1; |
1865 | P[6] = P1; |
1866 | P[7] = P1; |
1867 | |
1868 | float dx0 = lineWidth * 0.5f * P0.w / W; |
1869 | float dy0 = lineWidth * 0.5f * P0.w / H; |
1870 | |
1871 | float dx1 = lineWidth * 0.5f * P1.w / W; |
1872 | float dy1 = lineWidth * 0.5f * P1.w / H; |
1873 | |
1874 | P[0].x += -dx0; |
1875 | C[0] = clipper->computeClipFlags(P[0]); |
1876 | |
1877 | P[1].y += +dy0; |
1878 | C[1] = clipper->computeClipFlags(P[1]); |
1879 | |
1880 | P[2].x += +dx0; |
1881 | C[2] = clipper->computeClipFlags(P[2]); |
1882 | |
1883 | P[3].y += -dy0; |
1884 | C[3] = clipper->computeClipFlags(P[3]); |
1885 | |
1886 | P[4].x += -dx1; |
1887 | C[4] = clipper->computeClipFlags(P[4]); |
1888 | |
1889 | P[5].y += +dy1; |
1890 | C[5] = clipper->computeClipFlags(P[5]); |
1891 | |
1892 | P[6].x += +dx1; |
1893 | C[6] = clipper->computeClipFlags(P[6]); |
1894 | |
1895 | P[7].y += -dy1; |
1896 | C[7] = clipper->computeClipFlags(P[7]); |
1897 | |
1898 | if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) |
1899 | { |
1900 | float4 L[6]; |
1901 | |
1902 | if(dx > -dy) |
1903 | { |
1904 | if(dx > dy) // Right |
1905 | { |
1906 | L[0] = P[0]; |
1907 | L[1] = P[1]; |
1908 | L[2] = P[5]; |
1909 | L[3] = P[6]; |
1910 | L[4] = P[7]; |
1911 | L[5] = P[3]; |
1912 | } |
1913 | else // Down |
1914 | { |
1915 | L[0] = P[0]; |
1916 | L[1] = P[4]; |
1917 | L[2] = P[5]; |
1918 | L[3] = P[6]; |
1919 | L[4] = P[2]; |
1920 | L[5] = P[3]; |
1921 | } |
1922 | } |
1923 | else |
1924 | { |
1925 | if(dx > dy) // Up |
1926 | { |
1927 | L[0] = P[0]; |
1928 | L[1] = P[1]; |
1929 | L[2] = P[2]; |
1930 | L[3] = P[6]; |
1931 | L[4] = P[7]; |
1932 | L[5] = P[4]; |
1933 | } |
1934 | else // Left |
1935 | { |
1936 | L[0] = P[1]; |
1937 | L[1] = P[2]; |
1938 | L[2] = P[3]; |
1939 | L[3] = P[7]; |
1940 | L[4] = P[4]; |
1941 | L[5] = P[5]; |
1942 | } |
1943 | } |
1944 | |
1945 | Polygon polygon(L, 6); |
1946 | |
1947 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; |
1948 | |
1949 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
1950 | { |
1951 | if(!clipper->clip(polygon, clipFlagsOr, draw)) |
1952 | { |
1953 | return false; |
1954 | } |
1955 | } |
1956 | |
1957 | return setupRoutine(&primitive, &triangle, &polygon, &data); |
1958 | } |
1959 | } |
1960 | else |
1961 | { |
1962 | // Parallelogram approximating Bresenham line |
1963 | // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the |
1964 | // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum |
1965 | // requirements for Bresenham line segment rasterization. |
1966 | |
1967 | float4 P[8]; |
1968 | P[0] = P0; |
1969 | P[1] = P0; |
1970 | P[2] = P0; |
1971 | P[3] = P0; |
1972 | P[4] = P1; |
1973 | P[5] = P1; |
1974 | P[6] = P1; |
1975 | P[7] = P1; |
1976 | |
1977 | float dx0 = lineWidth * 0.5f * P0.w / W; |
1978 | float dy0 = lineWidth * 0.5f * P0.w / H; |
1979 | |
1980 | float dx1 = lineWidth * 0.5f * P1.w / W; |
1981 | float dy1 = lineWidth * 0.5f * P1.w / H; |
1982 | |
1983 | P[0].x += -dx0; |
1984 | P[1].y += +dy0; |
1985 | P[2].x += +dx0; |
1986 | P[3].y += -dy0; |
1987 | P[4].x += -dx1; |
1988 | P[5].y += +dy1; |
1989 | P[6].x += +dx1; |
1990 | P[7].y += -dy1; |
1991 | |
1992 | float4 L[4]; |
1993 | |
1994 | if(dx > -dy) |
1995 | { |
1996 | if(dx > dy) // Right |
1997 | { |
1998 | L[0] = P[1]; |
1999 | L[1] = P[5]; |
2000 | L[2] = P[7]; |
2001 | L[3] = P[3]; |
2002 | } |
2003 | else // Down |
2004 | { |
2005 | L[0] = P[0]; |
2006 | L[1] = P[4]; |
2007 | L[2] = P[6]; |
2008 | L[3] = P[2]; |
2009 | } |
2010 | } |
2011 | else |
2012 | { |
2013 | if(dx > dy) // Up |
2014 | { |
2015 | L[0] = P[0]; |
2016 | L[1] = P[2]; |
2017 | L[2] = P[6]; |
2018 | L[3] = P[4]; |
2019 | } |
2020 | else // Left |
2021 | { |
2022 | L[0] = P[1]; |
2023 | L[1] = P[3]; |
2024 | L[2] = P[7]; |
2025 | L[3] = P[5]; |
2026 | } |
2027 | } |
2028 | |
2029 | int C0 = clipper->computeClipFlags(L[0]); |
2030 | int C1 = clipper->computeClipFlags(L[1]); |
2031 | int C2 = clipper->computeClipFlags(L[2]); |
2032 | int C3 = clipper->computeClipFlags(L[3]); |
2033 | |
2034 | if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE) |
2035 | { |
2036 | Polygon polygon(L, 4); |
2037 | |
2038 | int clipFlagsOr = C0 | C1 | C2 | C3; |
2039 | |
2040 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
2041 | { |
2042 | if(!clipper->clip(polygon, clipFlagsOr, draw)) |
2043 | { |
2044 | return false; |
2045 | } |
2046 | } |
2047 | |
2048 | return setupRoutine(&primitive, &triangle, &polygon, &data); |
2049 | } |
2050 | } |
2051 | |
2052 | return false; |
2053 | } |
2054 | |
2055 | bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
2056 | { |
2057 | const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; |
2058 | const SetupProcessor::State &state = draw.setupState; |
2059 | const DrawData &data = *draw.data; |
2060 | |
2061 | Vertex &v = triangle.v0; |
2062 | |
2063 | float pSize; |
2064 | |
2065 | int pts = state.pointSizeRegister; |
2066 | |
2067 | if(state.pointSizeRegister != Unused) |
2068 | { |
2069 | pSize = v.v[pts].y; |
2070 | } |
2071 | else |
2072 | { |
2073 | pSize = data.point.pointSize[0]; |
2074 | } |
2075 | |
2076 | pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); |
2077 | |
2078 | float4 P[4]; |
2079 | int C[4]; |
2080 | |
2081 | int pos = state.positionRegister; |
2082 | |
2083 | P[0] = v.v[pos]; |
2084 | P[1] = v.v[pos]; |
2085 | P[2] = v.v[pos]; |
2086 | P[3] = v.v[pos]; |
2087 | |
2088 | const float X = pSize * P[0].w * data.halfPixelX[0]; |
2089 | const float Y = pSize * P[0].w * data.halfPixelY[0]; |
2090 | |
2091 | P[0].x -= X; |
2092 | P[0].y += Y; |
2093 | C[0] = clipper->computeClipFlags(P[0]); |
2094 | |
2095 | P[1].x += X; |
2096 | P[1].y += Y; |
2097 | C[1] = clipper->computeClipFlags(P[1]); |
2098 | |
2099 | P[2].x += X; |
2100 | P[2].y -= Y; |
2101 | C[2] = clipper->computeClipFlags(P[2]); |
2102 | |
2103 | P[3].x -= X; |
2104 | P[3].y -= Y; |
2105 | C[3] = clipper->computeClipFlags(P[3]); |
2106 | |
2107 | Polygon polygon(P, 4); |
2108 | |
2109 | if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) |
2110 | { |
2111 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; |
2112 | |
2113 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
2114 | { |
2115 | if(!clipper->clip(polygon, clipFlagsOr, draw)) |
2116 | { |
2117 | return false; |
2118 | } |
2119 | } |
2120 | |
2121 | triangle.v1 = triangle.v0; |
2122 | triangle.v2 = triangle.v0; |
2123 | |
2124 | triangle.v1.X += iround(16 * 0.5f * pSize); |
2125 | triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner |
2126 | return setupRoutine(&primitive, &triangle, &polygon, &data); |
2127 | } |
2128 | |
2129 | return false; |
2130 | } |
2131 | |
2132 | void Renderer::initializeThreads() |
2133 | { |
2134 | unitCount = ceilPow2(threadCount); |
2135 | clusterCount = ceilPow2(threadCount); |
2136 | |
2137 | for(int i = 0; i < unitCount; i++) |
2138 | { |
2139 | triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); |
2140 | primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); |
2141 | } |
2142 | |
2143 | for(int i = 0; i < threadCount; i++) |
2144 | { |
2145 | vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); |
2146 | vertexTask[i]->vertexCache.drawCall = -1; |
2147 | |
2148 | task[i].type = Task::SUSPEND; |
2149 | |
2150 | resume[i] = new Event(); |
2151 | suspend[i] = new Event(); |
2152 | |
2153 | Parameters parameters; |
2154 | parameters.threadIndex = i; |
2155 | parameters.renderer = this; |
2156 | |
2157 | exitThreads = false; |
2158 | worker[i] = new Thread(threadFunction, ¶meters); |
2159 | |
2160 | suspend[i]->wait(); |
2161 | suspend[i]->signal(); |
2162 | } |
2163 | } |
2164 | |
2165 | void Renderer::terminateThreads() |
2166 | { |
2167 | while(threadsAwake != 0) |
2168 | { |
2169 | Thread::sleep(1); |
2170 | } |
2171 | |
2172 | for(int thread = 0; thread < threadCount; thread++) |
2173 | { |
2174 | if(worker[thread]) |
2175 | { |
2176 | exitThreads = true; |
2177 | resume[thread]->signal(); |
2178 | worker[thread]->join(); |
2179 | |
2180 | delete worker[thread]; |
2181 | worker[thread] = 0; |
2182 | delete resume[thread]; |
2183 | resume[thread] = 0; |
2184 | delete suspend[thread]; |
2185 | suspend[thread] = 0; |
2186 | } |
2187 | |
2188 | deallocate(vertexTask[thread]); |
2189 | vertexTask[thread] = 0; |
2190 | } |
2191 | |
2192 | for(int i = 0; i < 16; i++) |
2193 | { |
2194 | deallocate(triangleBatch[i]); |
2195 | triangleBatch[i] = 0; |
2196 | |
2197 | deallocate(primitiveBatch[i]); |
2198 | primitiveBatch[i] = 0; |
2199 | } |
2200 | } |
2201 | |
2202 | void Renderer::loadConstants(const VertexShader *vertexShader) |
2203 | { |
2204 | if(!vertexShader) return; |
2205 | |
2206 | size_t count = vertexShader->getLength(); |
2207 | |
2208 | for(size_t i = 0; i < count; i++) |
2209 | { |
2210 | const Shader::Instruction *instruction = vertexShader->getInstruction(i); |
2211 | |
2212 | if(instruction->opcode == Shader::OPCODE_DEF) |
2213 | { |
2214 | int index = instruction->dst.index; |
2215 | float value[4]; |
2216 | |
2217 | value[0] = instruction->src[0].value[0]; |
2218 | value[1] = instruction->src[0].value[1]; |
2219 | value[2] = instruction->src[0].value[2]; |
2220 | value[3] = instruction->src[0].value[3]; |
2221 | |
2222 | setVertexShaderConstantF(index, value); |
2223 | } |
2224 | else if(instruction->opcode == Shader::OPCODE_DEFI) |
2225 | { |
2226 | int index = instruction->dst.index; |
2227 | int integer[4]; |
2228 | |
2229 | integer[0] = instruction->src[0].integer[0]; |
2230 | integer[1] = instruction->src[0].integer[1]; |
2231 | integer[2] = instruction->src[0].integer[2]; |
2232 | integer[3] = instruction->src[0].integer[3]; |
2233 | |
2234 | setVertexShaderConstantI(index, integer); |
2235 | } |
2236 | else if(instruction->opcode == Shader::OPCODE_DEFB) |
2237 | { |
2238 | int index = instruction->dst.index; |
2239 | int boolean = instruction->src[0].boolean[0]; |
2240 | |
2241 | setVertexShaderConstantB(index, &boolean); |
2242 | } |
2243 | } |
2244 | } |
2245 | |
2246 | void Renderer::loadConstants(const PixelShader *pixelShader) |
2247 | { |
2248 | if(!pixelShader) return; |
2249 | |
2250 | size_t count = pixelShader->getLength(); |
2251 | |
2252 | for(size_t i = 0; i < count; i++) |
2253 | { |
2254 | const Shader::Instruction *instruction = pixelShader->getInstruction(i); |
2255 | |
2256 | if(instruction->opcode == Shader::OPCODE_DEF) |
2257 | { |
2258 | int index = instruction->dst.index; |
2259 | float value[4]; |
2260 | |
2261 | value[0] = instruction->src[0].value[0]; |
2262 | value[1] = instruction->src[0].value[1]; |
2263 | value[2] = instruction->src[0].value[2]; |
2264 | value[3] = instruction->src[0].value[3]; |
2265 | |
2266 | setPixelShaderConstantF(index, value); |
2267 | } |
2268 | else if(instruction->opcode == Shader::OPCODE_DEFI) |
2269 | { |
2270 | int index = instruction->dst.index; |
2271 | int integer[4]; |
2272 | |
2273 | integer[0] = instruction->src[0].integer[0]; |
2274 | integer[1] = instruction->src[0].integer[1]; |
2275 | integer[2] = instruction->src[0].integer[2]; |
2276 | integer[3] = instruction->src[0].integer[3]; |
2277 | |
2278 | setPixelShaderConstantI(index, integer); |
2279 | } |
2280 | else if(instruction->opcode == Shader::OPCODE_DEFB) |
2281 | { |
2282 | int index = instruction->dst.index; |
2283 | int boolean = instruction->src[0].boolean[0]; |
2284 | |
2285 | setPixelShaderConstantB(index, &boolean); |
2286 | } |
2287 | } |
2288 | } |
2289 | |
2290 | void Renderer::setIndexBuffer(Resource *indexBuffer) |
2291 | { |
2292 | context->indexBuffer = indexBuffer; |
2293 | } |
2294 | |
2295 | void Renderer::setMultiSampleMask(unsigned int mask) |
2296 | { |
2297 | context->sampleMask = mask; |
2298 | } |
2299 | |
2300 | void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) |
2301 | { |
2302 | sw::transparencyAntialiasing = transparencyAntialiasing; |
2303 | } |
2304 | |
2305 | bool Renderer::isReadWriteTexture(int sampler) |
2306 | { |
2307 | for(int index = 0; index < RENDERTARGETS; index++) |
2308 | { |
2309 | if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) |
2310 | { |
2311 | return true; |
2312 | } |
2313 | } |
2314 | |
2315 | if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) |
2316 | { |
2317 | return true; |
2318 | } |
2319 | |
2320 | return false; |
2321 | } |
2322 | |
2323 | void Renderer::updateClipper() |
2324 | { |
2325 | if(updateClipPlanes) |
2326 | { |
2327 | if(VertexProcessor::isFixedFunction()) // User plane in world space |
2328 | { |
2329 | const Matrix &scissorWorld = getViewTransform(); |
2330 | |
2331 | if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; |
2332 | if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; |
2333 | if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2]; |
2334 | if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; |
2335 | if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; |
2336 | if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; |
2337 | } |
2338 | else // User plane in clip space |
2339 | { |
2340 | if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; |
2341 | if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; |
2342 | if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; |
2343 | if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; |
2344 | if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; |
2345 | if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; |
2346 | } |
2347 | |
2348 | updateClipPlanes = false; |
2349 | } |
2350 | } |
2351 | |
2352 | void Renderer::setTextureResource(unsigned int sampler, Resource *resource) |
2353 | { |
2354 | ASSERT(sampler < TOTAL_IMAGE_UNITS); |
2355 | |
2356 | context->texture[sampler] = resource; |
2357 | } |
2358 | |
2359 | void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) |
2360 | { |
2361 | ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); |
2362 | |
2363 | context->sampler[sampler].setTextureLevel(face, level, surface, type); |
2364 | } |
2365 | |
2366 | void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) |
2367 | { |
2368 | if(type == SAMPLER_PIXEL) |
2369 | { |
2370 | PixelProcessor::setTextureFilter(sampler, textureFilter); |
2371 | } |
2372 | else |
2373 | { |
2374 | VertexProcessor::setTextureFilter(sampler, textureFilter); |
2375 | } |
2376 | } |
2377 | |
2378 | void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) |
2379 | { |
2380 | if(type == SAMPLER_PIXEL) |
2381 | { |
2382 | PixelProcessor::setMipmapFilter(sampler, mipmapFilter); |
2383 | } |
2384 | else |
2385 | { |
2386 | VertexProcessor::setMipmapFilter(sampler, mipmapFilter); |
2387 | } |
2388 | } |
2389 | |
2390 | void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) |
2391 | { |
2392 | if(type == SAMPLER_PIXEL) |
2393 | { |
2394 | PixelProcessor::setGatherEnable(sampler, enable); |
2395 | } |
2396 | else |
2397 | { |
2398 | VertexProcessor::setGatherEnable(sampler, enable); |
2399 | } |
2400 | } |
2401 | |
2402 | void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) |
2403 | { |
2404 | if(type == SAMPLER_PIXEL) |
2405 | { |
2406 | PixelProcessor::setAddressingModeU(sampler, addressMode); |
2407 | } |
2408 | else |
2409 | { |
2410 | VertexProcessor::setAddressingModeU(sampler, addressMode); |
2411 | } |
2412 | } |
2413 | |
2414 | void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) |
2415 | { |
2416 | if(type == SAMPLER_PIXEL) |
2417 | { |
2418 | PixelProcessor::setAddressingModeV(sampler, addressMode); |
2419 | } |
2420 | else |
2421 | { |
2422 | VertexProcessor::setAddressingModeV(sampler, addressMode); |
2423 | } |
2424 | } |
2425 | |
2426 | void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) |
2427 | { |
2428 | if(type == SAMPLER_PIXEL) |
2429 | { |
2430 | PixelProcessor::setAddressingModeW(sampler, addressMode); |
2431 | } |
2432 | else |
2433 | { |
2434 | VertexProcessor::setAddressingModeW(sampler, addressMode); |
2435 | } |
2436 | } |
2437 | |
2438 | void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) |
2439 | { |
2440 | if(type == SAMPLER_PIXEL) |
2441 | { |
2442 | PixelProcessor::setReadSRGB(sampler, sRGB); |
2443 | } |
2444 | else |
2445 | { |
2446 | VertexProcessor::setReadSRGB(sampler, sRGB); |
2447 | } |
2448 | } |
2449 | |
2450 | void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) |
2451 | { |
2452 | if(type == SAMPLER_PIXEL) |
2453 | { |
2454 | PixelProcessor::setMipmapLOD(sampler, bias); |
2455 | } |
2456 | else |
2457 | { |
2458 | VertexProcessor::setMipmapLOD(sampler, bias); |
2459 | } |
2460 | } |
2461 | |
2462 | void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) |
2463 | { |
2464 | if(type == SAMPLER_PIXEL) |
2465 | { |
2466 | PixelProcessor::setBorderColor(sampler, borderColor); |
2467 | } |
2468 | else |
2469 | { |
2470 | VertexProcessor::setBorderColor(sampler, borderColor); |
2471 | } |
2472 | } |
2473 | |
2474 | void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) |
2475 | { |
2476 | if(type == SAMPLER_PIXEL) |
2477 | { |
2478 | PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); |
2479 | } |
2480 | else |
2481 | { |
2482 | VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); |
2483 | } |
2484 | } |
2485 | |
2486 | void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) |
2487 | { |
2488 | if(type == SAMPLER_PIXEL) |
2489 | { |
2490 | PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); |
2491 | } |
2492 | else |
2493 | { |
2494 | VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); |
2495 | } |
2496 | } |
2497 | |
2498 | void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) |
2499 | { |
2500 | if(type == SAMPLER_PIXEL) |
2501 | { |
2502 | PixelProcessor::setSwizzleR(sampler, swizzleR); |
2503 | } |
2504 | else |
2505 | { |
2506 | VertexProcessor::setSwizzleR(sampler, swizzleR); |
2507 | } |
2508 | } |
2509 | |
2510 | void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) |
2511 | { |
2512 | if(type == SAMPLER_PIXEL) |
2513 | { |
2514 | PixelProcessor::setSwizzleG(sampler, swizzleG); |
2515 | } |
2516 | else |
2517 | { |
2518 | VertexProcessor::setSwizzleG(sampler, swizzleG); |
2519 | } |
2520 | } |
2521 | |
2522 | void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) |
2523 | { |
2524 | if(type == SAMPLER_PIXEL) |
2525 | { |
2526 | PixelProcessor::setSwizzleB(sampler, swizzleB); |
2527 | } |
2528 | else |
2529 | { |
2530 | VertexProcessor::setSwizzleB(sampler, swizzleB); |
2531 | } |
2532 | } |
2533 | |
2534 | void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) |
2535 | { |
2536 | if(type == SAMPLER_PIXEL) |
2537 | { |
2538 | PixelProcessor::setSwizzleA(sampler, swizzleA); |
2539 | } |
2540 | else |
2541 | { |
2542 | VertexProcessor::setSwizzleA(sampler, swizzleA); |
2543 | } |
2544 | } |
2545 | |
2546 | void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) |
2547 | { |
2548 | if(type == SAMPLER_PIXEL) |
2549 | { |
2550 | PixelProcessor::setCompareFunc(sampler, compFunc); |
2551 | } |
2552 | else |
2553 | { |
2554 | VertexProcessor::setCompareFunc(sampler, compFunc); |
2555 | } |
2556 | } |
2557 | |
2558 | void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) |
2559 | { |
2560 | if(type == SAMPLER_PIXEL) |
2561 | { |
2562 | PixelProcessor::setBaseLevel(sampler, baseLevel); |
2563 | } |
2564 | else |
2565 | { |
2566 | VertexProcessor::setBaseLevel(sampler, baseLevel); |
2567 | } |
2568 | } |
2569 | |
2570 | void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) |
2571 | { |
2572 | if(type == SAMPLER_PIXEL) |
2573 | { |
2574 | PixelProcessor::setMaxLevel(sampler, maxLevel); |
2575 | } |
2576 | else |
2577 | { |
2578 | VertexProcessor::setMaxLevel(sampler, maxLevel); |
2579 | } |
2580 | } |
2581 | |
2582 | void Renderer::setMinLod(SamplerType type, int sampler, float minLod) |
2583 | { |
2584 | if(type == SAMPLER_PIXEL) |
2585 | { |
2586 | PixelProcessor::setMinLod(sampler, minLod); |
2587 | } |
2588 | else |
2589 | { |
2590 | VertexProcessor::setMinLod(sampler, minLod); |
2591 | } |
2592 | } |
2593 | |
2594 | void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) |
2595 | { |
2596 | if(type == SAMPLER_PIXEL) |
2597 | { |
2598 | PixelProcessor::setMaxLod(sampler, maxLod); |
2599 | } |
2600 | else |
2601 | { |
2602 | VertexProcessor::setMaxLod(sampler, maxLod); |
2603 | } |
2604 | } |
2605 | |
2606 | void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired) |
2607 | { |
2608 | if(type == SAMPLER_PIXEL) |
2609 | { |
2610 | PixelProcessor::setSyncRequired(sampler, syncRequired); |
2611 | } |
2612 | else |
2613 | { |
2614 | VertexProcessor::setSyncRequired(sampler, syncRequired); |
2615 | } |
2616 | } |
2617 | |
2618 | void Renderer::setPointSpriteEnable(bool pointSpriteEnable) |
2619 | { |
2620 | context->setPointSpriteEnable(pointSpriteEnable); |
2621 | } |
2622 | |
2623 | void Renderer::setPointScaleEnable(bool pointScaleEnable) |
2624 | { |
2625 | context->setPointScaleEnable(pointScaleEnable); |
2626 | } |
2627 | |
2628 | void Renderer::setLineWidth(float width) |
2629 | { |
2630 | context->lineWidth = width; |
2631 | } |
2632 | |
2633 | void Renderer::setDepthBias(float bias) |
2634 | { |
2635 | context->depthBias = bias; |
2636 | } |
2637 | |
2638 | void Renderer::setSlopeDepthBias(float slopeBias) |
2639 | { |
2640 | context->slopeDepthBias = slopeBias; |
2641 | } |
2642 | |
2643 | void Renderer::setRasterizerDiscard(bool rasterizerDiscard) |
2644 | { |
2645 | context->rasterizerDiscard = rasterizerDiscard; |
2646 | } |
2647 | |
2648 | void Renderer::setPixelShader(const PixelShader *shader) |
2649 | { |
2650 | context->pixelShader = shader; |
2651 | |
2652 | loadConstants(shader); |
2653 | } |
2654 | |
2655 | void Renderer::setVertexShader(const VertexShader *shader) |
2656 | { |
2657 | context->vertexShader = shader; |
2658 | |
2659 | loadConstants(shader); |
2660 | } |
2661 | |
2662 | void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) |
2663 | { |
2664 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2665 | { |
2666 | if(drawCall[i]->psDirtyConstF < index + count) |
2667 | { |
2668 | drawCall[i]->psDirtyConstF = index + count; |
2669 | } |
2670 | } |
2671 | |
2672 | for(unsigned int i = 0; i < count; i++) |
2673 | { |
2674 | PixelProcessor::setFloatConstant(index + i, value); |
2675 | value += 4; |
2676 | } |
2677 | } |
2678 | |
2679 | void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) |
2680 | { |
2681 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2682 | { |
2683 | if(drawCall[i]->psDirtyConstI < index + count) |
2684 | { |
2685 | drawCall[i]->psDirtyConstI = index + count; |
2686 | } |
2687 | } |
2688 | |
2689 | for(unsigned int i = 0; i < count; i++) |
2690 | { |
2691 | PixelProcessor::setIntegerConstant(index + i, value); |
2692 | value += 4; |
2693 | } |
2694 | } |
2695 | |
2696 | void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) |
2697 | { |
2698 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2699 | { |
2700 | if(drawCall[i]->psDirtyConstB < index + count) |
2701 | { |
2702 | drawCall[i]->psDirtyConstB = index + count; |
2703 | } |
2704 | } |
2705 | |
2706 | for(unsigned int i = 0; i < count; i++) |
2707 | { |
2708 | PixelProcessor::setBooleanConstant(index + i, *boolean); |
2709 | boolean++; |
2710 | } |
2711 | } |
2712 | |
2713 | void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) |
2714 | { |
2715 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2716 | { |
2717 | if(drawCall[i]->vsDirtyConstF < index + count) |
2718 | { |
2719 | drawCall[i]->vsDirtyConstF = index + count; |
2720 | } |
2721 | } |
2722 | |
2723 | for(unsigned int i = 0; i < count; i++) |
2724 | { |
2725 | VertexProcessor::setFloatConstant(index + i, value); |
2726 | value += 4; |
2727 | } |
2728 | } |
2729 | |
2730 | void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) |
2731 | { |
2732 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2733 | { |
2734 | if(drawCall[i]->vsDirtyConstI < index + count) |
2735 | { |
2736 | drawCall[i]->vsDirtyConstI = index + count; |
2737 | } |
2738 | } |
2739 | |
2740 | for(unsigned int i = 0; i < count; i++) |
2741 | { |
2742 | VertexProcessor::setIntegerConstant(index + i, value); |
2743 | value += 4; |
2744 | } |
2745 | } |
2746 | |
2747 | void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) |
2748 | { |
2749 | for(unsigned int i = 0; i < DRAW_COUNT; i++) |
2750 | { |
2751 | if(drawCall[i]->vsDirtyConstB < index + count) |
2752 | { |
2753 | drawCall[i]->vsDirtyConstB = index + count; |
2754 | } |
2755 | } |
2756 | |
2757 | for(unsigned int i = 0; i < count; i++) |
2758 | { |
2759 | VertexProcessor::setBooleanConstant(index + i, *boolean); |
2760 | boolean++; |
2761 | } |
2762 | } |
2763 | |
2764 | void Renderer::setModelMatrix(const Matrix &M, int i) |
2765 | { |
2766 | VertexProcessor::setModelMatrix(M, i); |
2767 | } |
2768 | |
2769 | void Renderer::setViewMatrix(const Matrix &V) |
2770 | { |
2771 | VertexProcessor::setViewMatrix(V); |
2772 | updateClipPlanes = true; |
2773 | } |
2774 | |
2775 | void Renderer::setBaseMatrix(const Matrix &B) |
2776 | { |
2777 | VertexProcessor::setBaseMatrix(B); |
2778 | updateClipPlanes = true; |
2779 | } |
2780 | |
2781 | void Renderer::setProjectionMatrix(const Matrix &P) |
2782 | { |
2783 | VertexProcessor::setProjectionMatrix(P); |
2784 | updateClipPlanes = true; |
2785 | } |
2786 | |
2787 | void Renderer::addQuery(Query *query) |
2788 | { |
2789 | queries.push_back(query); |
2790 | } |
2791 | |
2792 | void Renderer::removeQuery(Query *query) |
2793 | { |
2794 | queries.remove(query); |
2795 | } |
2796 | |
2797 | #if PERF_HUD |
2798 | int Renderer::getThreadCount() |
2799 | { |
2800 | return threadCount; |
2801 | } |
2802 | |
2803 | int64_t Renderer::getVertexTime(int thread) |
2804 | { |
2805 | return vertexTime[thread]; |
2806 | } |
2807 | |
2808 | int64_t Renderer::getSetupTime(int thread) |
2809 | { |
2810 | return setupTime[thread]; |
2811 | } |
2812 | |
2813 | int64_t Renderer::getPixelTime(int thread) |
2814 | { |
2815 | return pixelTime[thread]; |
2816 | } |
2817 | |
2818 | void Renderer::resetTimers() |
2819 | { |
2820 | for(int thread = 0; thread < threadCount; thread++) |
2821 | { |
2822 | vertexTime[thread] = 0; |
2823 | setupTime[thread] = 0; |
2824 | pixelTime[thread] = 0; |
2825 | } |
2826 | } |
2827 | #endif |
2828 | |
2829 | void Renderer::setViewport(const Viewport &viewport) |
2830 | { |
2831 | this->viewport = viewport; |
2832 | } |
2833 | |
2834 | void Renderer::setScissor(const Rect &scissor) |
2835 | { |
2836 | this->scissor = scissor; |
2837 | } |
2838 | |
2839 | void Renderer::setClipFlags(int flags) |
2840 | { |
2841 | clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum |
2842 | } |
2843 | |
2844 | void Renderer::setClipPlane(unsigned int index, const float plane[4]) |
2845 | { |
2846 | if(index < MAX_CLIP_PLANES) |
2847 | { |
2848 | userPlane[index] = plane; |
2849 | } |
2850 | else ASSERT(false); |
2851 | |
2852 | updateClipPlanes = true; |
2853 | } |
2854 | |
2855 | void Renderer::updateConfiguration(bool initialUpdate) |
2856 | { |
2857 | bool newConfiguration = swiftConfig->hasNewConfiguration(); |
2858 | |
2859 | if(newConfiguration || initialUpdate) |
2860 | { |
2861 | terminateThreads(); |
2862 | |
2863 | SwiftConfig::Configuration configuration = {}; |
2864 | swiftConfig->getConfiguration(configuration); |
2865 | |
2866 | precacheVertex = !newConfiguration && configuration.precache; |
2867 | precacheSetup = !newConfiguration && configuration.precache; |
2868 | precachePixel = !newConfiguration && configuration.precache; |
2869 | |
2870 | VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); |
2871 | PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); |
2872 | SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); |
2873 | |
2874 | switch(configuration.textureSampleQuality) |
2875 | { |
2876 | case 0: Sampler::setFilterQuality(FILTER_POINT); break; |
2877 | case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; |
2878 | case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; |
2879 | default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; |
2880 | } |
2881 | |
2882 | switch(configuration.mipmapQuality) |
2883 | { |
2884 | case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; |
2885 | case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; |
2886 | default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; |
2887 | } |
2888 | |
2889 | setPerspectiveCorrection(configuration.perspectiveCorrection); |
2890 | |
2891 | switch(configuration.transcendentalPrecision) |
2892 | { |
2893 | case 0: |
2894 | logPrecision = APPROXIMATE; |
2895 | expPrecision = APPROXIMATE; |
2896 | rcpPrecision = APPROXIMATE; |
2897 | rsqPrecision = APPROXIMATE; |
2898 | break; |
2899 | case 1: |
2900 | logPrecision = PARTIAL; |
2901 | expPrecision = PARTIAL; |
2902 | rcpPrecision = PARTIAL; |
2903 | rsqPrecision = PARTIAL; |
2904 | break; |
2905 | case 2: |
2906 | logPrecision = ACCURATE; |
2907 | expPrecision = ACCURATE; |
2908 | rcpPrecision = ACCURATE; |
2909 | rsqPrecision = ACCURATE; |
2910 | break; |
2911 | case 3: |
2912 | logPrecision = WHQL; |
2913 | expPrecision = WHQL; |
2914 | rcpPrecision = WHQL; |
2915 | rsqPrecision = WHQL; |
2916 | break; |
2917 | case 4: |
2918 | logPrecision = IEEE; |
2919 | expPrecision = IEEE; |
2920 | rcpPrecision = IEEE; |
2921 | rsqPrecision = IEEE; |
2922 | break; |
2923 | default: |
2924 | logPrecision = ACCURATE; |
2925 | expPrecision = ACCURATE; |
2926 | rcpPrecision = ACCURATE; |
2927 | rsqPrecision = ACCURATE; |
2928 | break; |
2929 | } |
2930 | |
2931 | switch(configuration.transparencyAntialiasing) |
2932 | { |
2933 | case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; |
2934 | case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; |
2935 | default: transparencyAntialiasing = TRANSPARENCY_NONE; break; |
2936 | } |
2937 | |
2938 | switch(configuration.threadCount) |
2939 | { |
2940 | case -1: threadCount = CPUID::coreCount(); break; |
2941 | case 0: threadCount = CPUID::processAffinity(); break; |
2942 | default: threadCount = configuration.threadCount; break; |
2943 | } |
2944 | |
2945 | CPUID::setEnableSSE4_1(configuration.enableSSE4_1); |
2946 | CPUID::setEnableSSSE3(configuration.enableSSSE3); |
2947 | CPUID::setEnableSSE3(configuration.enableSSE3); |
2948 | CPUID::setEnableSSE2(configuration.enableSSE2); |
2949 | CPUID::setEnableSSE(configuration.enableSSE); |
2950 | |
2951 | rr::Config::Edit cfg; |
2952 | cfg.clearOptimizationPasses(); |
2953 | for(auto pass : configuration.optimization) |
2954 | { |
2955 | if (pass != rr::Optimization::Pass::Disabled) { cfg.add(pass); } |
2956 | } |
2957 | rr::Nucleus::adjustDefaultConfig(cfg); |
2958 | |
2959 | forceWindowed = configuration.forceWindowed; |
2960 | complementaryDepthBuffer = configuration.complementaryDepthBuffer; |
2961 | postBlendSRGB = configuration.postBlendSRGB; |
2962 | exactColorRounding = configuration.exactColorRounding; |
2963 | forceClearRegisters = configuration.forceClearRegisters; |
2964 | |
2965 | #ifndef NDEBUG |
2966 | minPrimitives = configuration.minPrimitives; |
2967 | maxPrimitives = configuration.maxPrimitives; |
2968 | #endif |
2969 | } |
2970 | |
2971 | if(!initialUpdate && !worker[0]) |
2972 | { |
2973 | initializeThreads(); |
2974 | } |
2975 | } |
2976 | } |
2977 | |