1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Renderer.hpp"
16
17#include "Clipper.hpp"
18#include "Primitive.hpp"
19#include "Polygon.hpp"
20#include "Reactor/Reactor.hpp"
21#include "Pipeline/Constants.hpp"
22#include "System/Memory.hpp"
23#include "System/Half.hpp"
24#include "System/Math.hpp"
25#include "System/Timer.hpp"
26#include "Vulkan/VkConfig.h"
27#include "Vulkan/VkDebug.hpp"
28#include "Vulkan/VkDevice.hpp"
29#include "Vulkan/VkFence.hpp"
30#include "Vulkan/VkImageView.hpp"
31#include "Vulkan/VkQueryPool.hpp"
32#include "Pipeline/SpirvShader.hpp"
33#include "Vertex.hpp"
34
35#include "marl/containers.h"
36#include "marl/defer.h"
37#include "marl/trace.h"
38
39#undef max
40
#ifndef NDEBUG
// Lower and upper primitive-count limits (1 and 2^21). Only compiled into
// builds where NDEBUG is not defined.
unsigned int minPrimitives = 1u;
unsigned int maxPrimitives = 1u << 21;
#endif
45
46namespace sw
47{
48 template<typename T>
49 inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount)
50 {
51 bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
52
53 switch(topology)
54 {
55 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
56 {
57 auto index = start;
58 auto pointBatch = &(batch[0][0]);
59 for(unsigned int i = 0; i < triangleCount; i++)
60 {
61 *pointBatch++ = indices[index++];
62 }
63
64 // Repeat the last index to allow for SIMD width overrun.
65 index--;
66 for(unsigned int i = 0; i < 3; i++)
67 {
68 *pointBatch++ = indices[index];
69 }
70 break;
71 }
72 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
73 {
74 auto index = 2 * start;
75 for(unsigned int i = 0; i < triangleCount; i++)
76 {
77 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
78 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
79 batch[i][2] = indices[index + 1];
80
81 index += 2;
82 }
83 break;
84 }
85 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
86 {
87 auto index = start;
88 for(unsigned int i = 0; i < triangleCount; i++)
89 {
90 batch[i][0] = indices[index + (provokeFirst ? 0 : 1)];
91 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
92 batch[i][2] = indices[index + 1];
93
94 index += 1;
95 }
96 break;
97 }
98 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
99 {
100 auto index = 3 * start;
101 for(unsigned int i = 0; i < triangleCount; i++)
102 {
103 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
104 batch[i][1] = indices[index + (provokeFirst ? 1 : 0)];
105 batch[i][2] = indices[index + (provokeFirst ? 2 : 1)];
106
107 index += 3;
108 }
109 break;
110 }
111 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
112 {
113 auto index = start;
114 for(unsigned int i = 0; i < triangleCount; i++)
115 {
116 batch[i][0] = indices[index + (provokeFirst ? 0 : 2)];
117 batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)];
118 batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)];
119
120 index += 1;
121 }
122 break;
123 }
124 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
125 {
126 auto index = start + 1;
127 for(unsigned int i = 0; i < triangleCount; i++)
128 {
129 batch[i][provokeFirst ? 0 : 2] = indices[index + 0];
130 batch[i][provokeFirst ? 1 : 0] = indices[index + 1];
131 batch[i][provokeFirst ? 2 : 1] = indices[0];
132
133 index += 1;
134 }
135 break;
136 }
137 default:
138 ASSERT(false);
139 return false;
140 }
141
142 return true;
143 }
144
145 DrawCall::DrawCall()
146 {
147 data = (DrawData*)allocate(sizeof(DrawData));
148 data->constants = &constants;
149 }
150
151 DrawCall::~DrawCall()
152 {
153 deallocate(data);
154 }
155
156 Renderer::Renderer(vk::Device* device) : device(device)
157 {
158 VertexProcessor::setRoutineCacheSize(1024);
159 PixelProcessor::setRoutineCacheSize(1024);
160 SetupProcessor::setRoutineCacheSize(1024);
161 }
162
163 Renderer::~Renderer()
164 {
165 drawTickets.take().wait();
166 }
167
168 // Renderer objects have to be mem aligned to the alignment provided in the class declaration
169 void* Renderer::operator new(size_t size)
170 {
171 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
172 return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
173 }
174
175 void Renderer::operator delete(void* mem)
176 {
177 vk::deallocate(mem, vk::DEVICE_MEMORY);
178 }
179
180 void Renderer::draw(const sw::Context* context, VkIndexType indexType, unsigned int count, int baseVertex,
181 TaskEvents *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D& framebufferExtent,
182 PushConstantStorage const & pushConstants, bool update)
183 {
184 if(count == 0) { return; }
185
186 auto id = nextDrawID++;
187 MARL_SCOPED_EVENT("draw %d", id);
188
189 #ifndef NDEBUG
190 {
191 unsigned int minPrimitives = 1;
192 unsigned int maxPrimitives = 1 << 21;
193 if(count < minPrimitives || count > maxPrimitives)
194 {
195 return;
196 }
197 }
198 #endif
199
200 int ms = context->sampleCount;
201
202 if(!context->multiSampleMask)
203 {
204 return;
205 }
206
207 marl::Pool<sw::DrawCall>::Loan draw;
208 {
209 MARL_SCOPED_EVENT("drawCallPool.borrow()");
210 draw = drawCallPool.borrow();
211 }
212 draw->id = id;
213
214 if(update)
215 {
216 MARL_SCOPED_EVENT("update");
217 vertexState = VertexProcessor::update(context);
218 setupState = SetupProcessor::update(context);
219 pixelState = PixelProcessor::update(context);
220
221 vertexRoutine = VertexProcessor::routine(vertexState, context->pipelineLayout, context->vertexShader, context->descriptorSets);
222 setupRoutine = SetupProcessor::routine(setupState);
223 pixelRoutine = PixelProcessor::routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
224 }
225
226 DrawCall::SetupFunction setupPrimitives = nullptr;
227 unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;
228
229 if(context->isDrawTriangle(false))
230 {
231 switch(context->polygonMode)
232 {
233 case VK_POLYGON_MODE_FILL:
234 setupPrimitives = &DrawCall::setupSolidTriangles;
235 break;
236 case VK_POLYGON_MODE_LINE:
237 setupPrimitives = &DrawCall::setupWireframeTriangles;
238 numPrimitivesPerBatch /= 3;
239 break;
240 case VK_POLYGON_MODE_POINT:
241 setupPrimitives = &DrawCall::setupPointTriangles;
242 numPrimitivesPerBatch /= 3;
243 break;
244 default:
245 UNSUPPORTED("polygon mode: %d", int(context->polygonMode));
246 return;
247 }
248 }
249 else if(context->isDrawLine(false))
250 {
251 setupPrimitives = &DrawCall::setupLines;
252 }
253 else // Point primitive topology
254 {
255 setupPrimitives = &DrawCall::setupPoints;
256 }
257
258 DrawData *data = draw->data;
259 draw->occlusionQuery = occlusionQuery;
260 draw->batchDataPool = &batchDataPool;
261 draw->numPrimitives = count;
262 draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
263 draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
264 draw->topology = context->topology;
265 draw->provokingVertexMode = context->provokingVertexMode;
266 draw->indexType = indexType;
267 draw->lineRasterizationMode = context->lineRasterizationMode;
268
269 draw->vertexRoutine = vertexRoutine;
270 draw->setupRoutine = setupRoutine;
271 draw->pixelRoutine = pixelRoutine;
272 draw->setupPrimitives = setupPrimitives;
273 draw->setupState = setupState;
274
275 data->descriptorSets = context->descriptorSets;
276 data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;
277
278 for(int i = 0; i < MAX_INTERFACE_COMPONENTS/4; i++)
279 {
280 data->input[i] = context->input[i].buffer;
281 data->robustnessSize[i] = context->input[i].robustnessSize;
282 data->stride[i] = context->input[i].vertexStride;
283 }
284
285 data->indices = indexBuffer;
286 data->viewID = viewID;
287 data->instanceID = instanceID;
288 data->baseVertex = baseVertex;
289
290 if(pixelState.stencilActive)
291 {
292 data->stencil[0].set(context->frontStencil.reference, context->frontStencil.compareMask, context->frontStencil.writeMask);
293 data->stencil[1].set(context->backStencil.reference, context->backStencil.compareMask, context->backStencil.writeMask);
294 }
295
296 data->lineWidth = context->lineWidth;
297
298 data->factor = factor;
299
300 if(pixelState.alphaToCoverage)
301 {
302 if(ms == 4)
303 {
304 data->a2c0 = replicate(0.2f);
305 data->a2c1 = replicate(0.4f);
306 data->a2c2 = replicate(0.6f);
307 data->a2c3 = replicate(0.8f);
308 }
309 else if(ms == 2)
310 {
311 data->a2c0 = replicate(0.25f);
312 data->a2c1 = replicate(0.75f);
313 }
314 else ASSERT(false);
315 }
316
317 if(pixelState.occlusionEnabled)
318 {
319 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
320 {
321 data->occlusion[cluster] = 0;
322 }
323 }
324
325 // Viewport
326 {
327 float W = 0.5f * viewport.width;
328 float H = 0.5f * viewport.height;
329 float X0 = viewport.x + W;
330 float Y0 = viewport.y + H;
331 float N = viewport.minDepth;
332 float F = viewport.maxDepth;
333 float Z = F - N;
334 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
335
336 if(context->isDrawTriangle(false))
337 {
338 N += context->depthBias;
339 }
340
341 data->WxF = replicate(W * subPixF);
342 data->HxF = replicate(H * subPixF);
343 data->X0xF = replicate(X0 * subPixF - subPixF / 2);
344 data->Y0xF = replicate(Y0 * subPixF - subPixF / 2);
345 data->halfPixelX = replicate(0.5f / W);
346 data->halfPixelY = replicate(0.5f / H);
347 data->viewportHeight = abs(viewport.height);
348 data->slopeDepthBias = context->slopeDepthBias;
349 data->depthRange = Z;
350 data->depthNear = N;
351 }
352
353 // Target
354 {
355 for(int index = 0; index < RENDERTARGETS; index++)
356 {
357 draw->renderTarget[index] = context->renderTarget[index];
358
359 if(draw->renderTarget[index])
360 {
361 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID);
362 data->colorPitchB[index] = context->renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
363 data->colorSliceB[index] = context->renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
364 }
365 }
366
367 draw->depthBuffer = context->depthBuffer;
368 draw->stencilBuffer = context->stencilBuffer;
369
370 if(draw->depthBuffer)
371 {
372 data->depthBuffer = (float*)context->depthBuffer->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID);
373 data->depthPitchB = context->depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
374 data->depthSliceB = context->depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
375 }
376
377 if(draw->stencilBuffer)
378 {
379 data->stencilBuffer = (unsigned char*)context->stencilBuffer->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID);
380 data->stencilPitchB = context->stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
381 data->stencilSliceB = context->stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
382 }
383 }
384
385 // Scissor
386 {
387 data->scissorX0 = clamp<int>(scissor.offset.x, 0, framebufferExtent.width);
388 data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width);
389 data->scissorY0 = clamp<int>(scissor.offset.y, 0, framebufferExtent.height);
390 data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height);
391 }
392
393 // Push constants
394 {
395 data->pushConstants = pushConstants;
396 }
397
398 draw->events = events;
399
400 DrawCall::run(draw, &drawTickets, clusterQueues);
401 }
402
403 void DrawCall::setup()
404 {
405 if(occlusionQuery != nullptr)
406 {
407 occlusionQuery->start();
408 }
409
410 if(events)
411 {
412 events->start();
413 }
414 }
415
416 void DrawCall::teardown()
417 {
418 if(events)
419 {
420 events->finish();
421 events = nullptr;
422 }
423
424 if (occlusionQuery != nullptr)
425 {
426 for(int cluster = 0; cluster < MaxClusterCount; cluster++)
427 {
428 occlusionQuery->add(data->occlusion[cluster]);
429 }
430 occlusionQuery->finish();
431 }
432
433 vertexRoutine = {};
434 setupRoutine = {};
435 pixelRoutine = {};
436 }
437
438 void DrawCall::run(const marl::Loan<DrawCall>& draw, marl::Ticket::Queue* tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount])
439 {
440 draw->setup();
441
442 auto const numPrimitives = draw->numPrimitives;
443 auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch;
444 auto const numBatches = draw->numBatches;
445
446 auto ticket = tickets->take();
447 auto finally = marl::make_shared_finally([draw, ticket] {
448 MARL_SCOPED_EVENT("FINISH draw %d", draw->id);
449 draw->teardown();
450 ticket.done();
451 });
452
453 for (unsigned int batchId = 0; batchId < numBatches; batchId++)
454 {
455 auto batch = draw->batchDataPool->borrow();
456 batch->id = batchId;
457 batch->firstPrimitive = batch->id * numPrimitivesPerBatch;
458 batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive;
459
460 for (int cluster = 0; cluster < MaxClusterCount; cluster++)
461 {
462 batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take());
463 }
464
465 marl::schedule([draw, batch, finally] {
466
467 processVertices(draw.get(), batch.get());
468
469 if (!draw->setupState.rasterizerDiscard)
470 {
471 processPrimitives(draw.get(), batch.get());
472
473 if (batch->numVisible > 0)
474 {
475 processPixels(draw, batch, finally);
476 return;
477 }
478 }
479
480 for (int cluster = 0; cluster < MaxClusterCount; cluster++)
481 {
482 batch->clusterTickets[cluster].done();
483 }
484 });
485 }
486 }
487
488 void DrawCall::processVertices(DrawCall* draw, BatchData* batch)
489 {
490 MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id);
491
492 unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size.
493 {
494 MARL_SCOPED_EVENT("processPrimitiveVertices");
495 processPrimitiveVertices(
496 triangleIndices,
497 draw->data->indices,
498 draw->indexType,
499 batch->firstPrimitive,
500 batch->numPrimitives,
501 draw->topology,
502 draw->provokingVertexMode);
503 }
504
505 auto& vertexTask = batch->vertexTask;
506 vertexTask.primitiveStart = batch->firstPrimitive;
507 // We're only using batch compaction for points, not lines
508 vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3);
509 if (vertexTask.vertexCache.drawCall != draw->id)
510 {
511 vertexTask.vertexCache.clear();
512 vertexTask.vertexCache.drawCall = draw->id;
513 }
514
515 draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data);
516 }
517
518 void DrawCall::processPrimitives(DrawCall* draw, BatchData* batch)
519 {
520 MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id);
521 auto triangles = &batch->triangles[0];
522 auto primitives = &batch->primitives[0];
523 batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives);
524 }
525
526 void DrawCall::processPixels(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally)
527 {
528 struct Data
529 {
530 Data(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally)
531 : draw(draw), batch(batch), finally(finally) {}
532 marl::Loan<DrawCall> draw;
533 marl::Loan<BatchData> batch;
534 std::shared_ptr<marl::Finally> finally;
535 };
536 auto data = std::make_shared<Data>(draw, batch, finally);
537 for (int cluster = 0; cluster < MaxClusterCount; cluster++)
538 {
539 batch->clusterTickets[cluster].onCall([data, cluster]
540 {
541 auto& draw = data->draw;
542 auto& batch = data->batch;
543 MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster);
544 draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data);
545 batch->clusterTickets[cluster].done();
546 });
547 }
548 }
549
550 void Renderer::synchronize()
551 {
552 MARL_SCOPED_EVENT("synchronize");
553 auto ticket = drawTickets.take();
554 ticket.wait();
555 device->updateSamplingRoutineConstCache();
556 ticket.done();
557 }
558
559 void DrawCall::processPrimitiveVertices(
560 unsigned int triangleIndicesOut[MaxBatchSize + 1][3],
561 const void *primitiveIndices,
562 VkIndexType indexType,
563 unsigned int start,
564 unsigned int triangleCount,
565 VkPrimitiveTopology topology,
566 VkProvokingVertexModeEXT provokingVertexMode)
567 {
568 if(!primitiveIndices)
569 {
570 struct LinearIndex
571 {
572 unsigned int operator[](unsigned int i) { return i; }
573 };
574
575 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount))
576 {
577 return;
578 }
579 }
580 else
581 {
582 switch(indexType)
583 {
584 case VK_INDEX_TYPE_UINT16:
585 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t*>(primitiveIndices), start, triangleCount))
586 {
587 return;
588 }
589 break;
590 case VK_INDEX_TYPE_UINT32:
591 if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t*>(primitiveIndices), start, triangleCount))
592 {
593 return;
594 }
595 break;
596 break;
597 default:
598 ASSERT(false);
599 return;
600 }
601 }
602
603 // setBatchIndices() takes care of the point case, since it's different due to the compaction
604 if (topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
605 {
606 // Repeat the last index to allow for SIMD width overrun.
607 triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2];
608 triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2];
609 triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2];
610 }
611 }
612
613 int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
614 {
615 auto &state = drawCall->setupState;
616
617 int ms = state.multiSample;
618 const DrawData *data = drawCall->data;
619 int visible = 0;
620
621 for(int i = 0; i < count; i++, triangles++)
622 {
623 Vertex &v0 = triangles->v0;
624 Vertex &v1 = triangles->v1;
625 Vertex &v2 = triangles->v2;
626
627 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
628 {
629 Polygon polygon(&v0.position, &v1.position, &v2.position);
630
631 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags;
632
633 if(clipFlagsOr != Clipper::CLIP_FINITE)
634 {
635 if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall))
636 {
637 continue;
638 }
639 }
640
641 if(drawCall->setupRoutine(primitives, triangles, &polygon, data))
642 {
643 primitives += ms;
644 visible++;
645 }
646 }
647 }
648
649 return visible;
650 }
651
652 int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
653 {
654 auto& state = drawCall->setupState;
655
656 int ms = state.multiSample;
657 int visible = 0;
658
659 for(int i = 0; i < count; i++)
660 {
661 const Vertex &v0 = triangles[i].v0;
662 const Vertex &v1 = triangles[i].v1;
663 const Vertex &v2 = triangles[i].v2;
664
665 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
666 (v0.x * v2.y - v0.y * v2.x) * v1.w +
667 (v2.x * v1.y - v1.x * v2.y) * v0.w;
668
669 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
670 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
671 {
672 if(frontFacing) continue;
673 }
674 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
675 {
676 if(!frontFacing) continue;
677 }
678
679 Triangle lines[3];
680 lines[0].v0 = v0;
681 lines[0].v1 = v1;
682 lines[1].v0 = v1;
683 lines[1].v1 = v2;
684 lines[2].v0 = v2;
685 lines[2].v1 = v0;
686
687 for(int i = 0; i < 3; i++)
688 {
689 if(setupLine(*primitives, lines[i], *drawCall))
690 {
691 primitives += ms;
692 visible++;
693 }
694 }
695 }
696
697 return visible;
698 }
699
700 int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
701 {
702 auto& state = drawCall->setupState;
703
704 int ms = state.multiSample;
705 int visible = 0;
706
707 for(int i = 0; i < count; i++)
708 {
709 const Vertex &v0 = triangles[i].v0;
710 const Vertex &v1 = triangles[i].v1;
711 const Vertex &v2 = triangles[i].v2;
712
713 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w +
714 (v0.x * v2.y - v0.y * v2.x) * v1.w +
715 (v2.x * v1.y - v1.x * v2.y) * v0.w;
716
717 bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0);
718 if(state.cullMode & VK_CULL_MODE_FRONT_BIT)
719 {
720 if(frontFacing) continue;
721 }
722 if(state.cullMode & VK_CULL_MODE_BACK_BIT)
723 {
724 if(!frontFacing) continue;
725 }
726
727 Triangle points[3];
728 points[0].v0 = v0;
729 points[1].v0 = v1;
730 points[2].v0 = v2;
731
732 for(int i = 0; i < 3; i++)
733 {
734 if(setupPoint(*primitives, points[i], *drawCall))
735 {
736 primitives += ms;
737 visible++;
738 }
739 }
740 }
741
742 return visible;
743 }
744
745 int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
746 {
747 auto &state = drawCall->setupState;
748
749 int visible = 0;
750 int ms = state.multiSample;
751
752 for(int i = 0; i < count; i++)
753 {
754 if(setupLine(*primitives, *triangles, *drawCall))
755 {
756 primitives += ms;
757 visible++;
758 }
759
760 triangles++;
761 }
762
763 return visible;
764 }
765
766 int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count)
767 {
768 auto &state = drawCall->setupState;
769
770 int visible = 0;
771 int ms = state.multiSample;
772
773 for(int i = 0; i < count; i++)
774 {
775 if(setupPoint(*primitives, *triangles, *drawCall))
776 {
777 primitives += ms;
778 visible++;
779 }
780
781 triangles++;
782 }
783
784 return visible;
785 }
786
787 bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
788 {
789 const DrawData &data = *draw.data;
790
791 float lineWidth = data.lineWidth;
792
793 Vertex &v0 = triangle.v0;
794 Vertex &v1 = triangle.v1;
795
796 const float4 &P0 = v0.position;
797 const float4 &P1 = v1.position;
798
799 if(P0.w <= 0 && P1.w <= 0)
800 {
801 return false;
802 }
803
804 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
805
806 const float W = data.WxF[0] * (1.0f / subPixF);
807 const float H = data.HxF[0] * (1.0f / subPixF);
808
809 float dx = W * (P1.x / P1.w - P0.x / P0.w);
810 float dy = H * (P1.y / P1.w - P0.y / P0.w);
811
812 if(dx == 0 && dy == 0)
813 {
814 return false;
815 }
816
817 // We use rectangular lines for non-Bresenham lines (cf. 'strictLines'),
818 // and for Bresenham lines when multiSampling is enabled.
819 // FIXME(b/142965928): Bresenham lines should render the same with or without
820 // multisampling, which will require a special case in the
821 // code when multisampling is on. For now, we just use
822 // rectangular lines when multisampling is enabled.
823 if((draw.setupState.multiSample > 1) ||
824 (draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT))
825 {
826 // Rectangle centered on the line segment
827
828 float4 P[4];
829 int C[4];
830
831 P[0] = P0;
832 P[1] = P1;
833 P[2] = P1;
834 P[3] = P0;
835
836 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
837
838 dx *= scale;
839 dy *= scale;
840
841 float dx0h = dx * P0.w / H;
842 float dy0w = dy * P0.w / W;
843
844 float dx1h = dx * P1.w / H;
845 float dy1w = dy * P1.w / W;
846
847 P[0].x += -dy0w;
848 P[0].y += +dx0h;
849 C[0] = Clipper::ComputeClipFlags(P[0]);
850
851 P[1].x += -dy1w;
852 P[1].y += +dx1h;
853 C[1] = Clipper::ComputeClipFlags(P[1]);
854
855 P[2].x += +dy1w;
856 P[2].y += -dx1h;
857 C[2] = Clipper::ComputeClipFlags(P[2]);
858
859 P[3].x += +dy0w;
860 P[3].y += -dx0h;
861 C[3] = Clipper::ComputeClipFlags(P[3]);
862
863 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
864 {
865 Polygon polygon(P, 4);
866
867 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
868
869 if(clipFlagsOr != Clipper::CLIP_FINITE)
870 {
871 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
872 {
873 return false;
874 }
875 }
876
877 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
878 }
879 }
880 else if(false) // TODO(b/80135519): Deprecate
881 {
882 // Connecting diamonds polygon
883 // This shape satisfies the diamond test convention, except for the exit rule part.
884 // Line segments with overlapping endpoints have duplicate fragments.
885 // The ideal algorithm requires half-open line rasterization (b/80135519).
886
887 float4 P[8];
888 int C[8];
889
890 P[0] = P0;
891 P[1] = P0;
892 P[2] = P0;
893 P[3] = P0;
894 P[4] = P1;
895 P[5] = P1;
896 P[6] = P1;
897 P[7] = P1;
898
899 float dx0 = lineWidth * 0.5f * P0.w / W;
900 float dy0 = lineWidth * 0.5f * P0.w / H;
901
902 float dx1 = lineWidth * 0.5f * P1.w / W;
903 float dy1 = lineWidth * 0.5f * P1.w / H;
904
905 P[0].x += -dx0;
906 C[0] = Clipper::ComputeClipFlags(P[0]);
907
908 P[1].y += +dy0;
909 C[1] = Clipper::ComputeClipFlags(P[1]);
910
911 P[2].x += +dx0;
912 C[2] = Clipper::ComputeClipFlags(P[2]);
913
914 P[3].y += -dy0;
915 C[3] = Clipper::ComputeClipFlags(P[3]);
916
917 P[4].x += -dx1;
918 C[4] = Clipper::ComputeClipFlags(P[4]);
919
920 P[5].y += +dy1;
921 C[5] = Clipper::ComputeClipFlags(P[5]);
922
923 P[6].x += +dx1;
924 C[6] = Clipper::ComputeClipFlags(P[6]);
925
926 P[7].y += -dy1;
927 C[7] = Clipper::ComputeClipFlags(P[7]);
928
929 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
930 {
931 float4 L[6];
932
933 if(dx > -dy)
934 {
935 if(dx > dy) // Right
936 {
937 L[0] = P[0];
938 L[1] = P[1];
939 L[2] = P[5];
940 L[3] = P[6];
941 L[4] = P[7];
942 L[5] = P[3];
943 }
944 else // Down
945 {
946 L[0] = P[0];
947 L[1] = P[4];
948 L[2] = P[5];
949 L[3] = P[6];
950 L[4] = P[2];
951 L[5] = P[3];
952 }
953 }
954 else
955 {
956 if(dx > dy) // Up
957 {
958 L[0] = P[0];
959 L[1] = P[1];
960 L[2] = P[2];
961 L[3] = P[6];
962 L[4] = P[7];
963 L[5] = P[4];
964 }
965 else // Left
966 {
967 L[0] = P[1];
968 L[1] = P[2];
969 L[2] = P[3];
970 L[3] = P[7];
971 L[4] = P[4];
972 L[5] = P[5];
973 }
974 }
975
976 Polygon polygon(L, 6);
977
978 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7];
979
980 if(clipFlagsOr != Clipper::CLIP_FINITE)
981 {
982 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
983 {
984 return false;
985 }
986 }
987
988 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
989 }
990 }
991 else
992 {
993 // Parallelogram approximating Bresenham line
994 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the
995 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum
996 // requirements for Bresenham line segment rasterization.
997
998 float4 P[8];
999 P[0] = P0;
1000 P[1] = P0;
1001 P[2] = P0;
1002 P[3] = P0;
1003 P[4] = P1;
1004 P[5] = P1;
1005 P[6] = P1;
1006 P[7] = P1;
1007
1008 float dx0 = lineWidth * 0.5f * P0.w / W;
1009 float dy0 = lineWidth * 0.5f * P0.w / H;
1010
1011 float dx1 = lineWidth * 0.5f * P1.w / W;
1012 float dy1 = lineWidth * 0.5f * P1.w / H;
1013
1014 P[0].x += -dx0;
1015 P[1].y += +dy0;
1016 P[2].x += +dx0;
1017 P[3].y += -dy0;
1018 P[4].x += -dx1;
1019 P[5].y += +dy1;
1020 P[6].x += +dx1;
1021 P[7].y += -dy1;
1022
1023 float4 L[4];
1024
1025 if(dx > -dy)
1026 {
1027 if(dx > dy) // Right
1028 {
1029 L[0] = P[1];
1030 L[1] = P[5];
1031 L[2] = P[7];
1032 L[3] = P[3];
1033 }
1034 else // Down
1035 {
1036 L[0] = P[0];
1037 L[1] = P[4];
1038 L[2] = P[6];
1039 L[3] = P[2];
1040 }
1041 }
1042 else
1043 {
1044 if(dx > dy) // Up
1045 {
1046 L[0] = P[0];
1047 L[1] = P[2];
1048 L[2] = P[6];
1049 L[3] = P[4];
1050 }
1051 else // Left
1052 {
1053 L[0] = P[1];
1054 L[1] = P[3];
1055 L[2] = P[7];
1056 L[3] = P[5];
1057 }
1058 }
1059
1060 int C0 = Clipper::ComputeClipFlags(L[0]);
1061 int C1 = Clipper::ComputeClipFlags(L[1]);
1062 int C2 = Clipper::ComputeClipFlags(L[2]);
1063 int C3 = Clipper::ComputeClipFlags(L[3]);
1064
1065 if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE)
1066 {
1067 Polygon polygon(L, 4);
1068
1069 int clipFlagsOr = C0 | C1 | C2 | C3;
1070
1071 if(clipFlagsOr != Clipper::CLIP_FINITE)
1072 {
1073 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1074 {
1075 return false;
1076 }
1077 }
1078
1079 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1080 }
1081 }
1082
1083 return false;
1084 }
1085
1086 bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1087 {
1088 const DrawData &data = *draw.data;
1089
1090 Vertex &v = triangle.v0;
1091
1092 float pSize = v.pointSize;
1093
1094 pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE));
1095
1096 float4 P[4];
1097 int C[4];
1098
1099 P[0] = v.position;
1100 P[1] = v.position;
1101 P[2] = v.position;
1102 P[3] = v.position;
1103
1104 const float X = pSize * P[0].w * data.halfPixelX[0];
1105 const float Y = pSize * P[0].w * data.halfPixelY[0];
1106
1107 P[0].x -= X;
1108 P[0].y += Y;
1109 C[0] = Clipper::ComputeClipFlags(P[0]);
1110
1111 P[1].x += X;
1112 P[1].y += Y;
1113 C[1] = Clipper::ComputeClipFlags(P[1]);
1114
1115 P[2].x += X;
1116 P[2].y -= Y;
1117 C[2] = Clipper::ComputeClipFlags(P[2]);
1118
1119 P[3].x -= X;
1120 P[3].y -= Y;
1121 C[3] = Clipper::ComputeClipFlags(P[3]);
1122
1123 Polygon polygon(P, 4);
1124
1125 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1126 {
1127 int clipFlagsOr = C[0] | C[1] | C[2] | C[3];
1128
1129 if(clipFlagsOr != Clipper::CLIP_FINITE)
1130 {
1131 if(!Clipper::Clip(polygon, clipFlagsOr, draw))
1132 {
1133 return false;
1134 }
1135 }
1136
1137 triangle.v1 = triangle.v0;
1138 triangle.v2 = triangle.v0;
1139
1140 constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;
1141
1142 triangle.v1.projected.x += iround(subPixF * 0.5f * pSize);
1143 triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1144 return draw.setupRoutine(&primitive, &triangle, &polygon, &data);
1145 }
1146
1147 return false;
1148 }
1149
1150 void Renderer::addQuery(vk::Query *query)
1151 {
1152 ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1153 ASSERT(!occlusionQuery);
1154
1155 occlusionQuery = query;
1156 }
1157
1158 void Renderer::removeQuery(vk::Query *query)
1159 {
1160 ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION);
1161 ASSERT(occlusionQuery == query);
1162
1163 occlusionQuery = nullptr;
1164 }
1165
1166 void Renderer::advanceInstanceAttributes(Stream* inputs)
1167 {
1168 for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++)
1169 {
1170 auto &attrib = inputs[i];
1171 if (attrib.count && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize))
1172 {
1173 // Under the casts: attrib.buffer += attrib.instanceStride
1174 attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride);
1175 attrib.robustnessSize -= attrib.instanceStride;
1176 }
1177 }
1178 }
1179
1180 void Renderer::setViewport(const VkViewport &viewport)
1181 {
1182 this->viewport = viewport;
1183 }
1184
1185 void Renderer::setScissor(const VkRect2D &scissor)
1186 {
1187 this->scissor = scissor;
1188 }
1189
1190}
1191