1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "Renderer.hpp" |
16 | |
17 | #include "Clipper.hpp" |
18 | #include "Primitive.hpp" |
19 | #include "Polygon.hpp" |
20 | #include "Reactor/Reactor.hpp" |
21 | #include "Pipeline/Constants.hpp" |
22 | #include "System/Memory.hpp" |
23 | #include "System/Half.hpp" |
24 | #include "System/Math.hpp" |
25 | #include "System/Timer.hpp" |
26 | #include "Vulkan/VkConfig.h" |
27 | #include "Vulkan/VkDebug.hpp" |
28 | #include "Vulkan/VkDevice.hpp" |
29 | #include "Vulkan/VkFence.hpp" |
30 | #include "Vulkan/VkImageView.hpp" |
31 | #include "Vulkan/VkQueryPool.hpp" |
32 | #include "Pipeline/SpirvShader.hpp" |
33 | #include "Vertex.hpp" |
34 | |
35 | #include "marl/containers.h" |
36 | #include "marl/defer.h" |
37 | #include "marl/trace.h" |
38 | |
39 | #undef max |
40 | |
#ifndef NDEBUG
// Debug-build-only globals holding a lower and an upper primitive-count limit.
// NOTE(review): Renderer::draw() declares block-scope variables with the same
// names and values, which shadow these globals, so nothing in the visible part
// of this file reads them. Confirm whether another translation unit does.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1 << 21;
#endif
45 | |
46 | namespace sw |
47 | { |
48 | template<typename T> |
49 | inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount) |
50 | { |
51 | bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT); |
52 | |
53 | switch(topology) |
54 | { |
55 | case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: |
56 | { |
57 | auto index = start; |
58 | auto pointBatch = &(batch[0][0]); |
59 | for(unsigned int i = 0; i < triangleCount; i++) |
60 | { |
61 | *pointBatch++ = indices[index++]; |
62 | } |
63 | |
64 | // Repeat the last index to allow for SIMD width overrun. |
65 | index--; |
66 | for(unsigned int i = 0; i < 3; i++) |
67 | { |
68 | *pointBatch++ = indices[index]; |
69 | } |
70 | break; |
71 | } |
72 | case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: |
73 | { |
74 | auto index = 2 * start; |
75 | for(unsigned int i = 0; i < triangleCount; i++) |
76 | { |
77 | batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; |
78 | batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; |
79 | batch[i][2] = indices[index + 1]; |
80 | |
81 | index += 2; |
82 | } |
83 | break; |
84 | } |
85 | case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: |
86 | { |
87 | auto index = start; |
88 | for(unsigned int i = 0; i < triangleCount; i++) |
89 | { |
90 | batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; |
91 | batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; |
92 | batch[i][2] = indices[index + 1]; |
93 | |
94 | index += 1; |
95 | } |
96 | break; |
97 | } |
98 | case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: |
99 | { |
100 | auto index = 3 * start; |
101 | for(unsigned int i = 0; i < triangleCount; i++) |
102 | { |
103 | batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; |
104 | batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; |
105 | batch[i][2] = indices[index + (provokeFirst ? 2 : 1)]; |
106 | |
107 | index += 3; |
108 | } |
109 | break; |
110 | } |
111 | case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: |
112 | { |
113 | auto index = start; |
114 | for(unsigned int i = 0; i < triangleCount; i++) |
115 | { |
116 | batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; |
117 | batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)]; |
118 | batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)]; |
119 | |
120 | index += 1; |
121 | } |
122 | break; |
123 | } |
124 | case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: |
125 | { |
126 | auto index = start + 1; |
127 | for(unsigned int i = 0; i < triangleCount; i++) |
128 | { |
129 | batch[i][provokeFirst ? 0 : 2] = indices[index + 0]; |
130 | batch[i][provokeFirst ? 1 : 0] = indices[index + 1]; |
131 | batch[i][provokeFirst ? 2 : 1] = indices[0]; |
132 | |
133 | index += 1; |
134 | } |
135 | break; |
136 | } |
137 | default: |
138 | ASSERT(false); |
139 | return false; |
140 | } |
141 | |
142 | return true; |
143 | } |
144 | |
145 | DrawCall::DrawCall() |
146 | { |
147 | data = (DrawData*)allocate(sizeof(DrawData)); |
148 | data->constants = &constants; |
149 | } |
150 | |
// Returns the DrawData storage obtained in the constructor via deallocate().
DrawCall::~DrawCall()
{
	deallocate(data);
}
155 | |
156 | Renderer::Renderer(vk::Device* device) : device(device) |
157 | { |
158 | VertexProcessor::setRoutineCacheSize(1024); |
159 | PixelProcessor::setRoutineCacheSize(1024); |
160 | SetupProcessor::setRoutineCacheSize(1024); |
161 | } |
162 | |
163 | Renderer::~Renderer() |
164 | { |
165 | drawTickets.take().wait(); |
166 | } |
167 | |
168 | // Renderer objects have to be mem aligned to the alignment provided in the class declaration |
169 | void* Renderer::operator new(size_t size) |
170 | { |
171 | ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class |
172 | return vk::allocate(sizeof(Renderer), alignof(Renderer), vk::DEVICE_MEMORY, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); |
173 | } |
174 | |
// Counterpart of Renderer::operator new: hands the storage back through
// vk::deallocate() using the same vk::DEVICE_MEMORY argument.
void Renderer::operator delete(void* mem)
{
	vk::deallocate(mem, vk::DEVICE_MEMORY);
}
179 | |
// Records one draw call and launches it through DrawCall::run().
//
// A DrawCall is borrowed from drawCallPool, filled with a snapshot of the
// pipeline routines and the state read from `context` and from this renderer
// (viewport, scissor, occlusion query, ...), and then handed to DrawCall::run().
//
//   context           - source of the pipeline state copied into the draw call.
//   indexType         - index element type, stored for later index decoding.
//   count             - number of primitives; a count of 0 returns immediately.
//   baseVertex, instanceID, viewID - copied verbatim into the draw data.
//   events            - optional TaskEvents stored on the draw call.
//   indexBuffer       - index data pointer stored in the draw data (may be null).
//   framebufferExtent - bounds used to clamp the scissor rectangle.
//   pushConstants     - copied into the draw data.
//   update            - when true, the processor states and routines are
//                       re-derived from `context`; otherwise the values
//                       already held by this renderer are reused.
void Renderer::draw(const sw::Context* context, VkIndexType indexType, unsigned int count, int baseVertex,
		TaskEvents *events, int instanceID, int viewID, void *indexBuffer, const VkExtent3D& framebufferExtent,
		PushConstantStorage const & pushConstants, bool update)
{
	if(count == 0) { return; }

	auto id = nextDrawID++;
	MARL_SCOPED_EVENT("draw %d" , id);

	#ifndef NDEBUG
	{
		// Debug builds drop draws whose primitive count is outside these limits.
		// These locals shadow the file-scope variables of the same names.
		unsigned int minPrimitives = 1;
		unsigned int maxPrimitives = 1 << 21;
		if(count < minPrimitives || count > maxPrimitives)
		{
			return;
		}
	}
	#endif

	int ms = context->sampleCount;

	// A zero sample mask skips the draw entirely (presumably because no sample
	// could be written - confirm). Note the draw ID has already been consumed.
	if(!context->multiSampleMask)
	{
		return;
	}

	marl::Pool<sw::DrawCall>::Loan draw;
	{
		MARL_SCOPED_EVENT("drawCallPool.borrow()" );
		draw = drawCallPool.borrow();
	}
	draw->id = id;

	if(update)
	{
		// Refresh the renderer's states and routines from the context.
		MARL_SCOPED_EVENT("update" );
		vertexState = VertexProcessor::update(context);
		setupState = SetupProcessor::update(context);
		pixelState = PixelProcessor::update(context);

		vertexRoutine = VertexProcessor::routine(vertexState, context->pipelineLayout, context->vertexShader, context->descriptorSets);
		setupRoutine = SetupProcessor::routine(setupState);
		pixelRoutine = PixelProcessor::routine(pixelState, context->pipelineLayout, context->pixelShader, context->descriptorSets);
	}

	// Select the primitive setup function and the batch size for this draw.
	DrawCall::SetupFunction setupPrimitives = nullptr;
	unsigned int numPrimitivesPerBatch = MaxBatchSize / ms;

	if(context->isDrawTriangle(false))
	{
		switch(context->polygonMode)
		{
		case VK_POLYGON_MODE_FILL:
			setupPrimitives = &DrawCall::setupSolidTriangles;
			break;
		case VK_POLYGON_MODE_LINE:
			// Each triangle can yield up to three line primitives, so fewer
			// triangles fit in a batch.
			setupPrimitives = &DrawCall::setupWireframeTriangles;
			numPrimitivesPerBatch /= 3;
			break;
		case VK_POLYGON_MODE_POINT:
			// Each triangle can yield up to three point primitives.
			setupPrimitives = &DrawCall::setupPointTriangles;
			numPrimitivesPerBatch /= 3;
			break;
		default:
			UNSUPPORTED("polygon mode: %d" , int(context->polygonMode));
			return;
		}
	}
	else if(context->isDrawLine(false))
	{
		setupPrimitives = &DrawCall::setupLines;
	}
	else  // Point primitive topology
	{
		setupPrimitives = &DrawCall::setupPoints;
	}

	DrawData *data = draw->data;
	draw->occlusionQuery = occlusionQuery;
	draw->batchDataPool = &batchDataPool;
	draw->numPrimitives = count;
	draw->numPrimitivesPerBatch = numPrimitivesPerBatch;
	// Round up so a final partial batch is included.
	draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch;
	draw->topology = context->topology;
	draw->provokingVertexMode = context->provokingVertexMode;
	draw->indexType = indexType;
	draw->lineRasterizationMode = context->lineRasterizationMode;

	draw->vertexRoutine = vertexRoutine;
	draw->setupRoutine = setupRoutine;
	draw->pixelRoutine = pixelRoutine;
	draw->setupPrimitives = setupPrimitives;
	draw->setupState = setupState;

	data->descriptorSets = context->descriptorSets;
	data->descriptorDynamicOffsets = context->descriptorDynamicOffsets;

	// Vertex input streams: buffer pointer, robustness size and stride per slot.
	for(int i = 0; i < MAX_INTERFACE_COMPONENTS/4; i++)
	{
		data->input[i] = context->input[i].buffer;
		data->robustnessSize[i] = context->input[i].robustnessSize;
		data->stride[i] = context->input[i].vertexStride;
	}

	data->indices = indexBuffer;
	data->viewID = viewID;
	data->instanceID = instanceID;
	data->baseVertex = baseVertex;

	if(pixelState.stencilActive)
	{
		// Index 0 holds the front-face stencil parameters, index 1 the back-face ones.
		data->stencil[0].set(context->frontStencil.reference, context->frontStencil.compareMask, context->frontStencil.writeMask);
		data->stencil[1].set(context->backStencil.reference, context->backStencil.compareMask, context->backStencil.writeMask);
	}

	data->lineWidth = context->lineWidth;

	data->factor = factor;

	if(pixelState.alphaToCoverage)
	{
		// Per-sample constants for alpha-to-coverage; only 4 and 2 samples are handled.
		if(ms == 4)
		{
			data->a2c0 = replicate(0.2f);
			data->a2c1 = replicate(0.4f);
			data->a2c2 = replicate(0.6f);
			data->a2c3 = replicate(0.8f);
		}
		else if(ms == 2)
		{
			data->a2c0 = replicate(0.25f);
			data->a2c1 = replicate(0.75f);
		}
		else ASSERT(false);
	}

	if(pixelState.occlusionEnabled)
	{
		// Reset the per-cluster occlusion counters; DrawCall::teardown() adds them up.
		for(int cluster = 0; cluster < MaxClusterCount; cluster++)
		{
			data->occlusion[cluster] = 0;
		}
	}

	// Viewport
	{
		// W and H are half the viewport extents; (X0, Y0) is the viewport center.
		float W = 0.5f * viewport.width;
		float H = 0.5f * viewport.height;
		float X0 = viewport.x + W;
		float Y0 = viewport.y + H;
		float N = viewport.minDepth;
		float F = viewport.maxDepth;
		float Z = F - N;
		constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR;

		// The constant depth bias is folded into the near depth for triangle draws only.
		if(context->isDrawTriangle(false))
		{
			N += context->depthBias;
		}

		data->WxF = replicate(W * subPixF);
		data->HxF = replicate(H * subPixF);
		data->X0xF = replicate(X0 * subPixF - subPixF / 2);
		data->Y0xF = replicate(Y0 * subPixF - subPixF / 2);
		data->halfPixelX = replicate(0.5f / W);
		data->halfPixelY = replicate(0.5f / H);
		data->viewportHeight = abs(viewport.height);
		data->slopeDepthBias = context->slopeDepthBias;
		data->depthRange = Z;
		data->depthNear = N;
	}

	// Target
	{
		// data->viewID was assigned above and selects the view passed to getOffsetPointer().
		for(int index = 0; index < RENDERTARGETS; index++)
		{
			draw->renderTarget[index] = context->renderTarget[index];

			if(draw->renderTarget[index])
			{
				data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->viewID);
				data->colorPitchB[index] = context->renderTarget[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
				data->colorSliceB[index] = context->renderTarget[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
			}
		}

		draw->depthBuffer = context->depthBuffer;
		draw->stencilBuffer = context->stencilBuffer;

		if(draw->depthBuffer)
		{
			data->depthBuffer = (float*)context->depthBuffer->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->viewID);
			data->depthPitchB = context->depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
			data->depthSliceB = context->depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0);
		}

		if(draw->stencilBuffer)
		{
			data->stencilBuffer = (unsigned char*)context->stencilBuffer->getOffsetPointer({0, 0, 0}, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->viewID);
			data->stencilPitchB = context->stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
			data->stencilSliceB = context->stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0);
		}
	}

	// Scissor
	{
		// Clamp the scissor rectangle to the framebuffer bounds.
		data->scissorX0 = clamp<int>(scissor.offset.x, 0, framebufferExtent.width);
		data->scissorX1 = clamp<int>(scissor.offset.x + scissor.extent.width, 0, framebufferExtent.width);
		data->scissorY0 = clamp<int>(scissor.offset.y, 0, framebufferExtent.height);
		data->scissorY1 = clamp<int>(scissor.offset.y + scissor.extent.height, 0, framebufferExtent.height);
	}

	// Push constants
	{
		data->pushConstants = pushConstants;
	}

	draw->events = events;

	DrawCall::run(draw, &drawTickets, clusterQueues);
}
402 | |
403 | void DrawCall::setup() |
404 | { |
405 | if(occlusionQuery != nullptr) |
406 | { |
407 | occlusionQuery->start(); |
408 | } |
409 | |
410 | if(events) |
411 | { |
412 | events->start(); |
413 | } |
414 | } |
415 | |
416 | void DrawCall::teardown() |
417 | { |
418 | if(events) |
419 | { |
420 | events->finish(); |
421 | events = nullptr; |
422 | } |
423 | |
424 | if (occlusionQuery != nullptr) |
425 | { |
426 | for(int cluster = 0; cluster < MaxClusterCount; cluster++) |
427 | { |
428 | occlusionQuery->add(data->occlusion[cluster]); |
429 | } |
430 | occlusionQuery->finish(); |
431 | } |
432 | |
433 | vertexRoutine = {}; |
434 | setupRoutine = {}; |
435 | pixelRoutine = {}; |
436 | } |
437 | |
438 | void DrawCall::run(const marl::Loan<DrawCall>& draw, marl::Ticket::Queue* tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount]) |
439 | { |
440 | draw->setup(); |
441 | |
442 | auto const numPrimitives = draw->numPrimitives; |
443 | auto const numPrimitivesPerBatch = draw->numPrimitivesPerBatch; |
444 | auto const numBatches = draw->numBatches; |
445 | |
446 | auto ticket = tickets->take(); |
447 | auto finally = marl::make_shared_finally([draw, ticket] { |
448 | MARL_SCOPED_EVENT("FINISH draw %d" , draw->id); |
449 | draw->teardown(); |
450 | ticket.done(); |
451 | }); |
452 | |
453 | for (unsigned int batchId = 0; batchId < numBatches; batchId++) |
454 | { |
455 | auto batch = draw->batchDataPool->borrow(); |
456 | batch->id = batchId; |
457 | batch->firstPrimitive = batch->id * numPrimitivesPerBatch; |
458 | batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive; |
459 | |
460 | for (int cluster = 0; cluster < MaxClusterCount; cluster++) |
461 | { |
462 | batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take()); |
463 | } |
464 | |
465 | marl::schedule([draw, batch, finally] { |
466 | |
467 | processVertices(draw.get(), batch.get()); |
468 | |
469 | if (!draw->setupState.rasterizerDiscard) |
470 | { |
471 | processPrimitives(draw.get(), batch.get()); |
472 | |
473 | if (batch->numVisible > 0) |
474 | { |
475 | processPixels(draw, batch, finally); |
476 | return; |
477 | } |
478 | } |
479 | |
480 | for (int cluster = 0; cluster < MaxClusterCount; cluster++) |
481 | { |
482 | batch->clusterTickets[cluster].done(); |
483 | } |
484 | }); |
485 | } |
486 | } |
487 | |
488 | void DrawCall::processVertices(DrawCall* draw, BatchData* batch) |
489 | { |
490 | MARL_SCOPED_EVENT("VERTEX draw %d, batch %d" , draw->id, batch->id); |
491 | |
492 | unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size. |
493 | { |
494 | MARL_SCOPED_EVENT("processPrimitiveVertices" ); |
495 | processPrimitiveVertices( |
496 | triangleIndices, |
497 | draw->data->indices, |
498 | draw->indexType, |
499 | batch->firstPrimitive, |
500 | batch->numPrimitives, |
501 | draw->topology, |
502 | draw->provokingVertexMode); |
503 | } |
504 | |
505 | auto& vertexTask = batch->vertexTask; |
506 | vertexTask.primitiveStart = batch->firstPrimitive; |
507 | // We're only using batch compaction for points, not lines |
508 | vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3); |
509 | if (vertexTask.vertexCache.drawCall != draw->id) |
510 | { |
511 | vertexTask.vertexCache.clear(); |
512 | vertexTask.vertexCache.drawCall = draw->id; |
513 | } |
514 | |
515 | draw->vertexRoutine(&batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data); |
516 | } |
517 | |
518 | void DrawCall::processPrimitives(DrawCall* draw, BatchData* batch) |
519 | { |
520 | MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d" , draw->id, batch->id); |
521 | auto triangles = &batch->triangles[0]; |
522 | auto primitives = &batch->primitives[0]; |
523 | batch->numVisible = draw->setupPrimitives(triangles, primitives, draw, batch->numPrimitives); |
524 | } |
525 | |
526 | void DrawCall::processPixels(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally) |
527 | { |
528 | struct Data |
529 | { |
530 | Data(const marl::Loan<DrawCall>& draw, const marl::Loan<BatchData>& batch, const std::shared_ptr<marl::Finally>& finally) |
531 | : draw(draw), batch(batch), finally(finally) {} |
532 | marl::Loan<DrawCall> draw; |
533 | marl::Loan<BatchData> batch; |
534 | std::shared_ptr<marl::Finally> finally; |
535 | }; |
536 | auto data = std::make_shared<Data>(draw, batch, finally); |
537 | for (int cluster = 0; cluster < MaxClusterCount; cluster++) |
538 | { |
539 | batch->clusterTickets[cluster].onCall([data, cluster] |
540 | { |
541 | auto& draw = data->draw; |
542 | auto& batch = data->batch; |
543 | MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d" , draw->id, batch->id, cluster); |
544 | draw->pixelRoutine(&batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data); |
545 | batch->clusterTickets[cluster].done(); |
546 | }); |
547 | } |
548 | } |
549 | |
550 | void Renderer::synchronize() |
551 | { |
552 | MARL_SCOPED_EVENT("synchronize" ); |
553 | auto ticket = drawTickets.take(); |
554 | ticket.wait(); |
555 | device->updateSamplingRoutineConstCache(); |
556 | ticket.done(); |
557 | } |
558 | |
559 | void DrawCall::processPrimitiveVertices( |
560 | unsigned int triangleIndicesOut[MaxBatchSize + 1][3], |
561 | const void *primitiveIndices, |
562 | VkIndexType indexType, |
563 | unsigned int start, |
564 | unsigned int triangleCount, |
565 | VkPrimitiveTopology topology, |
566 | VkProvokingVertexModeEXT provokingVertexMode) |
567 | { |
568 | if(!primitiveIndices) |
569 | { |
570 | struct LinearIndex |
571 | { |
572 | unsigned int operator[](unsigned int i) { return i; } |
573 | }; |
574 | |
575 | if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount)) |
576 | { |
577 | return; |
578 | } |
579 | } |
580 | else |
581 | { |
582 | switch(indexType) |
583 | { |
584 | case VK_INDEX_TYPE_UINT16: |
585 | if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint16_t*>(primitiveIndices), start, triangleCount)) |
586 | { |
587 | return; |
588 | } |
589 | break; |
590 | case VK_INDEX_TYPE_UINT32: |
591 | if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast<const uint32_t*>(primitiveIndices), start, triangleCount)) |
592 | { |
593 | return; |
594 | } |
595 | break; |
596 | break; |
597 | default: |
598 | ASSERT(false); |
599 | return; |
600 | } |
601 | } |
602 | |
603 | // setBatchIndices() takes care of the point case, since it's different due to the compaction |
604 | if (topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST) |
605 | { |
606 | // Repeat the last index to allow for SIMD width overrun. |
607 | triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2]; |
608 | triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2]; |
609 | triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2]; |
610 | } |
611 | } |
612 | |
613 | int DrawCall::setupSolidTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) |
614 | { |
615 | auto &state = drawCall->setupState; |
616 | |
617 | int ms = state.multiSample; |
618 | const DrawData *data = drawCall->data; |
619 | int visible = 0; |
620 | |
621 | for(int i = 0; i < count; i++, triangles++) |
622 | { |
623 | Vertex &v0 = triangles->v0; |
624 | Vertex &v1 = triangles->v1; |
625 | Vertex &v2 = triangles->v2; |
626 | |
627 | if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) |
628 | { |
629 | Polygon polygon(&v0.position, &v1.position, &v2.position); |
630 | |
631 | int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags; |
632 | |
633 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
634 | { |
635 | if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall)) |
636 | { |
637 | continue; |
638 | } |
639 | } |
640 | |
641 | if(drawCall->setupRoutine(primitives, triangles, &polygon, data)) |
642 | { |
643 | primitives += ms; |
644 | visible++; |
645 | } |
646 | } |
647 | } |
648 | |
649 | return visible; |
650 | } |
651 | |
652 | int DrawCall::setupWireframeTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) |
653 | { |
654 | auto& state = drawCall->setupState; |
655 | |
656 | int ms = state.multiSample; |
657 | int visible = 0; |
658 | |
659 | for(int i = 0; i < count; i++) |
660 | { |
661 | const Vertex &v0 = triangles[i].v0; |
662 | const Vertex &v1 = triangles[i].v1; |
663 | const Vertex &v2 = triangles[i].v2; |
664 | |
665 | float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + |
666 | (v0.x * v2.y - v0.y * v2.x) * v1.w + |
667 | (v2.x * v1.y - v1.x * v2.y) * v0.w; |
668 | |
669 | bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0); |
670 | if(state.cullMode & VK_CULL_MODE_FRONT_BIT) |
671 | { |
672 | if(frontFacing) continue; |
673 | } |
674 | if(state.cullMode & VK_CULL_MODE_BACK_BIT) |
675 | { |
676 | if(!frontFacing) continue; |
677 | } |
678 | |
679 | Triangle lines[3]; |
680 | lines[0].v0 = v0; |
681 | lines[0].v1 = v1; |
682 | lines[1].v0 = v1; |
683 | lines[1].v1 = v2; |
684 | lines[2].v0 = v2; |
685 | lines[2].v1 = v0; |
686 | |
687 | for(int i = 0; i < 3; i++) |
688 | { |
689 | if(setupLine(*primitives, lines[i], *drawCall)) |
690 | { |
691 | primitives += ms; |
692 | visible++; |
693 | } |
694 | } |
695 | } |
696 | |
697 | return visible; |
698 | } |
699 | |
700 | int DrawCall::setupPointTriangles(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) |
701 | { |
702 | auto& state = drawCall->setupState; |
703 | |
704 | int ms = state.multiSample; |
705 | int visible = 0; |
706 | |
707 | for(int i = 0; i < count; i++) |
708 | { |
709 | const Vertex &v0 = triangles[i].v0; |
710 | const Vertex &v1 = triangles[i].v1; |
711 | const Vertex &v2 = triangles[i].v2; |
712 | |
713 | float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + |
714 | (v0.x * v2.y - v0.y * v2.x) * v1.w + |
715 | (v2.x * v1.y - v1.x * v2.y) * v0.w; |
716 | |
717 | bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0); |
718 | if(state.cullMode & VK_CULL_MODE_FRONT_BIT) |
719 | { |
720 | if(frontFacing) continue; |
721 | } |
722 | if(state.cullMode & VK_CULL_MODE_BACK_BIT) |
723 | { |
724 | if(!frontFacing) continue; |
725 | } |
726 | |
727 | Triangle points[3]; |
728 | points[0].v0 = v0; |
729 | points[1].v0 = v1; |
730 | points[2].v0 = v2; |
731 | |
732 | for(int i = 0; i < 3; i++) |
733 | { |
734 | if(setupPoint(*primitives, points[i], *drawCall)) |
735 | { |
736 | primitives += ms; |
737 | visible++; |
738 | } |
739 | } |
740 | } |
741 | |
742 | return visible; |
743 | } |
744 | |
745 | int DrawCall::setupLines(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) |
746 | { |
747 | auto &state = drawCall->setupState; |
748 | |
749 | int visible = 0; |
750 | int ms = state.multiSample; |
751 | |
752 | for(int i = 0; i < count; i++) |
753 | { |
754 | if(setupLine(*primitives, *triangles, *drawCall)) |
755 | { |
756 | primitives += ms; |
757 | visible++; |
758 | } |
759 | |
760 | triangles++; |
761 | } |
762 | |
763 | return visible; |
764 | } |
765 | |
766 | int DrawCall::setupPoints(Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) |
767 | { |
768 | auto &state = drawCall->setupState; |
769 | |
770 | int visible = 0; |
771 | int ms = state.multiSample; |
772 | |
773 | for(int i = 0; i < count; i++) |
774 | { |
775 | if(setupPoint(*primitives, *triangles, *drawCall)) |
776 | { |
777 | primitives += ms; |
778 | visible++; |
779 | } |
780 | |
781 | triangles++; |
782 | } |
783 | |
784 | return visible; |
785 | } |
786 | |
787 | bool DrawCall::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
788 | { |
789 | const DrawData &data = *draw.data; |
790 | |
791 | float lineWidth = data.lineWidth; |
792 | |
793 | Vertex &v0 = triangle.v0; |
794 | Vertex &v1 = triangle.v1; |
795 | |
796 | const float4 &P0 = v0.position; |
797 | const float4 &P1 = v1.position; |
798 | |
799 | if(P0.w <= 0 && P1.w <= 0) |
800 | { |
801 | return false; |
802 | } |
803 | |
804 | constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; |
805 | |
806 | const float W = data.WxF[0] * (1.0f / subPixF); |
807 | const float H = data.HxF[0] * (1.0f / subPixF); |
808 | |
809 | float dx = W * (P1.x / P1.w - P0.x / P0.w); |
810 | float dy = H * (P1.y / P1.w - P0.y / P0.w); |
811 | |
812 | if(dx == 0 && dy == 0) |
813 | { |
814 | return false; |
815 | } |
816 | |
817 | // We use rectangular lines for non-Bresenham lines (cf. 'strictLines'), |
818 | // and for Bresenham lines when multiSampling is enabled. |
819 | // FIXME(b/142965928): Bresenham lines should render the same with or without |
820 | // multisampling, which will require a special case in the |
821 | // code when multisampling is on. For now, we just use |
822 | // rectangular lines when multisampling is enabled. |
823 | if((draw.setupState.multiSample > 1) || |
824 | (draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)) |
825 | { |
826 | // Rectangle centered on the line segment |
827 | |
828 | float4 P[4]; |
829 | int C[4]; |
830 | |
831 | P[0] = P0; |
832 | P[1] = P1; |
833 | P[2] = P1; |
834 | P[3] = P0; |
835 | |
836 | float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); |
837 | |
838 | dx *= scale; |
839 | dy *= scale; |
840 | |
841 | float dx0h = dx * P0.w / H; |
842 | float dy0w = dy * P0.w / W; |
843 | |
844 | float dx1h = dx * P1.w / H; |
845 | float dy1w = dy * P1.w / W; |
846 | |
847 | P[0].x += -dy0w; |
848 | P[0].y += +dx0h; |
849 | C[0] = Clipper::ComputeClipFlags(P[0]); |
850 | |
851 | P[1].x += -dy1w; |
852 | P[1].y += +dx1h; |
853 | C[1] = Clipper::ComputeClipFlags(P[1]); |
854 | |
855 | P[2].x += +dy1w; |
856 | P[2].y += -dx1h; |
857 | C[2] = Clipper::ComputeClipFlags(P[2]); |
858 | |
859 | P[3].x += +dy0w; |
860 | P[3].y += -dx0h; |
861 | C[3] = Clipper::ComputeClipFlags(P[3]); |
862 | |
863 | if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) |
864 | { |
865 | Polygon polygon(P, 4); |
866 | |
867 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3]; |
868 | |
869 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
870 | { |
871 | if(!Clipper::Clip(polygon, clipFlagsOr, draw)) |
872 | { |
873 | return false; |
874 | } |
875 | } |
876 | |
877 | return draw.setupRoutine(&primitive, &triangle, &polygon, &data); |
878 | } |
879 | } |
880 | else if(false) // TODO(b/80135519): Deprecate |
881 | { |
882 | // Connecting diamonds polygon |
883 | // This shape satisfies the diamond test convention, except for the exit rule part. |
884 | // Line segments with overlapping endpoints have duplicate fragments. |
885 | // The ideal algorithm requires half-open line rasterization (b/80135519). |
886 | |
887 | float4 P[8]; |
888 | int C[8]; |
889 | |
890 | P[0] = P0; |
891 | P[1] = P0; |
892 | P[2] = P0; |
893 | P[3] = P0; |
894 | P[4] = P1; |
895 | P[5] = P1; |
896 | P[6] = P1; |
897 | P[7] = P1; |
898 | |
899 | float dx0 = lineWidth * 0.5f * P0.w / W; |
900 | float dy0 = lineWidth * 0.5f * P0.w / H; |
901 | |
902 | float dx1 = lineWidth * 0.5f * P1.w / W; |
903 | float dy1 = lineWidth * 0.5f * P1.w / H; |
904 | |
905 | P[0].x += -dx0; |
906 | C[0] = Clipper::ComputeClipFlags(P[0]); |
907 | |
908 | P[1].y += +dy0; |
909 | C[1] = Clipper::ComputeClipFlags(P[1]); |
910 | |
911 | P[2].x += +dx0; |
912 | C[2] = Clipper::ComputeClipFlags(P[2]); |
913 | |
914 | P[3].y += -dy0; |
915 | C[3] = Clipper::ComputeClipFlags(P[3]); |
916 | |
917 | P[4].x += -dx1; |
918 | C[4] = Clipper::ComputeClipFlags(P[4]); |
919 | |
920 | P[5].y += +dy1; |
921 | C[5] = Clipper::ComputeClipFlags(P[5]); |
922 | |
923 | P[6].x += +dx1; |
924 | C[6] = Clipper::ComputeClipFlags(P[6]); |
925 | |
926 | P[7].y += -dy1; |
927 | C[7] = Clipper::ComputeClipFlags(P[7]); |
928 | |
929 | if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) |
930 | { |
931 | float4 L[6]; |
932 | |
933 | if(dx > -dy) |
934 | { |
935 | if(dx > dy) // Right |
936 | { |
937 | L[0] = P[0]; |
938 | L[1] = P[1]; |
939 | L[2] = P[5]; |
940 | L[3] = P[6]; |
941 | L[4] = P[7]; |
942 | L[5] = P[3]; |
943 | } |
944 | else // Down |
945 | { |
946 | L[0] = P[0]; |
947 | L[1] = P[4]; |
948 | L[2] = P[5]; |
949 | L[3] = P[6]; |
950 | L[4] = P[2]; |
951 | L[5] = P[3]; |
952 | } |
953 | } |
954 | else |
955 | { |
956 | if(dx > dy) // Up |
957 | { |
958 | L[0] = P[0]; |
959 | L[1] = P[1]; |
960 | L[2] = P[2]; |
961 | L[3] = P[6]; |
962 | L[4] = P[7]; |
963 | L[5] = P[4]; |
964 | } |
965 | else // Left |
966 | { |
967 | L[0] = P[1]; |
968 | L[1] = P[2]; |
969 | L[2] = P[3]; |
970 | L[3] = P[7]; |
971 | L[4] = P[4]; |
972 | L[5] = P[5]; |
973 | } |
974 | } |
975 | |
976 | Polygon polygon(L, 6); |
977 | |
978 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7]; |
979 | |
980 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
981 | { |
982 | if(!Clipper::Clip(polygon, clipFlagsOr, draw)) |
983 | { |
984 | return false; |
985 | } |
986 | } |
987 | |
988 | return draw.setupRoutine(&primitive, &triangle, &polygon, &data); |
989 | } |
990 | } |
991 | else |
992 | { |
993 | // Parallelogram approximating Bresenham line |
994 | // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the |
995 | // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum |
996 | // requirements for Bresenham line segment rasterization. |
997 | |
998 | float4 P[8]; |
999 | P[0] = P0; |
1000 | P[1] = P0; |
1001 | P[2] = P0; |
1002 | P[3] = P0; |
1003 | P[4] = P1; |
1004 | P[5] = P1; |
1005 | P[6] = P1; |
1006 | P[7] = P1; |
1007 | |
1008 | float dx0 = lineWidth * 0.5f * P0.w / W; |
1009 | float dy0 = lineWidth * 0.5f * P0.w / H; |
1010 | |
1011 | float dx1 = lineWidth * 0.5f * P1.w / W; |
1012 | float dy1 = lineWidth * 0.5f * P1.w / H; |
1013 | |
1014 | P[0].x += -dx0; |
1015 | P[1].y += +dy0; |
1016 | P[2].x += +dx0; |
1017 | P[3].y += -dy0; |
1018 | P[4].x += -dx1; |
1019 | P[5].y += +dy1; |
1020 | P[6].x += +dx1; |
1021 | P[7].y += -dy1; |
1022 | |
1023 | float4 L[4]; |
1024 | |
1025 | if(dx > -dy) |
1026 | { |
1027 | if(dx > dy) // Right |
1028 | { |
1029 | L[0] = P[1]; |
1030 | L[1] = P[5]; |
1031 | L[2] = P[7]; |
1032 | L[3] = P[3]; |
1033 | } |
1034 | else // Down |
1035 | { |
1036 | L[0] = P[0]; |
1037 | L[1] = P[4]; |
1038 | L[2] = P[6]; |
1039 | L[3] = P[2]; |
1040 | } |
1041 | } |
1042 | else |
1043 | { |
1044 | if(dx > dy) // Up |
1045 | { |
1046 | L[0] = P[0]; |
1047 | L[1] = P[2]; |
1048 | L[2] = P[6]; |
1049 | L[3] = P[4]; |
1050 | } |
1051 | else // Left |
1052 | { |
1053 | L[0] = P[1]; |
1054 | L[1] = P[3]; |
1055 | L[2] = P[7]; |
1056 | L[3] = P[5]; |
1057 | } |
1058 | } |
1059 | |
1060 | int C0 = Clipper::ComputeClipFlags(L[0]); |
1061 | int C1 = Clipper::ComputeClipFlags(L[1]); |
1062 | int C2 = Clipper::ComputeClipFlags(L[2]); |
1063 | int C3 = Clipper::ComputeClipFlags(L[3]); |
1064 | |
1065 | if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE) |
1066 | { |
1067 | Polygon polygon(L, 4); |
1068 | |
1069 | int clipFlagsOr = C0 | C1 | C2 | C3; |
1070 | |
1071 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
1072 | { |
1073 | if(!Clipper::Clip(polygon, clipFlagsOr, draw)) |
1074 | { |
1075 | return false; |
1076 | } |
1077 | } |
1078 | |
1079 | return draw.setupRoutine(&primitive, &triangle, &polygon, &data); |
1080 | } |
1081 | } |
1082 | |
1083 | return false; |
1084 | } |
1085 | |
1086 | bool DrawCall::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) |
1087 | { |
1088 | const DrawData &data = *draw.data; |
1089 | |
1090 | Vertex &v = triangle.v0; |
1091 | |
1092 | float pSize = v.pointSize; |
1093 | |
1094 | pSize = clamp(pSize, 1.0f, static_cast<float>(vk::MAX_POINT_SIZE)); |
1095 | |
1096 | float4 P[4]; |
1097 | int C[4]; |
1098 | |
1099 | P[0] = v.position; |
1100 | P[1] = v.position; |
1101 | P[2] = v.position; |
1102 | P[3] = v.position; |
1103 | |
1104 | const float X = pSize * P[0].w * data.halfPixelX[0]; |
1105 | const float Y = pSize * P[0].w * data.halfPixelY[0]; |
1106 | |
1107 | P[0].x -= X; |
1108 | P[0].y += Y; |
1109 | C[0] = Clipper::ComputeClipFlags(P[0]); |
1110 | |
1111 | P[1].x += X; |
1112 | P[1].y += Y; |
1113 | C[1] = Clipper::ComputeClipFlags(P[1]); |
1114 | |
1115 | P[2].x += X; |
1116 | P[2].y -= Y; |
1117 | C[2] = Clipper::ComputeClipFlags(P[2]); |
1118 | |
1119 | P[3].x -= X; |
1120 | P[3].y -= Y; |
1121 | C[3] = Clipper::ComputeClipFlags(P[3]); |
1122 | |
1123 | Polygon polygon(P, 4); |
1124 | |
1125 | if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) |
1126 | { |
1127 | int clipFlagsOr = C[0] | C[1] | C[2] | C[3]; |
1128 | |
1129 | if(clipFlagsOr != Clipper::CLIP_FINITE) |
1130 | { |
1131 | if(!Clipper::Clip(polygon, clipFlagsOr, draw)) |
1132 | { |
1133 | return false; |
1134 | } |
1135 | } |
1136 | |
1137 | triangle.v1 = triangle.v0; |
1138 | triangle.v2 = triangle.v0; |
1139 | |
1140 | constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; |
1141 | |
1142 | triangle.v1.projected.x += iround(subPixF * 0.5f * pSize); |
1143 | triangle.v2.projected.y -= iround(subPixF * 0.5f * pSize) * (data.HxF[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner |
1144 | return draw.setupRoutine(&primitive, &triangle, &polygon, &data); |
1145 | } |
1146 | |
1147 | return false; |
1148 | } |
1149 | |
1150 | void Renderer::addQuery(vk::Query *query) |
1151 | { |
1152 | ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); |
1153 | ASSERT(!occlusionQuery); |
1154 | |
1155 | occlusionQuery = query; |
1156 | } |
1157 | |
1158 | void Renderer::removeQuery(vk::Query *query) |
1159 | { |
1160 | ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); |
1161 | ASSERT(occlusionQuery == query); |
1162 | |
1163 | occlusionQuery = nullptr; |
1164 | } |
1165 | |
1166 | void Renderer::advanceInstanceAttributes(Stream* inputs) |
1167 | { |
1168 | for(uint32_t i = 0; i < vk::MAX_VERTEX_INPUT_BINDINGS; i++) |
1169 | { |
1170 | auto &attrib = inputs[i]; |
1171 | if (attrib.count && attrib.instanceStride && (attrib.instanceStride < attrib.robustnessSize)) |
1172 | { |
1173 | // Under the casts: attrib.buffer += attrib.instanceStride |
1174 | attrib.buffer = (void const *)((uintptr_t)attrib.buffer + attrib.instanceStride); |
1175 | attrib.robustnessSize -= attrib.instanceStride; |
1176 | } |
1177 | } |
1178 | } |
1179 | |
1180 | void Renderer::setViewport(const VkViewport &viewport) |
1181 | { |
1182 | this->viewport = viewport; |
1183 | } |
1184 | |
1185 | void Renderer::setScissor(const VkRect2D &scissor) |
1186 | { |
1187 | this->scissor = scissor; |
1188 | } |
1189 | |
1190 | } |
1191 | |