1// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2#include "meshoptimizer.h"
3
4#include <assert.h>
5#include <float.h>
6#include <math.h>
7#include <string.h>
8
9// This work is based on:
10// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
11// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
12// Jack Ritter. An Efficient Bounding Sphere. 1990
13namespace meshopt
14{
15
16// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet
17const size_t kMeshletMaxVertices = 255;
18
19// A reasonable limit is around 2*max_vertices or less
20const size_t kMeshletMaxTriangles = 512;
21
22struct TriangleAdjacency2
23{
24 unsigned int* counts;
25 unsigned int* offsets;
26 unsigned int* data;
27};
28
29static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
30{
31 size_t face_count = index_count / 3;
32
33 // allocate arrays
34 adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
35 adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
36 adjacency.data = allocator.allocate<unsigned int>(index_count);
37
38 // fill triangle counts
39 memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
40
41 for (size_t i = 0; i < index_count; ++i)
42 {
43 assert(indices[i] < vertex_count);
44
45 adjacency.counts[indices[i]]++;
46 }
47
48 // fill offset table
49 unsigned int offset = 0;
50
51 for (size_t i = 0; i < vertex_count; ++i)
52 {
53 adjacency.offsets[i] = offset;
54 offset += adjacency.counts[i];
55 }
56
57 assert(offset == index_count);
58
59 // fill triangle data
60 for (size_t i = 0; i < face_count; ++i)
61 {
62 unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
63
64 adjacency.data[adjacency.offsets[a]++] = unsigned(i);
65 adjacency.data[adjacency.offsets[b]++] = unsigned(i);
66 adjacency.data[adjacency.offsets[c]++] = unsigned(i);
67 }
68
69 // fix offsets that have been disturbed by the previous pass
70 for (size_t i = 0; i < vertex_count; ++i)
71 {
72 assert(adjacency.offsets[i] >= adjacency.counts[i]);
73
74 adjacency.offsets[i] -= adjacency.counts[i];
75 }
76}
77
78static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
79{
80 assert(count > 0);
81
82 // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
83 size_t pmin[3] = {0, 0, 0};
84 size_t pmax[3] = {0, 0, 0};
85
86 for (size_t i = 0; i < count; ++i)
87 {
88 const float* p = points[i];
89
90 for (int axis = 0; axis < 3; ++axis)
91 {
92 pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis];
93 pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis];
94 }
95 }
96
97 // find the pair of points with largest distance
98 float paxisd2 = 0;
99 int paxis = 0;
100
101 for (int axis = 0; axis < 3; ++axis)
102 {
103 const float* p1 = points[pmin[axis]];
104 const float* p2 = points[pmax[axis]];
105
106 float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);
107
108 if (d2 > paxisd2)
109 {
110 paxisd2 = d2;
111 paxis = axis;
112 }
113 }
114
115 // use the longest segment as the initial sphere diameter
116 const float* p1 = points[pmin[paxis]];
117 const float* p2 = points[pmax[paxis]];
118
119 float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
120 float radius = sqrtf(paxisd2) / 2;
121
122 // iteratively adjust the sphere up until all points fit
123 for (size_t i = 0; i < count; ++i)
124 {
125 const float* p = points[i];
126 float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
127
128 if (d2 > radius * radius)
129 {
130 float d = sqrtf(d2);
131 assert(d > 0);
132
133 float k = 0.5f + (radius / d) / 2;
134
135 center[0] = center[0] * k + p[0] * (1 - k);
136 center[1] = center[1] * k + p[1] * (1 - k);
137 center[2] = center[2] * k + p[2] * (1 - k);
138 radius = (radius + d) / 2;
139 }
140 }
141
142 result[0] = center[0];
143 result[1] = center[1];
144 result[2] = center[2];
145 result[3] = radius;
146}
147
148struct Cone
149{
150 float px, py, pz;
151 float nx, ny, nz;
152};
153
154static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
155{
156 float cone = 1.f - spread * cone_weight;
157 float cone_clamped = cone < 1e-3f ? 1e-3f : cone;
158
159 return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped;
160}
161
162static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
163{
164 Cone result = acc;
165
166 float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);
167
168 result.px *= center_scale;
169 result.py *= center_scale;
170 result.pz *= center_scale;
171
172 float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz;
173 float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length);
174
175 result.nx *= axis_scale;
176 result.ny *= axis_scale;
177 result.nz *= axis_scale;
178
179 return result;
180}
181
182static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
183{
184 (void)vertex_count;
185
186 size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
187 size_t face_count = index_count / 3;
188
189 float mesh_area = 0;
190
191 for (size_t i = 0; i < face_count; ++i)
192 {
193 unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
194 assert(a < vertex_count && b < vertex_count && c < vertex_count);
195
196 const float* p0 = vertex_positions + vertex_stride_float * a;
197 const float* p1 = vertex_positions + vertex_stride_float * b;
198 const float* p2 = vertex_positions + vertex_stride_float * c;
199
200 float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
201 float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
202
203 float normalx = p10[1] * p20[2] - p10[2] * p20[1];
204 float normaly = p10[2] * p20[0] - p10[0] * p20[2];
205 float normalz = p10[0] * p20[1] - p10[1] * p20[0];
206
207 float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
208 float invarea = (area == 0.f) ? 0.f : 1.f / area;
209
210 triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f;
211 triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f;
212 triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f;
213
214 triangles[i].nx = normalx * invarea;
215 triangles[i].ny = normaly * invarea;
216 triangles[i].nz = normalz * invarea;
217
218 mesh_area += area;
219 }
220
221 return mesh_area;
222}
223
224static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
225{
226 size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3;
227
228 // fill 4b padding with 0
229 while (offset & 3)
230 meshlet_triangles[offset++] = 0;
231}
232
233static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
234{
235 unsigned char& av = used[a];
236 unsigned char& bv = used[b];
237 unsigned char& cv = used[c];
238
239 bool result = false;
240
241 unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
242
243 if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
244 {
245 meshlets[meshlet_offset] = meshlet;
246
247 for (size_t j = 0; j < meshlet.vertex_count; ++j)
248 used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
249
250 finishMeshlet(meshlet, meshlet_triangles);
251
252 meshlet.vertex_offset += meshlet.vertex_count;
253 meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
254 meshlet.vertex_count = 0;
255 meshlet.triangle_count = 0;
256
257 result = true;
258 }
259
260 if (av == 0xff)
261 {
262 av = (unsigned char)meshlet.vertex_count;
263 meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
264 }
265
266 if (bv == 0xff)
267 {
268 bv = (unsigned char)meshlet.vertex_count;
269 meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
270 }
271
272 if (cv == 0xff)
273 {
274 cv = (unsigned char)meshlet.vertex_count;
275 meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
276 }
277
278 meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
279 meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
280 meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
281 meshlet.triangle_count++;
282
283 return result;
284}
285
286static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight, unsigned int* out_extra)
287{
288 unsigned int best_triangle = ~0u;
289 unsigned int best_extra = 5;
290 float best_score = FLT_MAX;
291
292 for (size_t i = 0; i < meshlet.vertex_count; ++i)
293 {
294 unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
295
296 unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
297 size_t neighbors_size = adjacency.counts[index];
298
299 for (size_t j = 0; j < neighbors_size; ++j)
300 {
301 unsigned int triangle = neighbors[j];
302 unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
303
304 unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
305
306 // triangles that don't add new vertices to meshlets are max. priority
307 if (extra != 0)
308 {
309 // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
310 if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
311 extra = 0;
312
313 extra++;
314 }
315
316 // since topology-based priority is always more important than the score, we can skip scoring in some cases
317 if (extra > best_extra)
318 continue;
319
320 float score = 0;
321
322 // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles)
323 if (meshlet_cone)
324 {
325 const Cone& tri_cone = triangles[triangle];
326
327 float distance2 =
328 (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) +
329 (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) +
330 (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz);
331
332 float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz;
333
334 score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
335 }
336 else
337 {
338 // each live_triangles entry is >= 1 since it includes the current triangle we're processing
339 score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3);
340 }
341
342 // note that topology-based priority is always more important than the score
343 // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
344 if (extra < best_extra || score < best_score)
345 {
346 best_triangle = triangle;
347 best_extra = extra;
348 best_score = score;
349 }
350 }
351 }
352
353 if (out_extra)
354 *out_extra = best_extra;
355
356 return best_triangle;
357}
358
359struct KDNode
360{
361 union
362 {
363 float split;
364 unsigned int index;
365 };
366
367 // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
368 // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
369 unsigned int axis : 2;
370 unsigned int children : 30;
371};
372
373static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
374{
375 size_t m = 0;
376
377 // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
378 for (size_t i = 0; i < count; ++i)
379 {
380 float v = points[indices[i] * stride + axis];
381
382 // swap(m, i) unconditionally
383 unsigned int t = indices[m];
384 indices[m] = indices[i];
385 indices[i] = t;
386
387 // when v >= pivot, we swap i with m without advancing it, preserving invariants
388 m += v < pivot;
389 }
390
391 return m;
392}
393
394static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
395{
396 assert(offset + count <= node_count);
397 (void)node_count;
398
399 KDNode& result = nodes[offset];
400
401 result.index = indices[0];
402 result.axis = 3;
403 result.children = unsigned(count - 1);
404
405 // all remaining points are stored in nodes immediately following the leaf
406 for (size_t i = 1; i < count; ++i)
407 {
408 KDNode& tail = nodes[offset + i];
409
410 tail.index = indices[i];
411 tail.axis = 3;
412 tail.children = ~0u >> 2; // bogus value to prevent misuse
413 }
414
415 return offset + count;
416}
417
418static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
419{
420 assert(count > 0);
421 assert(offset < node_count);
422
423 if (count <= leaf_size)
424 return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
425
426 float mean[3] = {};
427 float vars[3] = {};
428 float runc = 1, runs = 1;
429
430 // gather statistics on the points in the subtree using Welford's algorithm
431 for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
432 {
433 const float* point = points + indices[i] * stride;
434
435 for (int k = 0; k < 3; ++k)
436 {
437 float delta = point[k] - mean[k];
438 mean[k] += delta * runs;
439 vars[k] += delta * (point[k] - mean[k]);
440 }
441 }
442
443 // split axis is one where the variance is largest
444 unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2;
445
446 float split = mean[axis];
447 size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
448
449 // when the partition is degenerate simply consolidate the points into a single node
450 if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
451 return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
452
453 KDNode& result = nodes[offset];
454
455 result.split = split;
456 result.axis = axis;
457
458 // left subtree is right after our node
459 size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
460
461 // distance to the right subtree is represented explicitly
462 result.children = unsigned(next_offset - offset - 1);
463
464 return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
465}
466
467static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
468{
469 const KDNode& node = nodes[root];
470
471 if (node.axis == 3)
472 {
473 // leaf
474 for (unsigned int i = 0; i <= node.children; ++i)
475 {
476 unsigned int index = nodes[root + i].index;
477
478 if (emitted_flags[index])
479 continue;
480
481 const float* point = points + index * stride;
482
483 float distance2 =
484 (point[0] - position[0]) * (point[0] - position[0]) +
485 (point[1] - position[1]) * (point[1] - position[1]) +
486 (point[2] - position[2]) * (point[2] - position[2]);
487 float distance = sqrtf(distance2);
488
489 if (distance < limit)
490 {
491 result = index;
492 limit = distance;
493 }
494 }
495 }
496 else
497 {
498 // branch; we order recursion to process the node that search position is in first
499 float delta = position[node.axis] - node.split;
500 unsigned int first = (delta <= 0) ? 0 : node.children;
501 unsigned int second = first ^ node.children;
502
503 kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);
504
505 // only process the other node if it can have a match based on closest distance so far
506 if (fabsf(delta) <= limit)
507 kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
508 }
509}
510
511} // namespace meshopt
512
513size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
514{
515 using namespace meshopt;
516
517 assert(index_count % 3 == 0);
518 assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
519 assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
520 assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
521
522 (void)kMeshletMaxVertices;
523 (void)kMeshletMaxTriangles;
524
525 // meshlet construction is limited by max vertices and max triangles per meshlet
526 // the worst case is that the input is an unindexed stream since this equally stresses both limits
527 // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle
528 size_t max_vertices_conservative = max_vertices - 2;
529 size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative;
530 size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles;
531
532 return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
533}
534
535size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
536{
537 using namespace meshopt;
538
539 assert(index_count % 3 == 0);
540 assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
541 assert(vertex_positions_stride % sizeof(float) == 0);
542
543 assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
544 assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
545 assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
546
547 assert(cone_weight >= 0 && cone_weight <= 1);
548
549 meshopt_Allocator allocator;
550
551 TriangleAdjacency2 adjacency = {};
552 buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
553
554 unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
555 memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
556
557 size_t face_count = index_count / 3;
558
559 unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
560 memset(emitted_flags, 0, face_count);
561
562 // for each triangle, precompute centroid & normal to use for scoring
563 Cone* triangles = allocator.allocate<Cone>(face_count);
564 float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
565
566 // assuming each meshlet is a square patch, expected radius is sqrt(expected area)
567 float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
568 float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
569
570 // build a kd-tree for nearest neighbor lookup
571 unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
572 for (size_t i = 0; i < face_count; ++i)
573 kdindices[i] = unsigned(i);
574
575 KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
576 kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
577
578 // index of the vertex in the meshlet, 0xff if the vertex isn't used
579 unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
580 memset(used, -1, vertex_count);
581
582 meshopt_Meshlet meshlet = {};
583 size_t meshlet_offset = 0;
584
585 Cone meshlet_cone_acc = {};
586
587 for (;;)
588 {
589 Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
590
591 unsigned int best_extra = 0;
592 unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra);
593
594 // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring
595 if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles))
596 {
597 best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f, NULL);
598 }
599
600 // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity
601 if (best_triangle == ~0u)
602 {
603 float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
604 unsigned int index = ~0u;
605 float limit = FLT_MAX;
606
607 kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
608
609 best_triangle = index;
610 }
611
612 if (best_triangle == ~0u)
613 break;
614
615 unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
616 assert(a < vertex_count && b < vertex_count && c < vertex_count);
617
618 // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
619 if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
620 {
621 meshlet_offset++;
622 memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
623 }
624
625 live_triangles[a]--;
626 live_triangles[b]--;
627 live_triangles[c]--;
628
629 // remove emitted triangle from adjacency data
630 // this makes sure that we spend less time traversing these lists on subsequent iterations
631 for (size_t k = 0; k < 3; ++k)
632 {
633 unsigned int index = indices[best_triangle * 3 + k];
634
635 unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
636 size_t neighbors_size = adjacency.counts[index];
637
638 for (size_t i = 0; i < neighbors_size; ++i)
639 {
640 unsigned int tri = neighbors[i];
641
642 if (tri == best_triangle)
643 {
644 neighbors[i] = neighbors[neighbors_size - 1];
645 adjacency.counts[index]--;
646 break;
647 }
648 }
649 }
650
651 // update aggregated meshlet cone data for scoring subsequent triangles
652 meshlet_cone_acc.px += triangles[best_triangle].px;
653 meshlet_cone_acc.py += triangles[best_triangle].py;
654 meshlet_cone_acc.pz += triangles[best_triangle].pz;
655 meshlet_cone_acc.nx += triangles[best_triangle].nx;
656 meshlet_cone_acc.ny += triangles[best_triangle].ny;
657 meshlet_cone_acc.nz += triangles[best_triangle].nz;
658
659 emitted_flags[best_triangle] = 1;
660 }
661
662 if (meshlet.triangle_count)
663 {
664 finishMeshlet(meshlet, meshlet_triangles);
665
666 meshlets[meshlet_offset++] = meshlet;
667 }
668
669 assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
670 return meshlet_offset;
671}
672
673size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
674{
675 using namespace meshopt;
676
677 assert(index_count % 3 == 0);
678
679 assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
680 assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
681 assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
682
683 meshopt_Allocator allocator;
684
685 // index of the vertex in the meshlet, 0xff if the vertex isn't used
686 unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
687 memset(used, -1, vertex_count);
688
689 meshopt_Meshlet meshlet = {};
690 size_t meshlet_offset = 0;
691
692 for (size_t i = 0; i < index_count; i += 3)
693 {
694 unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
695 assert(a < vertex_count && b < vertex_count && c < vertex_count);
696
697 // appends triangle to the meshlet and writes previous meshlet to the output if full
698 meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
699 }
700
701 if (meshlet.triangle_count)
702 {
703 finishMeshlet(meshlet, meshlet_triangles);
704
705 meshlets[meshlet_offset++] = meshlet;
706 }
707
708 assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
709 return meshlet_offset;
710}
711
712meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
713{
714 using namespace meshopt;
715
716 assert(index_count % 3 == 0);
717 assert(index_count / 3 <= kMeshletMaxTriangles);
718 assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
719 assert(vertex_positions_stride % sizeof(float) == 0);
720
721 (void)vertex_count;
722
723 size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
724
725 // compute triangle normals and gather triangle corners
726 float normals[kMeshletMaxTriangles][3];
727 float corners[kMeshletMaxTriangles][3][3];
728 size_t triangles = 0;
729
730 for (size_t i = 0; i < index_count; i += 3)
731 {
732 unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
733 assert(a < vertex_count && b < vertex_count && c < vertex_count);
734
735 const float* p0 = vertex_positions + vertex_stride_float * a;
736 const float* p1 = vertex_positions + vertex_stride_float * b;
737 const float* p2 = vertex_positions + vertex_stride_float * c;
738
739 float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
740 float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
741
742 float normalx = p10[1] * p20[2] - p10[2] * p20[1];
743 float normaly = p10[2] * p20[0] - p10[0] * p20[2];
744 float normalz = p10[0] * p20[1] - p10[1] * p20[0];
745
746 float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
747
748 // no need to include degenerate triangles - they will be invisible anyway
749 if (area == 0.f)
750 continue;
751
752 // record triangle normals & corners for future use; normal and corner 0 define a plane equation
753 normals[triangles][0] = normalx / area;
754 normals[triangles][1] = normaly / area;
755 normals[triangles][2] = normalz / area;
756 memcpy(corners[triangles][0], p0, 3 * sizeof(float));
757 memcpy(corners[triangles][1], p1, 3 * sizeof(float));
758 memcpy(corners[triangles][2], p2, 3 * sizeof(float));
759 triangles++;
760 }
761
762 meshopt_Bounds bounds = {};
763
764 // degenerate cluster, no valid triangles => trivial reject (cone data is 0)
765 if (triangles == 0)
766 return bounds;
767
768 // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
769 float psphere[4] = {};
770 computeBoundingSphere(psphere, corners[0], triangles * 3);
771
772 float center[3] = {psphere[0], psphere[1], psphere[2]};
773
774 // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
775 float nsphere[4] = {};
776 computeBoundingSphere(nsphere, normals, triangles);
777
778 float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
779 float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
780 float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;
781
782 axis[0] *= invaxislength;
783 axis[1] *= invaxislength;
784 axis[2] *= invaxislength;
785
786 // compute a tight cone around all normals, mindp = cos(angle/2)
787 float mindp = 1.f;
788
789 for (size_t i = 0; i < triangles; ++i)
790 {
791 float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];
792
793 mindp = (dp < mindp) ? dp : mindp;
794 }
795
796 // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
797 bounds.center[0] = center[0];
798 bounds.center[1] = center[1];
799 bounds.center[2] = center[2];
800 bounds.radius = psphere[3];
801
802 // degenerate cluster, normal cone is larger than a hemisphere => trivial accept
803 // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
804 // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
805 if (mindp <= 0.1f)
806 {
807 bounds.cone_cutoff = 1;
808 bounds.cone_cutoff_s8 = 127;
809 return bounds;
810 }
811
812 float maxt = 0;
813
814 // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
815 for (size_t i = 0; i < triangles; ++i)
816 {
817 // dot(center-t*axis-corner, trinormal) = 0
818 // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
819 float cx = center[0] - corners[i][0][0];
820 float cy = center[1] - corners[i][0][1];
821 float cz = center[2] - corners[i][0][2];
822
823 float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
824 float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];
825
826 // dn should be larger than mindp cutoff above
827 assert(dn > 0.f);
828 float t = dc / dn;
829
830 maxt = (t > maxt) ? t : maxt;
831 }
832
833 // cone apex should be in the negative half-space of all cluster triangles by construction
834 bounds.cone_apex[0] = center[0] - axis[0] * maxt;
835 bounds.cone_apex[1] = center[1] - axis[1] * maxt;
836 bounds.cone_apex[2] = center[2] - axis[2] * maxt;
837
838 // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
839 bounds.cone_axis[0] = axis[0];
840 bounds.cone_axis[1] = axis[1];
841 bounds.cone_axis[2] = axis[2];
842
843 // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
844 // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
845 bounds.cone_cutoff = sqrtf(1 - mindp * mindp);
846
847 // quantize axis & cutoff to 8-bit SNORM format
848 bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
849 bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
850 bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));
851
852 // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
853 float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
854 float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
855 float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);
856
857 // note that we need to round this up instead of rounding to nearest, hence +1
858 int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);
859
860 bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);
861
862 return bounds;
863}
864
865meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
866{
867 using namespace meshopt;
868
869 assert(triangle_count <= kMeshletMaxTriangles);
870 assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
871 assert(vertex_positions_stride % sizeof(float) == 0);
872
873 unsigned int indices[kMeshletMaxTriangles * 3];
874
875 for (size_t i = 0; i < triangle_count * 3; ++i)
876 {
877 unsigned int index = meshlet_vertices[meshlet_triangles[i]];
878 assert(index < vertex_count);
879
880 indices[i] = index;
881 }
882
883 return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
884}
885