1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details |
2 | #include "meshoptimizer.h" |
3 | |
4 | #include <assert.h> |
5 | #include <float.h> |
6 | #include <math.h> |
7 | #include <string.h> |
8 | |
9 | // This work is based on: |
10 | // Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016 |
11 | // Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016 |
12 | // Jack Ritter. An Efficient Bounding Sphere. 1990 |
13 | namespace meshopt |
14 | { |
15 | |
16 | // This must be <= 255 since index 0xff is used internally to indicate a vertex that doesn't belong to a meshlet |
17 | const size_t kMeshletMaxVertices = 255; |
18 | |
19 | // A reasonable limit is around 2*max_vertices or less |
20 | const size_t kMeshletMaxTriangles = 512; |
21 | |
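// Per-vertex triangle adjacency stored in a compact CSR-style layout: counts[v] is the number of
// triangles touching vertex v, and offsets[v] is the start of that vertex's triangle list within data.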
22 | struct TriangleAdjacency2 |
23 | { |
24 | unsigned int* counts; |
25 | unsigned int* offsets; |
26 | unsigned int* data; |
27 | }; |
28 | |
29 | static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) |
30 | { |
31 | size_t face_count = index_count / 3; |
32 | |
33 | // allocate arrays |
34 | adjacency.counts = allocator.allocate<unsigned int>(vertex_count); |
35 | adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); |
36 | adjacency.data = allocator.allocate<unsigned int>(index_count); |
37 | |
38 | // fill triangle counts |
39 | memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); |
40 | |
41 | for (size_t i = 0; i < index_count; ++i) |
42 | { |
43 | assert(indices[i] < vertex_count); |
44 | |
45 | adjacency.counts[indices[i]]++; |
46 | } |
47 | |
48 | // fill offset table |
49 | unsigned int offset = 0; |
50 | |
51 | for (size_t i = 0; i < vertex_count; ++i) |
52 | { |
53 | adjacency.offsets[i] = offset; |
54 | offset += adjacency.counts[i]; |
55 | } |
56 | |
57 | assert(offset == index_count); |
58 | |
59 | // fill triangle data |
60 | for (size_t i = 0; i < face_count; ++i) |
61 | { |
62 | unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; |
63 | |
64 | adjacency.data[adjacency.offsets[a]++] = unsigned(i); |
65 | adjacency.data[adjacency.offsets[b]++] = unsigned(i); |
66 | adjacency.data[adjacency.offsets[c]++] = unsigned(i); |
67 | } |
68 | |
69 | // fix offsets that have been disturbed by the previous pass |
70 | for (size_t i = 0; i < vertex_count; ++i) |
71 | { |
72 | assert(adjacency.offsets[i] >= adjacency.counts[i]); |
73 | |
74 | adjacency.offsets[i] -= adjacency.counts[i]; |
75 | } |
76 | } |
77 | |
78 | static void computeBoundingSphere(float result[4], const float points[][3], size_t count) |
79 | { |
80 | assert(count > 0); |
81 | |
82 | // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates |
83 | size_t pmin[3] = {0, 0, 0}; |
84 | size_t pmax[3] = {0, 0, 0}; |
85 | |
86 | for (size_t i = 0; i < count; ++i) |
87 | { |
88 | const float* p = points[i]; |
89 | |
90 | for (int axis = 0; axis < 3; ++axis) |
91 | { |
92 | pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; |
93 | pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis]; |
94 | } |
95 | } |
96 | |
97 | // find the pair of points with largest distance |
98 | float paxisd2 = 0; |
99 | int paxis = 0; |
100 | |
101 | for (int axis = 0; axis < 3; ++axis) |
102 | { |
103 | const float* p1 = points[pmin[axis]]; |
104 | const float* p2 = points[pmax[axis]]; |
105 | |
106 | float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); |
107 | |
108 | if (d2 > paxisd2) |
109 | { |
110 | paxisd2 = d2; |
111 | paxis = axis; |
112 | } |
113 | } |
114 | |
115 | // use the longest segment as the initial sphere diameter |
116 | const float* p1 = points[pmin[paxis]]; |
117 | const float* p2 = points[pmax[paxis]]; |
118 | |
119 | float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; |
120 | float radius = sqrtf(paxisd2) / 2; |
121 | |
122 | // iteratively adjust the sphere up until all points fit |
123 | for (size_t i = 0; i < count; ++i) |
124 | { |
125 | const float* p = points[i]; |
126 | float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); |
127 | |
128 | if (d2 > radius * radius) |
129 | { |
130 | float d = sqrtf(d2); |
131 | assert(d > 0); |
132 | |
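// grow the sphere just enough to enclose both the previous sphere and p: the new radius is (radius + d) / 2,
// and k places the new center on the segment between p and the old center so that both are covered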
133 | float k = 0.5f + (radius / d) / 2; |
134 | |
135 | center[0] = center[0] * k + p[0] * (1 - k); |
136 | center[1] = center[1] * k + p[1] * (1 - k); |
137 | center[2] = center[2] * k + p[2] * (1 - k); |
138 | radius = (radius + d) / 2; |
139 | } |
140 | } |
141 | |
142 | result[0] = center[0]; |
143 | result[1] = center[1]; |
144 | result[2] = center[2]; |
145 | result[3] = radius; |
146 | } |
147 | |
148 | struct Cone |
149 | { |
150 | float px, py, pz; |
151 | float nx, ny, nz; |
152 | }; |
153 | |
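// Scores a candidate triangle for the current meshlet; lower is better. The distance term penalizes triangles
// far from the meshlet center relative to the expected meshlet radius, and the cone term penalizes triangles
// whose normal diverges from the meshlet's average normal (spread is their dot product); cone_weight balances the two.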
154 | static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) |
155 | { |
156 | float cone = 1.f - spread * cone_weight; |
157 | float cone_clamped = cone < 1e-3f ? 1e-3f : cone; |
158 | |
159 | return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; |
160 | } |
161 | |
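// Converts the accumulated per-triangle sums (centroids in px/py/pz, normals in nx/ny/nz) into the
// meshlet's average centroid and a normalized cone axis.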
162 | static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) |
163 | { |
164 | Cone result = acc; |
165 | |
166 | float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count); |
167 | |
168 | result.px *= center_scale; |
169 | result.py *= center_scale; |
170 | result.pz *= center_scale; |
171 | |
172 | float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz; |
173 | float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length); |
174 | |
175 | result.nx *= axis_scale; |
176 | result.ny *= axis_scale; |
177 | result.nz *= axis_scale; |
178 | |
179 | return result; |
180 | } |
181 | |
182 | static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) |
183 | { |
184 | (void)vertex_count; |
185 | |
186 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); |
187 | size_t face_count = index_count / 3; |
188 | |
189 | float mesh_area = 0; |
190 | |
191 | for (size_t i = 0; i < face_count; ++i) |
192 | { |
193 | unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; |
194 | assert(a < vertex_count && b < vertex_count && c < vertex_count); |
195 | |
196 | const float* p0 = vertex_positions + vertex_stride_float * a; |
197 | const float* p1 = vertex_positions + vertex_stride_float * b; |
198 | const float* p2 = vertex_positions + vertex_stride_float * c; |
199 | |
200 | float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; |
201 | float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; |
202 | |
203 | float normalx = p10[1] * p20[2] - p10[2] * p20[1]; |
204 | float normaly = p10[2] * p20[0] - p10[0] * p20[2]; |
205 | float normalz = p10[0] * p20[1] - p10[1] * p20[0]; |
206 | |
207 | float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); |
208 | float invarea = (area == 0.f) ? 0.f : 1.f / area; |
209 | |
210 | triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f; |
211 | triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f; |
212 | triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f; |
213 | |
214 | triangles[i].nx = normalx * invarea; |
215 | triangles[i].ny = normaly * invarea; |
216 | triangles[i].nz = normalz * invarea; |
217 | |
218 | mesh_area += area; |
219 | } |
220 | |
221 | return mesh_area; |
222 | } |
223 | |
224 | static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) |
225 | { |
226 | size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; |
227 | |
228 | // fill 4b padding with 0 |
229 | while (offset & 3) |
230 | meshlet_triangles[offset++] = 0; |
231 | } |
232 | |
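// Appends triangle (a, b, c) to the current meshlet, allocating meshlet-local indices for any new vertices.
// When the triangle would exceed max_vertices or max_triangles, the current meshlet is flushed to
// meshlets[meshlet_offset] first and the function returns true so the caller can advance the offset and reset per-meshlet state.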
233 | static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) |
234 | { |
235 | unsigned char& av = used[a]; |
236 | unsigned char& bv = used[b]; |
237 | unsigned char& cv = used[c]; |
238 | |
239 | bool result = false; |
240 | |
241 | unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); |
242 | |
243 | if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) |
244 | { |
245 | meshlets[meshlet_offset] = meshlet; |
246 | |
247 | for (size_t j = 0; j < meshlet.vertex_count; ++j) |
248 | used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; |
249 | |
250 | finishMeshlet(meshlet, meshlet_triangles); |
251 | |
252 | meshlet.vertex_offset += meshlet.vertex_count; |
253 | meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding |
254 | meshlet.vertex_count = 0; |
255 | meshlet.triangle_count = 0; |
256 | |
257 | result = true; |
258 | } |
259 | |
260 | if (av == 0xff) |
261 | { |
262 | av = (unsigned char)meshlet.vertex_count; |
263 | meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a; |
264 | } |
265 | |
266 | if (bv == 0xff) |
267 | { |
268 | bv = (unsigned char)meshlet.vertex_count; |
269 | meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; |
270 | } |
271 | |
272 | if (cv == 0xff) |
273 | { |
274 | cv = (unsigned char)meshlet.vertex_count; |
275 | meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; |
276 | } |
277 | |
278 | meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; |
279 | meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; |
280 | meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; |
281 | meshlet.triangle_count++; |
282 | |
283 | return result; |
284 | } |
285 | |
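// Picks the best candidate triangle adjacent to the current meshlet: triangles that introduce fewer new
// vertices always win (with triangles containing about-to-be-orphaned vertices boosted), and the score is
// only used to decide between candidates of equal priority; returns ~0u if no live adjacent triangle exists.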
286 | static unsigned int getNeighborTriangle(const meshopt_Meshlet& meshlet, const Cone* meshlet_cone, unsigned int* meshlet_vertices, const unsigned int* indices, const TriangleAdjacency2& adjacency, const Cone* triangles, const unsigned int* live_triangles, const unsigned char* used, float meshlet_expected_radius, float cone_weight, unsigned int* out_extra) |
287 | { |
288 | unsigned int best_triangle = ~0u; |
289 | unsigned int best_extra = 5; |
290 | float best_score = FLT_MAX; |
291 | |
292 | for (size_t i = 0; i < meshlet.vertex_count; ++i) |
293 | { |
294 | unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; |
295 | |
296 | unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; |
297 | size_t neighbors_size = adjacency.counts[index]; |
298 | |
299 | for (size_t j = 0; j < neighbors_size; ++j) |
300 | { |
301 | unsigned int triangle = neighbors[j]; |
302 | unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; |
303 | |
304 | unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); |
305 | |
306 | // triangles that don't add new vertices to meshlets are max. priority |
307 | if (extra != 0) |
308 | { |
309 | // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets |
310 | if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) |
311 | extra = 0; |
312 | |
313 | extra++; |
314 | } |
315 | |
316 | // since topology-based priority is always more important than the score, we can skip scoring in some cases |
317 | if (extra > best_extra) |
318 | continue; |
319 | |
320 | float score = 0; |
321 | |
322 | // caller selects one of two scoring functions: geometrical (based on meshlet cone) or topological (based on remaining triangles) |
323 | if (meshlet_cone) |
324 | { |
325 | const Cone& tri_cone = triangles[triangle]; |
326 | |
327 | float distance2 = |
328 | (tri_cone.px - meshlet_cone->px) * (tri_cone.px - meshlet_cone->px) + |
329 | (tri_cone.py - meshlet_cone->py) * (tri_cone.py - meshlet_cone->py) + |
330 | (tri_cone.pz - meshlet_cone->pz) * (tri_cone.pz - meshlet_cone->pz); |
331 | |
332 | float spread = tri_cone.nx * meshlet_cone->nx + tri_cone.ny * meshlet_cone->ny + tri_cone.nz * meshlet_cone->nz; |
333 | |
334 | score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); |
335 | } |
336 | else |
337 | { |
338 | // each live_triangles entry is >= 1 since it includes the current triangle we're processing |
339 | score = float(live_triangles[a] + live_triangles[b] + live_triangles[c] - 3); |
340 | } |
341 | |
342 | // note that topology-based priority is always more important than the score |
343 | // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost |
344 | if (extra < best_extra || score < best_score) |
345 | { |
346 | best_triangle = triangle; |
347 | best_extra = extra; |
348 | best_score = score; |
349 | } |
350 | } |
351 | } |
352 | |
353 | if (out_extra) |
354 | *out_extra = best_extra; |
355 | |
356 | return best_triangle; |
357 | } |
358 | |
359 | struct KDNode |
360 | { |
361 | union |
362 | { |
363 | float split; |
364 | unsigned int index; |
365 | }; |
366 | |
367 | // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point) |
368 | // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children |
369 | unsigned int axis : 2; |
370 | unsigned int children : 30; |
371 | }; |
372 | |
373 | static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot) |
374 | { |
375 | size_t m = 0; |
376 | |
377 | // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot |
378 | for (size_t i = 0; i < count; ++i) |
379 | { |
380 | float v = points[indices[i] * stride + axis]; |
381 | |
382 | // swap(m, i) unconditionally |
383 | unsigned int t = indices[m]; |
384 | indices[m] = indices[i]; |
385 | indices[i] = t; |
386 | |
387 | // when v >= pivot, we swap i with m without advancing it, preserving invariants |
388 | m += v < pivot; |
389 | } |
390 | |
391 | return m; |
392 | } |
393 | |
394 | static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count) |
395 | { |
396 | assert(offset + count <= node_count); |
397 | (void)node_count; |
398 | |
399 | KDNode& result = nodes[offset]; |
400 | |
401 | result.index = indices[0]; |
402 | result.axis = 3; |
403 | result.children = unsigned(count - 1); |
404 | |
405 | // all remaining points are stored in nodes immediately following the leaf |
406 | for (size_t i = 1; i < count; ++i) |
407 | { |
408 | KDNode& tail = nodes[offset + i]; |
409 | |
410 | tail.index = indices[i]; |
411 | tail.axis = 3; |
412 | tail.children = ~0u >> 2; // bogus value to prevent misuse |
413 | } |
414 | |
415 | return offset + count; |
416 | } |
417 | |
418 | static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size) |
419 | { |
420 | assert(count > 0); |
421 | assert(offset < node_count); |
422 | |
423 | if (count <= leaf_size) |
424 | return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); |
425 | |
426 | float mean[3] = {}; |
427 | float vars[3] = {}; |
428 | float runc = 1, runs = 1; |
429 | |
430 | // gather statistics on the points in the subtree using Welford's algorithm |
431 | for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc) |
432 | { |
433 | const float* point = points + indices[i] * stride; |
434 | |
435 | for (int k = 0; k < 3; ++k) |
436 | { |
437 | float delta = point[k] - mean[k]; |
438 | mean[k] += delta * runs; |
439 | vars[k] += delta * (point[k] - mean[k]); |
440 | } |
441 | } |
442 | |
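// note: vars now holds the per-axis sum of squared deviations (count * variance), which is sufficient for
// picking the axis with the largest variance since the scale factor is the same for all three axes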
443 | // split axis is one where the variance is largest |
444 | unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2; |
445 | |
446 | float split = mean[axis]; |
447 | size_t middle = kdtreePartition(indices, count, points, stride, axis, split); |
448 | |
449 | // when the partition is degenerate simply consolidate the points into a single node |
450 | if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2) |
451 | return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); |
452 | |
453 | KDNode& result = nodes[offset]; |
454 | |
455 | result.split = split; |
456 | result.axis = axis; |
457 | |
458 | // left subtree is right after our node |
459 | size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size); |
460 | |
461 | // distance to the right subtree is represented explicitly |
462 | result.children = unsigned(next_offset - offset - 1); |
463 | |
464 | return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); |
465 | } |
466 | |
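// Recursive nearest-neighbor query over the kd-tree: at leaves, checks every stored point that hasn't been
// emitted yet; at branches, descends into the side containing the query position first and visits the other
// side only if it could still contain a point closer than the current best distance (limit).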
467 | static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) |
468 | { |
469 | const KDNode& node = nodes[root]; |
470 | |
471 | if (node.axis == 3) |
472 | { |
473 | // leaf |
474 | for (unsigned int i = 0; i <= node.children; ++i) |
475 | { |
476 | unsigned int index = nodes[root + i].index; |
477 | |
478 | if (emitted_flags[index]) |
479 | continue; |
480 | |
481 | const float* point = points + index * stride; |
482 | |
483 | float distance2 = |
484 | (point[0] - position[0]) * (point[0] - position[0]) + |
485 | (point[1] - position[1]) * (point[1] - position[1]) + |
486 | (point[2] - position[2]) * (point[2] - position[2]); |
487 | float distance = sqrtf(distance2); |
488 | |
489 | if (distance < limit) |
490 | { |
491 | result = index; |
492 | limit = distance; |
493 | } |
494 | } |
495 | } |
496 | else |
497 | { |
498 | // branch; we order the recursion so that the subtree containing the search position is processed first |
499 | float delta = position[node.axis] - node.split; |
500 | unsigned int first = (delta <= 0) ? 0 : node.children; |
501 | unsigned int second = first ^ node.children; |
502 | |
503 | kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); |
504 | |
505 | // only process the other node if it can have a match based on closest distance so far |
506 | if (fabsf(delta) <= limit) |
507 | kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit); |
508 | } |
509 | } |
510 | |
511 | } // namespace meshopt |
512 | |
513 | size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) |
514 | { |
515 | using namespace meshopt; |
516 | |
517 | assert(index_count % 3 == 0); |
518 | assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); |
519 | assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); |
520 | assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned |
521 | |
522 | (void)kMeshletMaxVertices; |
523 | (void)kMeshletMaxTriangles; |
524 | |
525 | // meshlet construction is limited by max vertices and max triangles per meshlet |
526 | // the worst case is that the input is an unindexed stream since this equally stresses both limits |
527 | // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle |
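// for example (illustrative arithmetic), with max_vertices=64 and max_triangles=124, an unindexed stream of 3000 indices
// (1000 triangles) needs at most ceil(3000/62)=49 meshlets by the vertex limit and ceil(1000/124)=9 by the triangle limit, so the bound is 49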
528 | size_t max_vertices_conservative = max_vertices - 2; |
529 | size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative; |
530 | size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles; |
531 | |
532 | return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; |
533 | } |
534 | |
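// Illustrative usage sketch; caller-side names (indices, vertices, Vertex) are assumptions for this example,
// and meshoptimizer.h remains the authoritative API documentation. Buffer sizing follows the bound above:
//
//   const size_t max_vertices = 64, max_triangles = 124; // max_triangles must be divisible by 4
//   size_t max_meshlets = meshopt_buildMeshletsBound(indices.size(), max_vertices, max_triangles);
//   std::vector<meshopt_Meshlet> meshlets(max_meshlets);
//   std::vector<unsigned int> meshlet_vertices(max_meshlets * max_vertices);
//   std::vector<unsigned char> meshlet_triangles(max_meshlets * max_triangles * 3);
//   size_t meshlet_count = meshopt_buildMeshlets(meshlets.data(), meshlet_vertices.data(), meshlet_triangles.data(),
//       indices.data(), indices.size(), &vertices[0].x, vertices.size(), sizeof(Vertex),
//       max_vertices, max_triangles, /* cone_weight= */ 0.25f);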
535 | size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) |
536 | { |
537 | using namespace meshopt; |
538 | |
539 | assert(index_count % 3 == 0); |
540 | assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); |
541 | assert(vertex_positions_stride % sizeof(float) == 0); |
542 | |
543 | assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); |
544 | assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); |
545 | assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned |
546 | |
547 | assert(cone_weight >= 0 && cone_weight <= 1); |
548 | |
549 | meshopt_Allocator allocator; |
550 | |
551 | TriangleAdjacency2 adjacency = {}; |
552 | buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); |
553 | |
554 | unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); |
555 | memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); |
556 | |
557 | size_t face_count = index_count / 3; |
558 | |
559 | unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); |
560 | memset(emitted_flags, 0, face_count); |
561 | |
562 | // for each triangle, precompute centroid & normal to use for scoring |
563 | Cone* triangles = allocator.allocate<Cone>(face_count); |
564 | float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); |
565 | |
566 | // assuming each meshlet is a square patch, expected radius is sqrt(expected area) |
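// note: computeTriangleCones returns the sum of cross-product magnitudes, i.e. twice the total triangle area, hence the 0.5 factor below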
567 | float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f; |
568 | float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f; |
569 | |
570 | // build a kd-tree for nearest neighbor lookup |
571 | unsigned int* kdindices = allocator.allocate<unsigned int>(face_count); |
572 | for (size_t i = 0; i < face_count; ++i) |
573 | kdindices[i] = unsigned(i); |
574 | |
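// conservative allocation: the tree stores each point in exactly one node and adds at most one branch node per split,
// so 2*face_count nodes always suffice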
575 | KDNode* nodes = allocator.allocate<KDNode>(face_count * 2); |
576 | kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); |
577 | |
578 | // index of the vertex in the meshlet, 0xff if the vertex isn't used |
579 | unsigned char* used = allocator.allocate<unsigned char>(vertex_count); |
580 | memset(used, -1, vertex_count); |
581 | |
582 | meshopt_Meshlet meshlet = {}; |
583 | size_t meshlet_offset = 0; |
584 | |
585 | Cone meshlet_cone_acc = {}; |
586 | |
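// greedy meshlet construction: repeatedly pick the best triangle adjacent to the current meshlet using the
// cone-based score, re-select with topological scoring when the pick wouldn't fit, and fall back to a kd-tree
// spatial search when the meshlet has no live neighboring triangles at all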
587 | for (;;) |
588 | { |
589 | Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); |
590 | |
591 | unsigned int best_extra = 0; |
592 | unsigned int best_triangle = getNeighborTriangle(meshlet, &meshlet_cone, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, cone_weight, &best_extra); |
593 | |
594 | // if the best triangle doesn't fit into current meshlet, the spatial scoring we've used is not very meaningful, so we re-select using topological scoring |
595 | if (best_triangle != ~0u && (meshlet.vertex_count + best_extra > max_vertices || meshlet.triangle_count >= max_triangles)) |
596 | { |
597 | best_triangle = getNeighborTriangle(meshlet, NULL, meshlet_vertices, indices, adjacency, triangles, live_triangles, used, meshlet_expected_radius, 0.f, NULL); |
598 | } |
599 | |
600 | // when we run out of neighboring triangles we need to switch to spatial search; we currently just pick the closest triangle irrespective of connectivity |
601 | if (best_triangle == ~0u) |
602 | { |
603 | float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; |
604 | unsigned int index = ~0u; |
605 | float limit = FLT_MAX; |
606 | |
607 | kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); |
608 | |
609 | best_triangle = index; |
610 | } |
611 | |
612 | if (best_triangle == ~0u) |
613 | break; |
614 | |
615 | unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; |
616 | assert(a < vertex_count && b < vertex_count && c < vertex_count); |
617 | |
618 | // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds |
619 | if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) |
620 | { |
621 | meshlet_offset++; |
622 | memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); |
623 | } |
624 | |
625 | live_triangles[a]--; |
626 | live_triangles[b]--; |
627 | live_triangles[c]--; |
628 | |
629 | // remove emitted triangle from adjacency data |
630 | // this makes sure that we spend less time traversing these lists on subsequent iterations |
631 | for (size_t k = 0; k < 3; ++k) |
632 | { |
633 | unsigned int index = indices[best_triangle * 3 + k]; |
634 | |
635 | unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index]; |
636 | size_t neighbors_size = adjacency.counts[index]; |
637 | |
638 | for (size_t i = 0; i < neighbors_size; ++i) |
639 | { |
640 | unsigned int tri = neighbors[i]; |
641 | |
642 | if (tri == best_triangle) |
643 | { |
644 | neighbors[i] = neighbors[neighbors_size - 1]; |
645 | adjacency.counts[index]--; |
646 | break; |
647 | } |
648 | } |
649 | } |
650 | |
651 | // update aggregated meshlet cone data for scoring subsequent triangles |
652 | meshlet_cone_acc.px += triangles[best_triangle].px; |
653 | meshlet_cone_acc.py += triangles[best_triangle].py; |
654 | meshlet_cone_acc.pz += triangles[best_triangle].pz; |
655 | meshlet_cone_acc.nx += triangles[best_triangle].nx; |
656 | meshlet_cone_acc.ny += triangles[best_triangle].ny; |
657 | meshlet_cone_acc.nz += triangles[best_triangle].nz; |
658 | |
659 | emitted_flags[best_triangle] = 1; |
660 | } |
661 | |
662 | if (meshlet.triangle_count) |
663 | { |
664 | finishMeshlet(meshlet, meshlet_triangles); |
665 | |
666 | meshlets[meshlet_offset++] = meshlet; |
667 | } |
668 | |
669 | assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); |
670 | return meshlet_offset; |
671 | } |
672 | |
673 | size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) |
674 | { |
675 | using namespace meshopt; |
676 | |
677 | assert(index_count % 3 == 0); |
678 | |
679 | assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); |
680 | assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); |
681 | assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned |
682 | |
683 | meshopt_Allocator allocator; |
684 | |
685 | // index of the vertex in the meshlet, 0xff if the vertex isn't used |
686 | unsigned char* used = allocator.allocate<unsigned char>(vertex_count); |
687 | memset(used, -1, vertex_count); |
688 | |
689 | meshopt_Meshlet meshlet = {}; |
690 | size_t meshlet_offset = 0; |
691 | |
692 | for (size_t i = 0; i < index_count; i += 3) |
693 | { |
694 | unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; |
695 | assert(a < vertex_count && b < vertex_count && c < vertex_count); |
696 | |
697 | // appends triangle to the meshlet and writes previous meshlet to the output if full |
698 | meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles); |
699 | } |
700 | |
701 | if (meshlet.triangle_count) |
702 | { |
703 | finishMeshlet(meshlet, meshlet_triangles); |
704 | |
705 | meshlets[meshlet_offset++] = meshlet; |
706 | } |
707 | |
708 | assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); |
709 | return meshlet_offset; |
710 | } |
711 | |
712 | meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) |
713 | { |
714 | using namespace meshopt; |
715 | |
716 | assert(index_count % 3 == 0); |
717 | assert(index_count / 3 <= kMeshletMaxTriangles); |
718 | assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); |
719 | assert(vertex_positions_stride % sizeof(float) == 0); |
720 | |
721 | (void)vertex_count; |
722 | |
723 | size_t vertex_stride_float = vertex_positions_stride / sizeof(float); |
724 | |
725 | // compute triangle normals and gather triangle corners |
726 | float normals[kMeshletMaxTriangles][3]; |
727 | float corners[kMeshletMaxTriangles][3][3]; |
728 | size_t triangles = 0; |
729 | |
730 | for (size_t i = 0; i < index_count; i += 3) |
731 | { |
732 | unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; |
733 | assert(a < vertex_count && b < vertex_count && c < vertex_count); |
734 | |
735 | const float* p0 = vertex_positions + vertex_stride_float * a; |
736 | const float* p1 = vertex_positions + vertex_stride_float * b; |
737 | const float* p2 = vertex_positions + vertex_stride_float * c; |
738 | |
739 | float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; |
740 | float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; |
741 | |
742 | float normalx = p10[1] * p20[2] - p10[2] * p20[1]; |
743 | float normaly = p10[2] * p20[0] - p10[0] * p20[2]; |
744 | float normalz = p10[0] * p20[1] - p10[1] * p20[0]; |
745 | |
746 | float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); |
747 | |
748 | // no need to include degenerate triangles - they will be invisible anyway |
749 | if (area == 0.f) |
750 | continue; |
751 | |
752 | // record triangle normals & corners for future use; normal and corner 0 define a plane equation |
753 | normals[triangles][0] = normalx / area; |
754 | normals[triangles][1] = normaly / area; |
755 | normals[triangles][2] = normalz / area; |
756 | memcpy(corners[triangles][0], p0, 3 * sizeof(float)); |
757 | memcpy(corners[triangles][1], p1, 3 * sizeof(float)); |
758 | memcpy(corners[triangles][2], p2, 3 * sizeof(float)); |
759 | triangles++; |
760 | } |
761 | |
762 | meshopt_Bounds bounds = {}; |
763 | |
764 | // degenerate cluster, no valid triangles => trivial reject (cone data is 0) |
765 | if (triangles == 0) |
766 | return bounds; |
767 | |
768 | // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well |
769 | float psphere[4] = {}; |
770 | computeBoundingSphere(psphere, corners[0], triangles * 3); |
771 | |
772 | float center[3] = {psphere[0], psphere[1], psphere[2]}; |
773 | |
774 | // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis |
775 | float nsphere[4] = {}; |
776 | computeBoundingSphere(nsphere, normals, triangles); |
777 | |
778 | float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; |
779 | float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); |
780 | float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength; |
781 | |
782 | axis[0] *= invaxislength; |
783 | axis[1] *= invaxislength; |
784 | axis[2] *= invaxislength; |
785 | |
786 | // compute a tight cone around all normals, mindp = cos(angle/2) |
787 | float mindp = 1.f; |
788 | |
789 | for (size_t i = 0; i < triangles; ++i) |
790 | { |
791 | float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2]; |
792 | |
793 | mindp = (dp < mindp) ? dp : mindp; |
794 | } |
795 | |
796 | // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones |
797 | bounds.center[0] = center[0]; |
798 | bounds.center[1] = center[1]; |
799 | bounds.center[2] = center[2]; |
800 | bounds.radius = psphere[3]; |
801 | |
802 | // degenerate cluster, normal cone is larger than a hemisphere => trivial accept |
803 | // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable |
804 | // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful |
805 | if (mindp <= 0.1f) |
806 | { |
807 | bounds.cone_cutoff = 1; |
808 | bounds.cone_cutoff_s8 = 127; |
809 | return bounds; |
810 | } |
811 | |
812 | float maxt = 0; |
813 | |
814 | // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles |
815 | for (size_t i = 0; i < triangles; ++i) |
816 | { |
817 | // dot(center-t*axis-corner, trinormal) = 0 |
818 | // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0 |
819 | float cx = center[0] - corners[i][0][0]; |
820 | float cy = center[1] - corners[i][0][1]; |
821 | float cz = center[2] - corners[i][0][2]; |
822 | |
823 | float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2]; |
824 | float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2]; |
825 | |
826 | // dn should be larger than mindp cutoff above |
827 | assert(dn > 0.f); |
828 | float t = dc / dn; |
829 | |
830 | maxt = (t > maxt) ? t : maxt; |
831 | } |
832 | |
833 | // cone apex should be in the negative half-space of all cluster triangles by construction |
834 | bounds.cone_apex[0] = center[0] - axis[0] * maxt; |
835 | bounds.cone_apex[1] = center[1] - axis[1] * maxt; |
836 | bounds.cone_apex[2] = center[2] - axis[2] * maxt; |
837 | |
838 | // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis |
839 | bounds.cone_axis[0] = axis[0]; |
840 | bounds.cone_axis[1] = axis[1]; |
841 | bounds.cone_axis[2] = axis[2]; |
842 | |
843 | // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone |
844 | // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a)) |
845 | bounds.cone_cutoff = sqrtf(1 - mindp * mindp); |
846 | |
847 | // quantize axis & cutoff to 8-bit SNORM format |
848 | bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8)); |
849 | bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8)); |
850 | bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8)); |
851 | |
852 | // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error |
853 | float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]); |
854 | float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]); |
855 | float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]); |
856 | |
857 | // note that we need to round this up instead of rounding to nearest, hence +1 |
858 | int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1); |
859 | |
860 | bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8); |
861 | |
862 | return bounds; |
863 | } |
864 | |
865 | meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) |
866 | { |
867 | using namespace meshopt; |
868 | |
869 | assert(triangle_count <= kMeshletMaxTriangles); |
870 | assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256); |
871 | assert(vertex_positions_stride % sizeof(float) == 0); |
872 | |
873 | unsigned int indices[kMeshletMaxTriangles * 3]; |
874 | |
875 | for (size_t i = 0; i < triangle_count * 3; ++i) |
876 | { |
877 | unsigned int index = meshlet_vertices[meshlet_triangles[i]]; |
878 | assert(index < vertex_count); |
879 | |
880 | indices[i] = index; |
881 | } |
882 | |
883 | return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); |
884 | } |
885 | |