1 | // This file is part of meshoptimizer library; see meshoptimizer.h for version/license details |
2 | #include "meshoptimizer.h" |
3 | |
4 | #include <assert.h> |
5 | #include <string.h> |
6 | |
7 | // This work is based on: |
8 | // Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 |
9 | // Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007 |
10 | namespace meshopt |
11 | { |
12 | |
// Maximum simulated post-transform cache size supported by the score tables below
const size_t kCacheSizeMax = 16;
// Live triangle counts are clamped to this value when looking up valence scores
const size_t kValenceMax = 8;

// Score lookup tables used by vertexScore() to rank vertices during greedy emission
struct VertexScoreTable
{
	float cache[1 + kCacheSizeMax]; // indexed by 1 + cache position; index 0 means "not in cache" (position -1)
	float live[1 + kValenceMax];    // indexed by the clamped number of remaining live triangles
};
21 | |
// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
static const VertexScoreTable kVertexScoreTable = {
    // cache position scores: index 0 is "not in cache", 1..16 are simulated FIFO positions
    {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
    // valence scores: index is the clamped count of not-yet-emitted triangles using the vertex
    {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
};
27 | |
// Tuned to minimize the encoded index buffer size
static const VertexScoreTable kVertexScoreTableStrip = {
    // cache position scores: index 0 is "not in cache", 1..16 are simulated FIFO positions
    {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
    // valence scores: index is the clamped count of not-yet-emitted triangles using the vertex
    {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
};
33 | |
// Vertex -> triangle adjacency in a compact counting-sort layout; filled by
// buildTriangleAdjacency, with all arrays owned by the caller's allocator.
struct TriangleAdjacency
{
	unsigned int* counts;  // per-vertex number of adjacent triangles [vertex_count]
	unsigned int* offsets; // per-vertex start index into data [vertex_count]
	unsigned int* data;    // concatenated per-vertex triangle id lists [index_count]
};
40 | |
41 | static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) |
42 | { |
43 | size_t face_count = index_count / 3; |
44 | |
45 | // allocate arrays |
46 | adjacency.counts = allocator.allocate<unsigned int>(vertex_count); |
47 | adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); |
48 | adjacency.data = allocator.allocate<unsigned int>(index_count); |
49 | |
50 | // fill triangle counts |
51 | memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); |
52 | |
53 | for (size_t i = 0; i < index_count; ++i) |
54 | { |
55 | assert(indices[i] < vertex_count); |
56 | |
57 | adjacency.counts[indices[i]]++; |
58 | } |
59 | |
60 | // fill offset table |
61 | unsigned int offset = 0; |
62 | |
63 | for (size_t i = 0; i < vertex_count; ++i) |
64 | { |
65 | adjacency.offsets[i] = offset; |
66 | offset += adjacency.counts[i]; |
67 | } |
68 | |
69 | assert(offset == index_count); |
70 | |
71 | // fill triangle data |
72 | for (size_t i = 0; i < face_count; ++i) |
73 | { |
74 | unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; |
75 | |
76 | adjacency.data[adjacency.offsets[a]++] = unsigned(i); |
77 | adjacency.data[adjacency.offsets[b]++] = unsigned(i); |
78 | adjacency.data[adjacency.offsets[c]++] = unsigned(i); |
79 | } |
80 | |
81 | // fix offsets that have been disturbed by the previous pass |
82 | for (size_t i = 0; i < vertex_count; ++i) |
83 | { |
84 | assert(adjacency.offsets[i] >= adjacency.counts[i]); |
85 | |
86 | adjacency.offsets[i] -= adjacency.counts[i]; |
87 | } |
88 | } |
89 | |
90 | static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) |
91 | { |
92 | // check dead-end stack |
93 | while (dead_end_top) |
94 | { |
95 | unsigned int vertex = dead_end[--dead_end_top]; |
96 | |
97 | if (live_triangles[vertex] > 0) |
98 | return vertex; |
99 | } |
100 | |
101 | // input order |
102 | while (input_cursor < vertex_count) |
103 | { |
104 | if (live_triangles[input_cursor] > 0) |
105 | return input_cursor; |
106 | |
107 | ++input_cursor; |
108 | } |
109 | |
110 | return ~0u; |
111 | } |
112 | |
// Selects the best next fanning vertex from the candidate range [begin, end):
// vertices expected to still be in the FIFO cache after their remaining triangles
// are emitted are preferred, with older cache entries winning ties; candidates with
// no live triangles are skipped. Returns ~0u when no candidate is usable.
static unsigned int getNextVertexNeighbor(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
	unsigned int best_vertex = ~0u;
	int best_priority = -1;

	for (const unsigned int* candidate = next_candidates_begin; candidate != next_candidates_end; ++candidate)
	{
		unsigned int vertex = *candidate;

		// fully emitted vertices can never be fanned again
		if (live_triangles[vertex] == 0)
			continue;

		unsigned int cache_age = timestamp - cache_timestamps[vertex];

		// if fanning all remaining triangles keeps the vertex in cache, rank it by age;
		// otherwise it gets the baseline priority of 0 (still beats the initial -1)
		int priority = 2 * live_triangles[vertex] + cache_age <= cache_size ? int(cache_age) : 0;

		if (priority > best_priority)
		{
			best_vertex = vertex;
			best_priority = priority;
		}
	}

	return best_vertex;
}
143 | |
144 | static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles) |
145 | { |
146 | assert(cache_position >= -1 && cache_position < int(kCacheSizeMax)); |
147 | |
148 | unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax; |
149 | |
150 | return table->cache[1 + cache_position] + table->live[live_triangles_clamped]; |
151 | } |
152 | |
153 | static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count) |
154 | { |
155 | // input order |
156 | while (input_cursor < face_count) |
157 | { |
158 | if (!emitted_flags[input_cursor]) |
159 | return input_cursor; |
160 | |
161 | ++input_cursor; |
162 | } |
163 | |
164 | return ~0u; |
165 | } |
166 | |
167 | } // namespace meshopt |
168 | |
// Reorders triangles to improve post-transform vertex cache utilization using
// Forsyth-style greedy scoring driven by the supplied score table: each vertex is
// scored from its simulated cache position and remaining triangle count, and the
// triangle with the highest vertex-score sum is emitted next.
// destination must hold index_count entries; destination == indices is supported.
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	// simulated FIFO cache size; must not exceed the score table dimensions
	unsigned int cache_size = 16;
	assert(cache_size <= kCacheSizeMax);

	size_t face_count = index_count / 3;

	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// live triangle counts; decremented as triangles are emitted
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));

	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	// compute initial vertex scores; no vertex is in the cache yet (position -1)
	float* vertex_scores = allocator.allocate<float>(vertex_count);

	for (size_t i = 0; i < vertex_count; ++i)
		vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);

	// compute triangle scores as the sum of their vertex scores
	float* triangle_scores = allocator.allocate<float>(face_count);

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int a = indices[i * 3 + 0];
		unsigned int b = indices[i * 3 + 1];
		unsigned int c = indices[i * 3 + 2];

		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
	}

	// double-buffered cache simulation; +3 leaves room for the incoming triangle
	// before eviction trims the cache back to cache_size
	unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
	unsigned int* cache = cache_holder;
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
	size_t cache_count = 0;

	unsigned int current_triangle = 0;
	unsigned int input_cursor = 1;

	unsigned int output_triangle = 0;

	while (current_triangle != ~0u)
	{
		assert(output_triangle < face_count);

		unsigned int a = indices[current_triangle * 3 + 0];
		unsigned int b = indices[current_triangle * 3 + 1];
		unsigned int c = indices[current_triangle * 3 + 2];

		// output indices
		destination[output_triangle * 3 + 0] = a;
		destination[output_triangle * 3 + 1] = b;
		destination[output_triangle * 3 + 2] = c;
		output_triangle++;

		// update emitted flags
		emitted_flags[current_triangle] = true;
		triangle_scores[current_triangle] = 0;

		// new triangle's vertices go to the front of the simulated cache
		size_t cache_write = 0;
		cache_new[cache_write++] = a;
		cache_new[cache_write++] = b;
		cache_new[cache_write++] = c;

		// old triangles; keep prior cache entries that are not part of the new triangle
		for (size_t i = 0; i < cache_count; ++i)
		{
			unsigned int index = cache[i];

			if (index != a && index != b && index != c)
			{
				cache_new[cache_write++] = index;
			}
		}

		// swap the double buffers; entries past cache_size are considered evicted
		unsigned int* cache_temp = cache;
		cache = cache_new, cache_new = cache_temp;
		cache_count = cache_write > cache_size ? cache_size : cache_write;

		// update live triangle counts
		live_triangles[a]--;
		live_triangles[b]--;
		live_triangles[c]--;

		// remove emitted triangle from adjacency data
		// this makes sure that we spend less time traversing these lists on subsequent iterations
		for (size_t k = 0; k < 3; ++k)
		{
			unsigned int index = indices[current_triangle * 3 + k];

			unsigned int* neighbors = &adjacency.data[0] + adjacency.offsets[index];
			size_t neighbors_size = adjacency.counts[index];

			for (size_t i = 0; i < neighbors_size; ++i)
			{
				unsigned int tri = neighbors[i];

				if (tri == current_triangle)
				{
					// swap-remove: list order does not matter, only membership
					neighbors[i] = neighbors[neighbors_size - 1];
					adjacency.counts[index]--;
					break;
				}
			}
		}

		unsigned int best_triangle = ~0u;
		float best_score = 0;

		// update cache positions, vertex scores and triangle scores, and find next best triangle
		for (size_t i = 0; i < cache_write; ++i)
		{
			unsigned int index = cache[i];

			// entries past cache_size were just evicted and score as "not in cache"
			int cache_position = i >= cache_size ? -1 : int(i);

			// update vertex score
			float score = vertexScore(table, cache_position, live_triangles[index]);
			float score_diff = score - vertex_scores[index];

			vertex_scores[index] = score;

			// propagate the score delta to all remaining triangles of this vertex
			const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[index];
			const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[index];

			for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
			{
				unsigned int tri = *it;
				assert(!emitted_flags[tri]);

				float tri_score = triangle_scores[tri] + score_diff;
				assert(tri_score > 0);

				if (best_score < tri_score)
				{
					best_triangle = tri;
					best_score = tri_score;
				}

				triangle_scores[tri] = tri_score;
			}
		}

		// step through input triangles in order if we hit a dead-end
		current_triangle = best_triangle;

		if (current_triangle == ~0u)
		{
			current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
		}
	}

	assert(input_cursor == face_count);
	assert(output_triangle == face_count);
}
350 | |
// Reorders indices to improve vertex cache utilization using the default score table
// (tuned to minimize ACMR; see kVertexScoreTable). destination may alias indices.
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
}
355 | |
// Reorders indices using the strip-oriented score table (tuned to minimize the
// encoded index buffer size; see kVertexScoreTableStrip). destination may alias indices.
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
}
360 | |
// Reorders triangles for a FIFO cache of the given size following the
// Sander/Nehab/Barczak approach: emit every remaining triangle around the current
// vertex, then pick the next vertex among the just-touched candidates (preferring
// ones that stay cached), falling back to the dead-end stack and then input order.
// destination must hold index_count entries; destination == indices is supported.
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(cache_size >= 3);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

	size_t face_count = index_count / 3;

	// build adjacency information
	TriangleAdjacency adjacency = {};
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);

	// live triangle counts; decremented as triangles are emitted
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));

	// cache time stamps; a vertex is considered cached while timestamp - stamp <= cache_size
	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	// dead-end stack; each emitted triangle pushes 3 vertices, so index_count entries suffice
	unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
	unsigned int dead_end_top = 0;

	// emitted flags
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
	memset(emitted_flags, 0, face_count);

	unsigned int current_vertex = 0;

	// start beyond cache_size so every vertex (stamp 0) initially counts as uncached
	unsigned int timestamp = cache_size + 1;
	unsigned int input_cursor = 1; // vertex to restart from in case of dead-end

	unsigned int output_triangle = 0;

	while (current_vertex != ~0u)
	{
		const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;

		// emit all vertex neighbors
		const unsigned int* neighbors_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
		const unsigned int* neighbors_end = neighbors_begin + adjacency.counts[current_vertex];

		for (const unsigned int* it = neighbors_begin; it != neighbors_end; ++it)
		{
			unsigned int triangle = *it;

			if (!emitted_flags[triangle])
			{
				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];

				// output indices
				destination[output_triangle * 3 + 0] = a;
				destination[output_triangle * 3 + 1] = b;
				destination[output_triangle * 3 + 2] = c;
				output_triangle++;

				// update dead-end stack
				dead_end[dead_end_top + 0] = a;
				dead_end[dead_end_top + 1] = b;
				dead_end[dead_end_top + 2] = c;
				dead_end_top += 3;

				// update live triangle counts
				live_triangles[a]--;
				live_triangles[b]--;
				live_triangles[c]--;

				// update cache info
				// if vertex is not in cache, put it in cache
				if (timestamp - cache_timestamps[a] > cache_size)
					cache_timestamps[a] = timestamp++;

				if (timestamp - cache_timestamps[b] > cache_size)
					cache_timestamps[b] = timestamp++;

				if (timestamp - cache_timestamps[c] > cache_size)
					cache_timestamps[c] = timestamp++;

				// update emitted flags
				emitted_flags[triangle] = true;
			}
		}

		// next candidates are the ones we pushed to dead-end stack just now
		const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;

		// get next vertex
		current_vertex = getNextVertexNeighbor(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);

		if (current_vertex == ~0u)
		{
			current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
		}
	}

	assert(output_triangle == face_count);
}
474 | |