// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>
#include <math.h>
#include <string.h>

// This work is based on:
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
namespace meshopt
{

static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);

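	// compute the mesh centroid as the average of all positions referenced by the index buffer
	// (each vertex contributes once per triangle corner that references it)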
	float mesh_centroid[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* p = vertex_positions + vertex_stride_float * indices[i];

		mesh_centroid[0] += p[0];
		mesh_centroid[1] += p[1];
		mesh_centroid[2] += p[2];
	}

	mesh_centroid[0] /= index_count;
	mesh_centroid[1] /= index_count;
	mesh_centroid[2] /= index_count;

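	// for each cluster, compute an area-weighted centroid and an average normal; the cluster's sort key is the
	// projection of its centroid (relative to the mesh centroid) onto that normal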
	for (size_t cluster = 0; cluster < cluster_count; ++cluster)
	{
		size_t cluster_begin = clusters[cluster] * 3;
		size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		float cluster_area = 0;
		float cluster_centroid[3] = {};
		float cluster_normal[3] = {};

		for (size_t i = cluster_begin; i < cluster_end; i += 3)
		{
			const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
			const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
			const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];

			float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
			float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};

			float normalx = p10[1] * p20[2] - p10[2] * p20[1];
			float normaly = p10[2] * p20[0] - p10[0] * p20[2];
			float normalz = p10[0] * p20[1] - p10[1] * p20[0];

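			// the cross product length is twice the triangle area; the constant factor cancels out in the
			// area-weighted centroid and in the normalized normal below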
			float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);

			cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
			cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
			cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
			cluster_normal[0] += normalx;
			cluster_normal[1] += normaly;
			cluster_normal[2] += normalz;
			cluster_area += area;
		}

		float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;

		cluster_centroid[0] *= inv_cluster_area;
		cluster_centroid[1] *= inv_cluster_area;
		cluster_centroid[2] *= inv_cluster_area;

		float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
		float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;

		cluster_normal[0] *= inv_cluster_normal_length;
		cluster_normal[1] *= inv_cluster_normal_length;
		cluster_normal[2] *= inv_cluster_normal_length;

		float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};

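		// the sort key is the signed distance of the cluster centroid from the mesh centroid along the cluster normal;
		// clusters with larger values are more likely to occlude the rest of the mesh and should be drawn first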
		sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
	}
}

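// sorts clusters by decreasing sort_data using a counting sort over 11-bit quantized keys;
// sort_order receives cluster indices, sort_keys is scratch space provided by the caller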
static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
{
	// compute sort data bounds and renormalize, using fixed point snorm
	float sort_data_max = 1e-3f;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		float dpa = fabsf(sort_data[i]);

		sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
	}

	const int sort_bits = 11;

	for (size_t i = 0; i < cluster_count; ++i)
	{
		// note that we flip the distribution since a high dot product should come first
		float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max);

		sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1);
	}

	// fill histogram for counting sort
	unsigned int histogram[1 << sort_bits];
	memset(histogram, 0, sizeof(histogram));

	for (size_t i = 0; i < cluster_count; ++i)
	{
		histogram[sort_keys[i]]++;
	}

	// compute offsets based on histogram data
	size_t histogram_sum = 0;

	for (size_t i = 0; i < 1 << sort_bits; ++i)
	{
		size_t count = histogram[i];
		histogram[i] = unsigned(histogram_sum);
		histogram_sum += count;
	}

	assert(histogram_sum == cluster_count);

	// compute sort order based on offsets
	for (size_t i = 0; i < cluster_count; ++i)
	{
		sort_order[histogram[sort_keys[i]]++] = unsigned(i);
	}
}

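// simulates a FIFO vertex cache via per-vertex timestamps: a vertex is considered resident if it was inserted
// within the last cache_size insertions; inserts missing vertices and returns the number of misses for the triangle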
static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
{
	unsigned int cache_misses = 0;

	// if vertex is not in cache, put it in cache
	if (timestamp - cache_timestamps[a] > cache_size)
	{
		cache_timestamps[a] = timestamp++;
		cache_misses++;
	}

	if (timestamp - cache_timestamps[b] > cache_size)
	{
		cache_timestamps[b] = timestamp++;
		cache_misses++;
	}

	if (timestamp - cache_timestamps[c] > cache_size)
	{
		cache_timestamps[c] = timestamp++;
		cache_misses++;
	}

	return cache_misses;
}

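// splits the index sequence into clusters, starting a new cluster whenever a triangle misses the simulated cache
// with all three vertices; writes the first triangle index of each cluster to destination and returns the cluster count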
static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
{
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	unsigned int timestamp = cache_size + 1;

	size_t face_count = index_count / 3;

	size_t result = 0;

	for (size_t i = 0; i < face_count; ++i)
	{
		unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

		// when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
		// that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
		// suggests an inefficiency in the vertex cache optimization algorithm
		// usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
		if (i == 0 || m == 3)
		{
			destination[result++] = unsigned(i);
		}
	}

	assert(result <= index_count / 3);

	return result;
}

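// subdivides each hard cluster by emitting an additional boundary whenever the running miss ratio (ACMR) drops to
// threshold times the cluster's overall ACMR, so the resulting clusters can be reordered with a bounded cache penalty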
static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
{
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));

	unsigned int timestamp = 0;

	size_t result = 0;

	for (size_t it = 0; it < cluster_count; ++it)
	{
		size_t start = clusters[it];
		size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
		assert(start < end);

		// reset cache
		timestamp += cache_size + 1;

		// measure cluster ACMR
		unsigned int cluster_misses = 0;

		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

			cluster_misses += m;
		}

		float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));

		// first cluster always starts from the hard cluster boundary
		destination[result++] = unsigned(start);

		// reset cache
		timestamp += cache_size + 1;

		unsigned int running_misses = 0;
		unsigned int running_faces = 0;

		for (size_t i = start; i < end; ++i)
		{
			unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);

			running_misses += m;
			running_faces += 1;

			if (float(running_misses) / float(running_faces) <= cluster_threshold)
			{
				// we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
				// note that this may mean that we add 'end' to destination for the last triangle, which will imply that the last
				// cluster is empty; however, the boundary removal after the loop will clean it up
				destination[result++] = unsigned(i + 1);

				// reset cache
				timestamp += cache_size + 1;

				running_misses = 0;
				running_faces = 0;
			}
		}

		// each time we reach the target ACMR we flush the cluster
		// this means that the last cluster is by definition not very good - we are frequently left with just a few triangles
		// in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
		// thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
		// sometimes the last cluster is actually good enough - in that case the code above has already added 'end'
		// to the cluster boundary array, which we need to remove anyway - this check does that automatically
		if (destination[result - 1] != start)
		{
			result--;
		}
	}

	assert(result >= cluster_count);
	assert(result <= index_count / 3);

	return result;
}

} // namespace meshopt

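// A minimal usage sketch (not part of this file's logic): overdraw optimization typically runs after vertex cache
// optimization and before vertex fetch optimization; 'vertices' and 'Vertex' below are assumed caller-side data with
// the position stored in the first three floats:
//
//   meshopt_optimizeVertexCache(indices, indices, index_count, vertex_count);
//   meshopt_optimizeOverdraw(indices, indices, index_count, &vertices[0].x, vertex_count, sizeof(Vertex), 1.05f);
//   meshopt_optimizeVertexFetch(vertices, indices, index_count, vertices, vertex_count, sizeof(Vertex));
//
// a threshold of 1.05 allows roughly a 5% regression in ACMR in exchange for more freedom to reorder clusters.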
void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
{
	using namespace meshopt;

	assert(index_count % 3 == 0);
	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
	assert(vertex_positions_stride % sizeof(float) == 0);

	meshopt_Allocator allocator;

	// guard for empty meshes
	if (index_count == 0 || vertex_count == 0)
		return;

	// support in-place optimization
	if (destination == indices)
	{
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
		indices = indices_copy;
	}

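	// size of the simulated post-transform cache used to generate cluster boundaries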
	unsigned int cache_size = 16;

	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);

	// generate hard boundaries from full-triangle cache misses
	unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
	size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);

	// generate soft boundaries
	unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
	size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);

	const unsigned int* clusters = soft_clusters;
	size_t cluster_count = soft_cluster_count;

	// fill sort data
	float* sort_data = allocator.allocate<float>(cluster_count);
	calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);

	// sort clusters using sort data
	unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
	unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
	calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);

	// fill output buffer
	size_t offset = 0;

	for (size_t it = 0; it < cluster_count; ++it)
	{
		unsigned int cluster = sort_order[it];
		assert(cluster < cluster_count);

		size_t cluster_begin = clusters[cluster] * 3;
		size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
		assert(cluster_begin < cluster_end);

		memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int));
		offset += cluster_end - cluster_begin;
	}

	assert(offset == index_count);
}