1// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2#include "meshoptimizer.h"
3
4#include <assert.h>
5#include <string.h>
6
7// This work is based on:
8// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
9// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
10namespace meshopt
11{
12
13const unsigned char kIndexHeader = 0xe0;
14const unsigned char kSequenceHeader = 0xd0;
15
16static int gEncodeIndexVersion = 0;
17
18typedef unsigned int VertexFifo[16];
19typedef unsigned int EdgeFifo[16][2];
20
21static const unsigned int kTriangleIndexOrder[3][3] = {
22 {0, 1, 2},
23 {1, 2, 0},
24 {2, 0, 1},
25};
26
27static const unsigned char kCodeAuxEncodingTable[16] = {
28 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
29 0, 0, // last two entries aren't used for encoding
30};
31
32static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
33{
34 (void)a;
35
36 return (b == next) ? 1 : (c == next) ? 2 : 0;
37}
38
39static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
40{
41 for (int i = 0; i < 16; ++i)
42 {
43 size_t index = (offset - 1 - i) & 15;
44
45 unsigned int e0 = fifo[index][0];
46 unsigned int e1 = fifo[index][1];
47
48 if (e0 == a && e1 == b)
49 return (i << 2) | 0;
50 if (e0 == b && e1 == c)
51 return (i << 2) | 1;
52 if (e0 == c && e1 == a)
53 return (i << 2) | 2;
54 }
55
56 return -1;
57}
58
59static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
60{
61 fifo[offset][0] = a;
62 fifo[offset][1] = b;
63 offset = (offset + 1) & 15;
64}
65
66static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
67{
68 for (int i = 0; i < 16; ++i)
69 {
70 size_t index = (offset - 1 - i) & 15;
71
72 if (fifo[index] == v)
73 return i;
74 }
75
76 return -1;
77}
78
79static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
80{
81 fifo[offset] = v;
82 offset = (offset + cond) & 15;
83}
84
85static void encodeVByte(unsigned char*& data, unsigned int v)
86{
87 // encode 32-bit value in up to 5 7-bit groups
88 do
89 {
90 *data++ = (v & 127) | (v > 127 ? 128 : 0);
91 v >>= 7;
92 } while (v);
93}
94
95static unsigned int decodeVByte(const unsigned char*& data)
96{
97 unsigned char lead = *data++;
98
99 // fast path: single byte
100 if (lead < 128)
101 return lead;
102
103 // slow path: up to 4 extra bytes
104 // note that this loop always terminates, which is important for malformed data
105 unsigned int result = lead & 127;
106 unsigned int shift = 7;
107
108 for (int i = 0; i < 4; ++i)
109 {
110 unsigned char group = *data++;
111 result |= unsigned(group & 127) << shift;
112 shift += 7;
113
114 if (group < 128)
115 break;
116 }
117
118 return result;
119}
120
121static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
122{
123 unsigned int d = index - last;
124 unsigned int v = (d << 1) ^ (int(d) >> 31);
125
126 encodeVByte(data, v);
127}
128
129static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
130{
131 unsigned int v = decodeVByte(data);
132 unsigned int d = (v >> 1) ^ -int(v & 1);
133
134 return last + d;
135}
136
137static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
138{
139 for (int i = 0; i < 16; ++i)
140 if (table[i] == v)
141 return i;
142
143 return -1;
144}
145
146static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
147{
148 if (index_size == 2)
149 {
150 static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
151 static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
152 static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
153 }
154 else
155 {
156 static_cast<unsigned int*>(destination)[offset + 0] = a;
157 static_cast<unsigned int*>(destination)[offset + 1] = b;
158 static_cast<unsigned int*>(destination)[offset + 2] = c;
159 }
160}
161
162} // namespace meshopt
163
164size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
165{
166 using namespace meshopt;
167
168 assert(index_count % 3 == 0);
169
170 // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
171 if (buffer_size < 1 + index_count / 3 + 16)
172 return 0;
173
174 int version = gEncodeIndexVersion;
175
176 buffer[0] = (unsigned char)(kIndexHeader | version);
177
178 EdgeFifo edgefifo;
179 memset(edgefifo, -1, sizeof(edgefifo));
180
181 VertexFifo vertexfifo;
182 memset(vertexfifo, -1, sizeof(vertexfifo));
183
184 size_t edgefifooffset = 0;
185 size_t vertexfifooffset = 0;
186
187 unsigned int next = 0;
188 unsigned int last = 0;
189
190 unsigned char* code = buffer + 1;
191 unsigned char* data = code + index_count / 3;
192 unsigned char* data_safe_end = buffer + buffer_size - 16;
193
194 int fecmax = version >= 1 ? 13 : 15;
195
196 // use static encoding table; it's possible to pack the result and then build an optimal table and repack
197 // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
198 const unsigned char* codeaux_table = kCodeAuxEncodingTable;
199
200 for (size_t i = 0; i < index_count; i += 3)
201 {
202 // make sure we have enough space to write a triangle
203 // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
204 // after this we can be sure we can write without extra bounds checks
205 if (data > data_safe_end)
206 return 0;
207
208 int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
209
210 if (fer >= 0 && (fer >> 2) < 15)
211 {
212 const unsigned int* order = kTriangleIndexOrder[fer & 3];
213
214 unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
215
216 // encode edge index and vertex fifo index, next or free index
217 int fe = fer >> 2;
218 int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
219
220 int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15;
221
222 if (fec == 15 && version >= 1)
223 {
224 // encode last-1 and last+1 to optimize strip-like sequences
225 if (c + 1 == last)
226 fec = 13, last = c;
227 if (c == last + 1)
228 fec = 14, last = c;
229 }
230
231 *code++ = (unsigned char)((fe << 4) | fec);
232
233 // note that we need to update the last index since free indices are delta-encoded
234 if (fec == 15)
235 encodeIndex(data, c, last), last = c;
236
237 // we only need to push third vertex since first two are likely already in the vertex fifo
238 if (fec == 0 || fec >= fecmax)
239 pushVertexFifo(vertexfifo, c, vertexfifooffset);
240
241 // we only need to push two new edges to edge fifo since the third one is already there
242 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
243 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
244 }
245 else
246 {
247 int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
248 const unsigned int* order = kTriangleIndexOrder[rotation];
249
250 unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
251
252 // if a/b/c are 0/1/2, we emit a reset code
253 bool reset = false;
254
255 if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
256 {
257 reset = true;
258 next = 0;
259
260 // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
261 // this makes sure next continues to get incremented instead of being stuck
262 memset(vertexfifo, -1, sizeof(vertexfifo));
263 }
264
265 int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
266 int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
267
268 // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
269 int fea = (a == next) ? (next++, 0) : 15;
270 int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15;
271 int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;
272
273 // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
274 unsigned char codeaux = (unsigned char)((feb << 4) | fec);
275 int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
276
277 // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
278 if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
279 {
280 *code++ = (unsigned char)((15 << 4) | codeauxindex);
281 }
282 else
283 {
284 *code++ = (unsigned char)((15 << 4) | 14 | fea);
285 *data++ = codeaux;
286 }
287
288 // note that we need to update the last index since free indices are delta-encoded
289 if (fea == 15)
290 encodeIndex(data, a, last), last = a;
291
292 if (feb == 15)
293 encodeIndex(data, b, last), last = b;
294
295 if (fec == 15)
296 encodeIndex(data, c, last), last = c;
297
298 // only push vertices that weren't already in fifo
299 if (fea == 0 || fea == 15)
300 pushVertexFifo(vertexfifo, a, vertexfifooffset);
301
302 if (feb == 0 || feb == 15)
303 pushVertexFifo(vertexfifo, b, vertexfifooffset);
304
305 if (fec == 0 || fec == 15)
306 pushVertexFifo(vertexfifo, c, vertexfifooffset);
307
308 // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
309 pushEdgeFifo(edgefifo, b, a, edgefifooffset);
310 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
311 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
312 }
313 }
314
315 // make sure we have enough space to write codeaux table
316 if (data > data_safe_end)
317 return 0;
318
319 // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
320 // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
321 // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
322 for (size_t i = 0; i < 16; ++i)
323 {
324 // decoder assumes that table entries never refer to separately encoded indices
325 assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
326
327 *data++ = codeaux_table[i];
328 }
329
330 // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
331 assert(codeaux_table[0] == 0);
332
333 assert(data >= buffer + index_count / 3 + 16);
334 assert(data <= buffer + buffer_size);
335
336 return data - buffer;
337}
338
339size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
340{
341 assert(index_count % 3 == 0);
342
343 // compute number of bits required for each index
344 unsigned int vertex_bits = 1;
345
346 while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
347 vertex_bits++;
348
349 // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
350 unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;
351
352 return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
353}
354
355void meshopt_encodeIndexVersion(int version)
356{
357 assert(unsigned(version) <= 1);
358
359 meshopt::gEncodeIndexVersion = version;
360}
361
362int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
363{
364 using namespace meshopt;
365
366 assert(index_count % 3 == 0);
367 assert(index_size == 2 || index_size == 4);
368
369 // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
370 if (buffer_size < 1 + index_count / 3 + 16)
371 return -2;
372
373 if ((buffer[0] & 0xf0) != kIndexHeader)
374 return -1;
375
376 int version = buffer[0] & 0x0f;
377 if (version > 1)
378 return -1;
379
380 EdgeFifo edgefifo;
381 memset(edgefifo, -1, sizeof(edgefifo));
382
383 VertexFifo vertexfifo;
384 memset(vertexfifo, -1, sizeof(vertexfifo));
385
386 size_t edgefifooffset = 0;
387 size_t vertexfifooffset = 0;
388
389 unsigned int next = 0;
390 unsigned int last = 0;
391
392 int fecmax = version >= 1 ? 13 : 15;
393
394 // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
395 const unsigned char* code = buffer + 1;
396 const unsigned char* data = code + index_count / 3;
397 const unsigned char* data_safe_end = buffer + buffer_size - 16;
398
399 const unsigned char* codeaux_table = data_safe_end;
400
401 for (size_t i = 0; i < index_count; i += 3)
402 {
403 // make sure we have enough data to read for a triangle
404 // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
405 // after this we can be sure we can read without extra bounds checks
406 if (data > data_safe_end)
407 return -2;
408
409 unsigned char codetri = *code++;
410
411 if (codetri < 0xf0)
412 {
413 int fe = codetri >> 4;
414
415 // fifo reads are wrapped around 16 entry buffer
416 unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
417 unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
418
419 int fec = codetri & 15;
420
421 // note: this is the most common path in the entire decoder
422 // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
423 if (fec < fecmax)
424 {
425 // fifo reads are wrapped around 16 entry buffer
426 unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
427 unsigned int c = (fec == 0) ? next : cf;
428
429 int fec0 = fec == 0;
430 next += fec0;
431
432 // output triangle
433 writeTriangle(destination, i, index_size, a, b, c);
434
435 // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
436 pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
437
438 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
439 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
440 }
441 else
442 {
443 unsigned int c = 0;
444
445 // fec - (fec ^ 3) decodes 13, 14 into -1, 1
446 // note that we need to update the last index since free indices are delta-encoded
447 last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
448
449 // output triangle
450 writeTriangle(destination, i, index_size, a, b, c);
451
452 // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
453 pushVertexFifo(vertexfifo, c, vertexfifooffset);
454
455 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
456 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
457 }
458 }
459 else
460 {
461 // fast path: read codeaux from the table
462 if (codetri < 0xfe)
463 {
464 unsigned char codeaux = codeaux_table[codetri & 15];
465
466 // note: table can't contain feb/fec=15
467 int feb = codeaux >> 4;
468 int fec = codeaux & 15;
469
470 // fifo reads are wrapped around 16 entry buffer
471 // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
472 unsigned int a = next++;
473
474 unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
475 unsigned int b = (feb == 0) ? next : bf;
476
477 int feb0 = feb == 0;
478 next += feb0;
479
480 unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
481 unsigned int c = (fec == 0) ? next : cf;
482
483 int fec0 = fec == 0;
484 next += fec0;
485
486 // output triangle
487 writeTriangle(destination, i, index_size, a, b, c);
488
489 // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
490 pushVertexFifo(vertexfifo, a, vertexfifooffset);
491 pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
492 pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
493
494 pushEdgeFifo(edgefifo, b, a, edgefifooffset);
495 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
496 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
497 }
498 else
499 {
500 // slow path: read a full byte for codeaux instead of using a table lookup
501 unsigned char codeaux = *data++;
502
503 int fea = codetri == 0xfe ? 0 : 15;
504 int feb = codeaux >> 4;
505 int fec = codeaux & 15;
506
507 // reset: codeaux is 0 but encoded as not-a-table
508 if (codeaux == 0)
509 next = 0;
510
511 // fifo reads are wrapped around 16 entry buffer
512 // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
513 unsigned int a = (fea == 0) ? next++ : 0;
514 unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
515 unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
516
517 // note that we need to update the last index since free indices are delta-encoded
518 if (fea == 15)
519 last = a = decodeIndex(data, last);
520
521 if (feb == 15)
522 last = b = decodeIndex(data, last);
523
524 if (fec == 15)
525 last = c = decodeIndex(data, last);
526
527 // output triangle
528 writeTriangle(destination, i, index_size, a, b, c);
529
530 // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
531 pushVertexFifo(vertexfifo, a, vertexfifooffset);
532 pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
533 pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
534
535 pushEdgeFifo(edgefifo, b, a, edgefifooffset);
536 pushEdgeFifo(edgefifo, c, b, edgefifooffset);
537 pushEdgeFifo(edgefifo, a, c, edgefifooffset);
538 }
539 }
540 }
541
542 // we should've read all data bytes and stopped at the boundary between data and codeaux table
543 if (data != data_safe_end)
544 return -3;
545
546 return 0;
547}
548
549size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
550{
551 using namespace meshopt;
552
553 // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
554 if (buffer_size < 1 + index_count + 4)
555 return 0;
556
557 int version = gEncodeIndexVersion;
558
559 buffer[0] = (unsigned char)(kSequenceHeader | version);
560
561 unsigned int last[2] = {};
562 unsigned int current = 0;
563
564 unsigned char* data = buffer + 1;
565 unsigned char* data_safe_end = buffer + buffer_size - 4;
566
567 for (size_t i = 0; i < index_count; ++i)
568 {
569 // make sure we have enough data to write
570 // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
571 // after this we can be sure we can write without extra bounds checks
572 if (data >= data_safe_end)
573 return 0;
574
575 unsigned int index = indices[i];
576
577 // this is a heuristic that switches between baselines when the delta grows too large
578 // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
579 // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
580 int cd = int(index - last[current]);
581 current ^= ((cd < 0 ? -cd : cd) >= 30);
582
583 // encode delta from the last index
584 unsigned int d = index - last[current];
585 unsigned int v = (d << 1) ^ (int(d) >> 31);
586
587 // note: low bit encodes the index of the last baseline which will be used for reconstruction
588 encodeVByte(data, (v << 1) | current);
589
590 // update last for the next iteration that uses it
591 last[current] = index;
592 }
593
594 // make sure we have enough space to write tail
595 if (data > data_safe_end)
596 return 0;
597
598 for (int k = 0; k < 4; ++k)
599 *data++ = 0;
600
601 return data - buffer;
602}
603
604size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
605{
606 // compute number of bits required for each index
607 unsigned int vertex_bits = 1;
608
609 while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
610 vertex_bits++;
611
612 // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
613 unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;
614
615 return 1 + index_count * vertex_groups + 4;
616}
617
618int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
619{
620 using namespace meshopt;
621
622 // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
623 if (buffer_size < 1 + index_count + 4)
624 return -2;
625
626 if ((buffer[0] & 0xf0) != kSequenceHeader)
627 return -1;
628
629 int version = buffer[0] & 0x0f;
630 if (version > 1)
631 return -1;
632
633 const unsigned char* data = buffer + 1;
634 const unsigned char* data_safe_end = buffer + buffer_size - 4;
635
636 unsigned int last[2] = {};
637
638 for (size_t i = 0; i < index_count; ++i)
639 {
640 // make sure we have enough data to read
641 // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
642 // after this we can be sure we can read without extra bounds checks
643 if (data >= data_safe_end)
644 return -2;
645
646 unsigned int v = decodeVByte(data);
647
648 // decode the index of the last baseline
649 unsigned int current = v & 1;
650 v >>= 1;
651
652 // reconstruct index as a delta
653 unsigned int d = (v >> 1) ^ -int(v & 1);
654 unsigned int index = last[current] + d;
655
656 // update last for the next iteration that uses it
657 last[current] = index;
658
659 if (index_size == 2)
660 {
661 static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
662 }
663 else
664 {
665 static_cast<unsigned int*>(destination)[i] = index;
666 }
667 }
668
669 // we should've read all data bytes and stopped at the boundary between data and tail
670 if (data != data_safe_end)
671 return -3;
672
673 return 0;
674}
675