1#ifndef MINIMP3_H
2#define MINIMP3_H
3/*
4 https://github.com/lieff/minimp3
5 To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
6 This software is distributed without any warranty.
7 See <http://creativecommons.org/publicdomain/zero/1.0/>.
8*/
9#include <stdint.h>
10
11#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2)
12
/*
 * Frame description record; mp3dec_decode_frame() takes a pointer to one as
 * its `info` argument.
 * NOTE(review): the decoder body is not visible here -- the per-field notes
 * below are read off the field names and should be confirmed.
 */
typedef struct
{
    int frame_bytes;    /* presumably size of the frame in bytes */
    int frame_offset;   /* presumably offset of the frame in the input buffer */
    int channels;       /* presumably channel count */
    int hz;             /* presumably sample rate in Hz */
    int layer;          /* presumably MPEG audio layer number */
    int bitrate_kbps;   /* presumably bitrate in kbit/s */
} mp3dec_frame_info_t;
17
/*
 * Persistent decoder state, passed to mp3dec_init() and mp3dec_decode_frame().
 * reserv_buf has 511 bytes, the same value as MAX_BITRESERVOIR_BYTES in the
 * implementation section.
 */
typedef struct
{
    float mdct_overlap[2][9*32];    /* two rows of 288 floats */
    float qmf_state[15*2*32];       /* 960 floats */
    int reserv;
    int free_format_bytes;
    unsigned char header[4];        /* HDR_SIZE bytes */
    unsigned char reserv_buf[511];
} mp3dec_t;
24
25#ifdef __cplusplus
26extern "C" {
27#endif /* __cplusplus */
28
/* Sets up a decoder state object.
   NOTE(review): implementation is outside this view -- presumably must be called before the first mp3dec_decode_frame(). */
void mp3dec_init(mp3dec_t *dec);
#ifndef MINIMP3_FLOAT_OUTPUT
/* Default build: output PCM samples are 16-bit signed integers. */
typedef int16_t mp3d_sample_t;
#else /* MINIMP3_FLOAT_OUTPUT */
/* MINIMP3_FLOAT_OUTPUT build: output PCM samples are floats; a float -> int16 helper is declared as well. */
typedef float mp3d_sample_t;
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples);
#endif /* MINIMP3_FLOAT_OUTPUT */
/* Decodes from `mp3` (mp3_bytes long) into `pcm`, reporting frame details through `info`.
   NOTE(review): return value semantics and required `pcm` capacity are not visible here --
   presumably MINIMP3_MAX_SAMPLES_PER_FRAME samples; confirm against the implementation. */
int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info);
37
38#ifdef __cplusplus
39}
40#endif /* __cplusplus */
41
42#endif /* MINIMP3_H */
43#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD)
44#define _MINIMP3_IMPLEMENTATION_GUARD
45
46#include <stdlib.h>
47#include <string.h>
48
/* ---- size limits ---- */
#define MAX_FREE_FORMAT_FRAME_SIZE  2304    /* more than ISO spec's */
#ifndef MAX_FRAME_SYNC_MATCHES
#define MAX_FRAME_SYNC_MATCHES      10
#endif /* MAX_FRAME_SYNC_MATCHES */

#define MAX_L3_FRAME_PAYLOAD_BYTES  MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */

#define MAX_BITRESERVOIR_BYTES      511
#define SHORT_BLOCK_TYPE            2
#define STOP_BLOCK_TYPE             3
#define MODE_MONO                   3
#define MODE_JOINT_STEREO           1

/*
 * ---- frame header accessors ----
 * `h` is a pointer to the HDR_SIZE (4) header bytes.  The parameter is
 * parenthesized in every expansion so that pointer expressions such as
 * `buf + i` can be passed safely (previously `h[3]` expanded to `buf + i[3]`).
 * Note that `h` may be evaluated more than once.
 */
#define HDR_SIZE                    4
#define HDR_IS_MONO(h)              (((h)[3] & 0xC0) == 0xC0)
#define HDR_IS_MS_STEREO(h)         (((h)[3] & 0xE0) == 0x60)
#define HDR_IS_FREE_FORMAT(h)       (((h)[2] & 0xF0) == 0)
#define HDR_IS_CRC(h)               (!((h)[1] & 1))
#define HDR_TEST_PADDING(h)         ((h)[2] & 0x2)
#define HDR_TEST_MPEG1(h)           ((h)[1] & 0x8)
#define HDR_TEST_NOT_MPEG25(h)      ((h)[1] & 0x10)
#define HDR_TEST_I_STEREO(h)        ((h)[3] & 0x10)
#define HDR_TEST_MS_STEREO(h)       ((h)[3] & 0x20)
#define HDR_GET_STEREO_MODE(h)      (((h)[3] >> 6) & 3)
#define HDR_GET_STEREO_MODE_EXT(h)  (((h)[3] >> 4) & 3)
#define HDR_GET_LAYER(h)            (((h)[1] >> 1) & 3)
#define HDR_GET_BITRATE(h)          ((h)[2] >> 4)
#define HDR_GET_SAMPLE_RATE(h)      (((h)[2] >> 2) & 3)
/* sample-rate index (0..2) plus 3 for each of the two version bits that is set */
#define HDR_GET_MY_SAMPLE_RATE(h)   (HDR_GET_SAMPLE_RATE(h) + ((((h)[1] >> 3) & 1) + (((h)[1] >> 4) & 1))*3)
#define HDR_IS_FRAME_576(h)         (((h)[1] & 14) == 2)
#define HDR_IS_LAYER_1(h)           (((h)[1] & 6) == 6)

/* ---- dequantizer scaling ---- */
#define BITS_DEQUANTIZER_OUT        (-1)    /* parenthesized so it is safe inside any expression */
#define MAX_SCF                     (255 + BITS_DEQUANTIZER_OUT*4 - 210)
#define MAX_SCFI                    ((MAX_SCF + 3) & ~3)   /* MAX_SCF rounded up to a multiple of 4 */

/* NOTE: both arguments may be evaluated twice -- do not pass expressions with side effects. */
#define MINIMP3_MIN(a, b)           ((a) > (b) ? (b) : (a))
#define MINIMP3_MAX(a, b)           ((a) < (b) ? (b) : (a))
86
87#if !defined(MINIMP3_NO_SIMD)
88
89#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64))
/* x64 always has SSE2 and arm64 always has NEON, so no generic code path is needed */
91#define MINIMP3_ONLY_SIMD
92#endif /* SIMD checks... */
93
94#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
95#if defined(_MSC_VER)
96#include <intrin.h>
97#endif /* defined(_MSC_VER) */
98#include <immintrin.h>
/* SSE flavour of the 4-lane float vector layer used by the SIMD code paths. */
#define HAVE_SSE 1
#define HAVE_SIMD 1
#define VSTORE _mm_storeu_ps    /* unaligned store of 4 floats */
#define VLD _mm_loadu_ps        /* unaligned load of 4 floats */
#define VSET _mm_set1_ps        /* broadcast one scalar to all lanes */
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y))   /* a + x*y */
#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y))   /* a - x*y */
#define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s))      /* x * broadcast(s) */
#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3))   /* reverse the lane order */
typedef __m128 f4;  /* 4 x float vector type */
112#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD)
113#define minimp3_cpuid __cpuid
114#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Executes the x86 CPUID instruction with EAX = InfoType and stores the
 * resulting EAX, EBX, ECX, EDX values into CPUInfo[0..3].
 * Inline-asm counterpart of the __cpuid name used on the other branch of the
 * surrounding #if.
 */
static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType)
{
#if defined(__PIC__)
    /* PIC build: EBX/RBX is preserved around CPUID and the EBX result is
       handed out through a general register (operand %1) instead of "=b". */
    __asm__ __volatile__(
#if defined(__x86_64__)
        "push %%rbx\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
        "pop %%rbx\n"
#else /* defined(__x86_64__) */
        "xchgl %%ebx, %1\n"
        "cpuid\n"
        "xchgl %%ebx, %1\n"
#endif /* defined(__x86_64__) */
        : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#else /* defined(__PIC__) */
    /* Non-PIC build: EBX can be named directly as an output. */
    __asm__ __volatile__(
        "cpuid"
        : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3])
        : "a" (InfoType));
#endif /* defined(__PIC__)*/
}
138#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */
/*
 * Reports whether the SIMD (SSE2) code path may be used.
 *
 * With MINIMP3_ONLY_SIMD the answer is always 1.  Otherwise CPUID is queried
 * once and the answer is cached in a function-local static; the return value
 * is 0 when SSE2 is absent and non-zero (the raw feature bit, 1 << 26) when
 * it is present.  With MINIMP3_TEST the function starts returning 0 once it
 * has been called more than 101 times.
 */
static int have_simd(void)
{
#ifdef MINIMP3_ONLY_SIMD
    return 1;
#else /* MINIMP3_ONLY_SIMD */
    static int probe_state; /* 0 = not probed yet, otherwise (result + 1) */
    int regs[4];
#ifdef MINIMP3_TEST
    static int call_count;
    if (call_count++ > 100)
    {
        return 0;
    }
#endif /* MINIMP3_TEST */
    if (!probe_state)
    {
        minimp3_cpuid(regs, 0);
        probe_state = 1;    /* "probed, no SSE2" unless leaf 1 says otherwise */
        if (regs[0] > 0)    /* leaf 1 is available */
        {
            minimp3_cpuid(regs, 1);
            probe_state = (regs[3] & (1 << 26)) + 1; /* SSE2 */
        }
    }
    return probe_state - 1;
#endif /* MINIMP3_ONLY_SIMD */
}
164#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64)
165#include <arm_neon.h>
/* NEON flavour of the 4-lane float vector layer used by the SIMD code paths. */
#define HAVE_SSE 0
#define HAVE_SIMD 1
#define VSTORE vst1q_f32    /* store 4 floats */
#define VLD vld1q_f32       /* load 4 floats */
#define VSET vmovq_n_f32    /* broadcast one scalar to all lanes */
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
#define VMAC(a, x, y) vmlaq_f32(a, x, y)    /* a + x*y */
#define VMSB(a, x, y) vmlsq_f32(a, x, y)    /* a - x*y */
#define VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s))   /* x * broadcast(s) */
/* reverse the lane order: swap within each 64-bit half, then swap the halves */
#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x)))
typedef float32x4_t f4; /* 4 x float vector type */
/*
 * Reports whether the SIMD (NEON) code path may be used.
 * Always returns 1 on this branch; declared with an explicit (void) parameter
 * list so it is a real prototype and matches the x86 variant.
 */
static int have_simd(void)
{   /* TODO: detect neon for !MINIMP3_ONLY_SIMD */
    return 1;
}
183#else /* SIMD checks... */
184#define HAVE_SSE 0
185#define HAVE_SIMD 0
186#ifdef MINIMP3_ONLY_SIMD
187#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled
188#endif /* MINIMP3_ONLY_SIMD */
189#endif /* SIMD checks... */
190#else /* !defined(MINIMP3_NO_SIMD) */
191#define HAVE_SIMD 0
192#endif /* !defined(MINIMP3_NO_SIMD) */
193
194#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64)
195#define HAVE_ARMV6 1
/*
 * Clamps `a` to the signed 16-bit range using the ARM SSAT (signed saturate)
 * instruction with a width of 16 bits, and returns the clamped value.
 * Only compiled when HAVE_ARMV6 is 1 (32-bit ARM, architecture >= 6).
 */
static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a)
{
    int32_t x = 0;
    __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
    return x;
}
202#else
203#define HAVE_ARMV6 0
204#endif
205
/* Bit reader state, set up by bs_init() and advanced by get_bits(). */
typedef struct
{
    const uint8_t *buf; /* first byte of the data being read */
    int pos;            /* current read position, in bits */
    int limit;          /* size of the data, in bits */
} bs_t;
211
/* Layer I/II per-frame allocation and scalefactor data (see L12_read_scale_info()). */
typedef struct
{
    float scf[3*64];        /* 3 scale values per bitalloc entry */
    uint8_t total_bands;    /* number of coded subbands */
    uint8_t stereo_bands;   /* subbands that carry a separate second-channel allocation */
    uint8_t bitalloc[64];   /* interleaved per channel: index 2*band + ch */
    uint8_t scfcod[64];     /* scalefactor selection code per bitalloc entry */
} L12_scale_info;
217
/* One run of consecutive Layer I/II subbands sharing a bit-allocation code table.
   Field order matters: the tables are initialized positionally. */
typedef struct
{
    uint8_t tab_offset;     /* offset of the run's code table inside g_bitalloc_code_tab */
    uint8_t code_tab_width; /* number of bits read per allocation code */
    uint8_t band_count;     /* number of subbands in the run */
} L12_subband_alloc_t;
222
/* Layer III side information for one granule/channel, filled by L3_read_side_info(). */
typedef struct
{
    const uint8_t *sfbtab;      /* scalefactor band width table selected for this granule */
    uint16_t part_23_length;
    uint16_t big_values;
    uint16_t scalefac_compress;
    uint8_t global_gain;
    uint8_t block_type;
    uint8_t mixed_block_flag;
    uint8_t n_long_sfb;         /* number of long-block scalefactor bands */
    uint8_t n_short_sfb;        /* number of short-block scalefactor band entries */
    uint8_t table_select[3];
    uint8_t region_count[3];
    uint8_t subblock_gain[3];
    uint8_t preflag;
    uint8_t scalefac_scale;
    uint8_t count1_table;
    uint8_t scfsi;
} L3_gr_info_t;
231
232typedef struct
233{
234 bs_t bs;
235 uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES];
236 L3_gr_info_t gr_info[4];
237 float grbuf[2][576], scf[40], syn[18 + 15][2*32];
238 uint8_t ist_pos[2][39];
239} mp3dec_scratch_t;
240
241static void bs_init(bs_t *bs, const uint8_t *data, int bytes)
242{
243 bs->buf = data;
244 bs->pos = 0;
245 bs->limit = bytes*8;
246}
247
248static uint32_t get_bits(bs_t *bs, int n)
249{
250 uint32_t next, cache = 0, s = bs->pos & 7;
251 int shl = n + s;
252 const uint8_t *p = bs->buf + (bs->pos >> 3);
253 if ((bs->pos += n) > bs->limit)
254 return 0;
255 next = *p++ & (255 >> s);
256 while ((shl -= 8) > 0)
257 {
258 cache |= next << shl;
259 next = *p++;
260 }
261 return cache | (next >> -shl);
262}
263
/*
 * Returns 1 when the 4 header bytes at `h` look like a usable frame header,
 * 0 otherwise.  Checks, in order: sync byte 0xff, accepted sync/version bits
 * in h[1], non-zero layer field, bitrate index other than 15, sample-rate
 * index other than 3.
 */
static int hdr_valid(const uint8_t *h)
{
    if (h[0] != 0xff)
    {
        return 0;
    }
    if ((h[1] & 0xF0) != 0xf0 && (h[1] & 0xFE) != 0xe2)
    {
        return 0;
    }
    if (HDR_GET_LAYER(h) == 0)
    {
        return 0;
    }
    if (HDR_GET_BITRATE(h) == 15)
    {
        return 0;
    }
    return HDR_GET_SAMPLE_RATE(h) != 3;
}
272
/*
 * Returns 1 when `h2` is a valid header that agrees with `h1` on: every bit
 * of byte 1 except the lowest one, the sample-rate bits of byte 2 (mask 0x0C)
 * and the free-format property.  Returns 0 otherwise.
 */
static int hdr_compare(const uint8_t *h1, const uint8_t *h2)
{
    if (!hdr_valid(h2))
    {
        return 0;
    }
    if ((h1[1] ^ h2[1]) & 0xFE)
    {
        return 0;
    }
    if ((h1[2] ^ h2[2]) & 0x0C)
    {
        return 0;
    }
    return HDR_IS_FREE_FORMAT(h1) == HDR_IS_FREE_FORMAT(h2);
}
280
/*
 * Looks up the bitrate in kbit/s for header `h`.
 * The table stores half the bitrate so every entry fits in a byte; it is
 * indexed by [MPEG-1 flag][layer field - 1][bitrate index].  Index 0
 * (free format) yields 0.
 */
static unsigned hdr_bitrate_kbps(const uint8_t *h)
{
    static const uint8_t halfrate[2][3][15] = {
        { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
        { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
    };
    const int is_mpeg1 = HDR_TEST_MPEG1(h) ? 1 : 0;
    const int layer_row = HDR_GET_LAYER(h) - 1;
    const int rate_idx = HDR_GET_BITRATE(h);

    return 2*halfrate[is_mpeg1][layer_row][rate_idx];
}
289
/*
 * Returns the sample rate in Hz for header `h`: the base rate selected by the
 * sample-rate index, halved once when the MPEG-1 bit is clear and once more
 * when the "not MPEG-2.5" bit is clear.
 */
static unsigned hdr_sample_rate_hz(const uint8_t *h)
{
    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
    unsigned hz = g_hz[HDR_GET_SAMPLE_RATE(h)];

    if (!HDR_TEST_MPEG1(h))
    {
        hz >>= 1;
    }
    if (!HDR_TEST_NOT_MPEG25(h))
    {
        hz >>= 1;
    }
    return hz;
}
295
/*
 * Returns the number of samples per frame for header `h`:
 * 384 for Layer I, 576 when HDR_IS_FRAME_576 holds, 1152 otherwise.
 */
static unsigned hdr_frame_samples(const uint8_t *h)
{
    if (HDR_IS_LAYER_1(h))
    {
        return 384;
    }
    return HDR_IS_FRAME_576(h) ? 576 : 1152;
}
300
/*
 * Computes the frame size in bytes (without padding) from header `h` as
 * samples * kbps * 125 / hz; for Layer I the result is rounded down to a
 * multiple of 4 (slot size).  When the computed size is 0 (free-format
 * bitrate index) `free_format_size` is returned instead.
 */
static int hdr_frame_bytes(const uint8_t *h, int free_format_size)
{
    int nbytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h);

    if (HDR_IS_LAYER_1(h))
    {
        nbytes &= ~3; /* slot align */
    }
    if (nbytes == 0)
    {
        return free_format_size;
    }
    return nbytes;
}
310
/*
 * Returns the number of padding bytes signalled by header `h`:
 * 0 when the padding bit is clear, otherwise 4 for Layer I and 1 for the
 * other layers.
 */
static int hdr_padding(const uint8_t *h)
{
    if (!HDR_TEST_PADDING(h))
    {
        return 0;
    }
    return HDR_IS_LAYER_1(h) ? 4 : 1;
}
315
316#ifndef MINIMP3_ONLY_MP3
317static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci)
318{
319 const L12_subband_alloc_t *alloc;
320 int mode = HDR_GET_STEREO_MODE(hdr);
321 int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32;
322
323 if (HDR_IS_LAYER_1(hdr))
324 {
325 static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } };
326 alloc = g_alloc_L1;
327 nbands = 32;
328 } else if (!HDR_TEST_MPEG1(hdr))
329 {
330 static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } };
331 alloc = g_alloc_L2M2;
332 nbands = 30;
333 } else
334 {
335 static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } };
336 int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr);
337 unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO);
338 if (!kbps) /* free-format */
339 {
340 kbps = 192;
341 }
342
343 alloc = g_alloc_L2M1;
344 nbands = 27;
345 if (kbps < 56)
346 {
347 static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } };
348 alloc = g_alloc_L2M1_lowrate;
349 nbands = sample_rate_idx == 2 ? 12 : 8;
350 } else if (kbps >= 96 && sample_rate_idx != 1)
351 {
352 nbands = 30;
353 }
354 }
355
356 sci->total_bands = (uint8_t)nbands;
357 sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands);
358
359 return alloc;
360}
361
362static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf)
363{
364 static const float g_deq_L12[18*3] = {
365#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x
366 DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9)
367 };
368 int i, m;
369 for (i = 0; i < bands; i++)
370 {
371 float s = 0;
372 int ba = *pba++;
373 int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0;
374 for (m = 4; m; m >>= 1)
375 {
376 if (mask & m)
377 {
378 int b = get_bits(bs, 6);
379 s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3);
380 }
381 *scf++ = s;
382 }
383 }
384}
385
386static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci)
387{
388 static const uint8_t g_bitalloc_code_tab[] = {
389 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16,
390 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16,
391 0,17,18, 3,19,4,5,16,
392 0,17,18,16,
393 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15,
394 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14,
395 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16
396 };
397 const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci);
398
399 int i, k = 0, ba_bits = 0;
400 const uint8_t *ba_code_tab = g_bitalloc_code_tab;
401
402 for (i = 0; i < sci->total_bands; i++)
403 {
404 uint8_t ba;
405 if (i == k)
406 {
407 k += subband_alloc->band_count;
408 ba_bits = subband_alloc->code_tab_width;
409 ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset;
410 subband_alloc++;
411 }
412 ba = ba_code_tab[get_bits(bs, ba_bits)];
413 sci->bitalloc[2*i] = ba;
414 if (i < sci->stereo_bands)
415 {
416 ba = ba_code_tab[get_bits(bs, ba_bits)];
417 }
418 sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0;
419 }
420
421 for (i = 0; i < 2*sci->total_bands; i++)
422 {
423 sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6;
424 }
425
426 L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf);
427
428 for (i = sci->stereo_bands; i < sci->total_bands; i++)
429 {
430 sci->bitalloc[2*i + 1] = 0;
431 }
432}
433
434static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size)
435{
436 int i, j, k, choff = 576;
437 for (j = 0; j < 4; j++)
438 {
439 float *dst = grbuf + group_size*j;
440 for (i = 0; i < 2*sci->total_bands; i++)
441 {
442 int ba = sci->bitalloc[i];
443 if (ba != 0)
444 {
445 if (ba < 17)
446 {
447 int half = (1 << (ba - 1)) - 1;
448 for (k = 0; k < group_size; k++)
449 {
450 dst[k] = (float)((int)get_bits(bs, ba) - half);
451 }
452 } else
453 {
454 unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */
455 unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */
456 for (k = 0; k < group_size; k++, code /= mod)
457 {
458 dst[k] = (float)((int)(code % mod - mod/2));
459 }
460 }
461 }
462 dst += choff;
463 choff = 18 - choff;
464 }
465 }
466 return group_size*4;
467}
468
469static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst)
470{
471 int i, k;
472 memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
473 for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
474 {
475 for (k = 0; k < 12; k++)
476 {
477 dst[k + 0] *= scf[0];
478 dst[k + 576] *= scf[3];
479 }
480 }
481}
482#endif /* MINIMP3_ONLY_MP3 */
483
484static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr)
485{
486 static const uint8_t g_scf_long[8][23] = {
487 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
488 { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 },
489 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
490 { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 },
491 { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 },
492 { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 },
493 { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 },
494 { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 }
495 };
496 static const uint8_t g_scf_short[8][40] = {
497 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
498 { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
499 { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
500 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
501 { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
502 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
503 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
504 { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
505 };
506 static const uint8_t g_scf_mixed[8][40] = {
507 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
508 { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 },
509 { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 },
510 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 },
511 { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 },
512 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 },
513 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 },
514 { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 }
515 };
516
517 unsigned tables, scfsi = 0;
518 int main_data_begin, part_23_sum = 0;
519 int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0);
520 int gr_count = HDR_IS_MONO(hdr) ? 1 : 2;
521
522 if (HDR_TEST_MPEG1(hdr))
523 {
524 gr_count *= 2;
525 main_data_begin = get_bits(bs, 9);
526 scfsi = get_bits(bs, 7 + gr_count);
527 } else
528 {
529 main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count;
530 }
531
532 do
533 {
534 if (HDR_IS_MONO(hdr))
535 {
536 scfsi <<= 4;
537 }
538 gr->part_23_length = (uint16_t)get_bits(bs, 12);
539 part_23_sum += gr->part_23_length;
540 gr->big_values = (uint16_t)get_bits(bs, 9);
541 if (gr->big_values > 288)
542 {
543 return -1;
544 }
545 gr->global_gain = (uint8_t)get_bits(bs, 8);
546 gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9);
547 gr->sfbtab = g_scf_long[sr_idx];
548 gr->n_long_sfb = 22;
549 gr->n_short_sfb = 0;
550 if (get_bits(bs, 1))
551 {
552 gr->block_type = (uint8_t)get_bits(bs, 2);
553 if (!gr->block_type)
554 {
555 return -1;
556 }
557 gr->mixed_block_flag = (uint8_t)get_bits(bs, 1);
558 gr->region_count[0] = 7;
559 gr->region_count[1] = 255;
560 if (gr->block_type == SHORT_BLOCK_TYPE)
561 {
562 scfsi &= 0x0F0F;
563 if (!gr->mixed_block_flag)
564 {
565 gr->region_count[0] = 8;
566 gr->sfbtab = g_scf_short[sr_idx];
567 gr->n_long_sfb = 0;
568 gr->n_short_sfb = 39;
569 } else
570 {
571 gr->sfbtab = g_scf_mixed[sr_idx];
572 gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6;
573 gr->n_short_sfb = 30;
574 }
575 }
576 tables = get_bits(bs, 10);
577 tables <<= 5;
578 gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3);
579 gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3);
580 gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3);
581 } else
582 {
583 gr->block_type = 0;
584 gr->mixed_block_flag = 0;
585 tables = get_bits(bs, 15);
586 gr->region_count[0] = (uint8_t)get_bits(bs, 4);
587 gr->region_count[1] = (uint8_t)get_bits(bs, 3);
588 gr->region_count[2] = 255;
589 }
590 gr->table_select[0] = (uint8_t)(tables >> 10);
591 gr->table_select[1] = (uint8_t)((tables >> 5) & 31);
592 gr->table_select[2] = (uint8_t)((tables) & 31);
593 gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500);
594 gr->scalefac_scale = (uint8_t)get_bits(bs, 1);
595 gr->count1_table = (uint8_t)get_bits(bs, 1);
596 gr->scfsi = (uint8_t)((scfsi >> 12) & 15);
597 scfsi <<= 4;
598 gr++;
599 } while(--gr_count);
600
601 if (part_23_sum + bs->pos > bs->limit + main_data_begin*8)
602 {
603 return -1;
604 }
605
606 return main_data_begin;
607}
608
609static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi)
610{
611 int i, k;
612 for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2)
613 {
614 int cnt = scf_count[i];
615 if (scfsi & 8)
616 {
617 memcpy(scf, ist_pos, cnt);
618 } else
619 {
620 int bits = scf_size[i];
621 if (!bits)
622 {
623 memset(scf, 0, cnt);
624 memset(ist_pos, 0, cnt);
625 } else
626 {
627 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
628 for (k = 0; k < cnt; k++)
629 {
630 int s = get_bits(bitbuf, bits);
631 ist_pos[k] = (s == max_scf ? -1 : s);
632 scf[k] = s;
633 }
634 }
635 }
636 ist_pos += cnt;
637 scf += cnt;
638 }
639 scf[0] = scf[1] = scf[2] = 0;
640}
641
/*
 * Scales `y` down by 2^(exp_q2/4), i.e. `exp_q2` is an exponent in quarter
 * steps.  The exponent is consumed in chunks of at most 120 (30*4): each
 * chunk multiplies by g_expfrac[e & 3] (2^-30 times the fractional factor)
 * and by the integer 2^(30 - e/4).
 */
static float L3_ldexp_q2(float y, int exp_q2)
{
    static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f };

    for (;;)
    {
        const int step = MINIMP3_MIN(30*4, exp_q2);
        y *= g_expfrac[step & 3]*(1 << 30 >> (step >> 2));
        exp_q2 -= step;
        if (exp_q2 <= 0)
        {
            break;
        }
    }
    return y;
}
653
654static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch)
655{
656 static const uint8_t g_scf_partitions[3][28] = {
657 { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 },
658 { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 },
659 { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 }
660 };
661 const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb];
662 uint8_t scf_size[4], iscf[40];
663 int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi;
664 float gain;
665
666 if (HDR_TEST_MPEG1(hdr))
667 {
668 static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 };
669 int part = g_scfc_decode[gr->scalefac_compress];
670 scf_size[1] = scf_size[0] = (uint8_t)(part >> 2);
671 scf_size[3] = scf_size[2] = (uint8_t)(part & 3);
672 } else
673 {
674 static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 };
675 int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch;
676 sfc = gr->scalefac_compress >> ist;
677 for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4)
678 {
679 for (modprod = 1, i = 3; i >= 0; i--)
680 {
681 scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]);
682 modprod *= g_mod[k + i];
683 }
684 }
685 scf_partition += k;
686 scfsi = -16;
687 }
688 L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi);
689
690 if (gr->n_short_sfb)
691 {
692 int sh = 3 - scf_shift;
693 for (i = 0; i < gr->n_short_sfb; i += 3)
694 {
695 iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh;
696 iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh;
697 iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh;
698 }
699 } else if (gr->preflag)
700 {
701 static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 };
702 for (i = 0; i < 10; i++)
703 {
704 iscf[11 + i] += g_preamp[i];
705 }
706 }
707
708 gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0);
709 gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp);
710 for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++)
711 {
712 scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift);
713 }
714}
715
716static const float g_pow43[129 + 16] = {
717 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f,
718 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f
719};
720
721static float L3_pow_43(int x)
722{
723 float frac;
724 int sign, mult = 256;
725
726 if (x < 129)
727 {
728 return g_pow43[16 + x];
729 }
730
731 if (x < 1024)
732 {
733 mult = 16;
734 x <<= 3;
735 }
736
737 sign = 2*x & 64;
738 frac = (float)((x & 63) - sign) / ((x & ~63) + sign);
739 return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult;
740}
741
742static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit)
743{
744 static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
745 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,
746 -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288,
747 -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288,
748 -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258,
749 -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259,
750 -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258,
751 -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258,
752 -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259,
753 -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258,
754 -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290,
755 -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259,
756 -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258,
757 -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259,
758 -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258,
759 -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 };
760 static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 };
761 static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 };
762 static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
763 static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
764
765#define PEEK_BITS(n) (bs_cache >> (32 - n))
766#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); }
767#define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
768#define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
769
770 float one = 0.0f;
771 int ireg = 0, big_val_cnt = gr_info->big_values;
772 const uint8_t *sfb = gr_info->sfbtab;
773 const uint8_t *bs_next_ptr = bs->buf + bs->pos/8;
774 uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7);
775 int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8;
776 bs_next_ptr += 4;
777
778 while (big_val_cnt > 0)
779 {
780 int tab_num = gr_info->table_select[ireg];
781 int sfb_cnt = gr_info->region_count[ireg++];
782 const int16_t *codebook = tabs + tabindex[tab_num];
783 int linbits = g_linbits[tab_num];
784 if (linbits)
785 {
786 do
787 {
788 np = *sfb++ / 2;
789 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
790 one = *scf++;
791 do
792 {
793 int j, w = 5;
794 int leaf = codebook[PEEK_BITS(w)];
795 while (leaf < 0)
796 {
797 FLUSH_BITS(w);
798 w = leaf & 7;
799 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
800 }
801 FLUSH_BITS(leaf >> 8);
802
803 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
804 {
805 int lsb = leaf & 0x0F;
806 if (lsb == 15)
807 {
808 lsb += PEEK_BITS(linbits);
809 FLUSH_BITS(linbits);
810 CHECK_BITS;
811 *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1);
812 } else
813 {
814 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
815 }
816 FLUSH_BITS(lsb ? 1 : 0);
817 }
818 CHECK_BITS;
819 } while (--pairs_to_decode);
820 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
821 } else
822 {
823 do
824 {
825 np = *sfb++ / 2;
826 pairs_to_decode = MINIMP3_MIN(big_val_cnt, np);
827 one = *scf++;
828 do
829 {
830 int j, w = 5;
831 int leaf = codebook[PEEK_BITS(w)];
832 while (leaf < 0)
833 {
834 FLUSH_BITS(w);
835 w = leaf & 7;
836 leaf = codebook[PEEK_BITS(w) - (leaf >> 3)];
837 }
838 FLUSH_BITS(leaf >> 8);
839
840 for (j = 0; j < 2; j++, dst++, leaf >>= 4)
841 {
842 int lsb = leaf & 0x0F;
843 *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one;
844 FLUSH_BITS(lsb ? 1 : 0);
845 }
846 CHECK_BITS;
847 } while (--pairs_to_decode);
848 } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0);
849 }
850 }
851
852 for (np = 1 - big_val_cnt;; dst += 4)
853 {
854 const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32;
855 int leaf = codebook_count1[PEEK_BITS(4)];
856 if (!(leaf & 8))
857 {
858 leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))];
859 }
860 FLUSH_BITS(leaf & 7);
861 if (BSPOS > layer3gr_limit)
862 {
863 break;
864 }
865#define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; }
866#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) }
867 RELOAD_SCALEFACTOR;
868 DEQ_COUNT1(0);
869 DEQ_COUNT1(1);
870 RELOAD_SCALEFACTOR;
871 DEQ_COUNT1(2);
872 DEQ_COUNT1(3);
873 CHECK_BITS;
874 }
875
876 bs->pos = layer3gr_limit;
877}
878
/*
 * In-place sum/difference butterfly over n samples of a channel pair.
 *
 * The second channel is addressed 576 floats after `left`. For every
 * index i < n the pair (a, b) = (left[i], left[576 + i]) is replaced by
 * (a + b, a - b).
 */
static void L3_midside_stereo(float *left, int n)
{
    float *const other = left + 576;
    int pos = 0;
#if HAVE_SIMD
    if (have_simd())
    {
        /* Four samples per step while at least four remain. */
        while (pos < n - 3)
        {
            f4 vfirst = VLD(left + pos);
            f4 vsecond = VLD(other + pos);
            VSTORE(left + pos, VADD(vfirst, vsecond));
            VSTORE(other + pos, VSUB(vfirst, vsecond));
            pos += 4;
        }
#ifdef __GNUC__
        /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
         * For more info see: https://github.com/lieff/minimp3/issues/88
         */
        if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
            return;
#endif
    }
#endif /* HAVE_SIMD */
    /* Scalar path: everything when SIMD is off, otherwise the 0..3 leftovers. */
    while (pos < n)
    {
        const float first = left[pos];
        const float second = other[pos];
        left[pos] = first + second;
        other[pos] = first - second;
        pos++;
    }
}
910
/*
 * Scale one band into both channels: for each of the n samples, the value
 * stored at left[i] is written to left[i + 576] multiplied by kr, and then
 * left[i] itself is multiplied by kl.
 */
static void L3_intensity_stereo_band(float *left, int n, float kl, float kr)
{
    int idx = 0;
    while (idx < n)
    {
        /* The second channel must be derived before left[idx] is overwritten. */
        left[idx + 576] = left[idx]*kr;
        left[idx] = left[idx]*kl;
        idx++;
    }
}
920
/*
 * Find, for each of the three residue classes (band index modulo 3), the
 * highest band of `right` that contains a non-zero sample.
 *
 * right    - samples, laid out band after band
 * sfb      - width of each band in samples
 * nbands   - number of bands to scan
 * max_band - output; each entry is the highest matching band index, or -1
 *            when no band of that class holds a non-zero value
 *
 * Samples are examined two at a time.
 */
static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3])
{
    int band;

    for (band = 0; band < 3; band++)
    {
        max_band[band] = -1;
    }

    for (band = 0; band < nbands; band++)
    {
        const int width = sfb[band];
        int pos = 0;

        /* Advance past leading all-zero pairs. */
        while (pos < width && right[pos] == 0 && right[pos + 1] == 0)
        {
            pos += 2;
        }
        if (pos < width)
        {
            max_band[band % 3] = band;
        }
        right += width;
    }
}
940
/*
 * Walk the scalefactor bands (the list ends at the first zero width in
 * `sfb`) and apply stereo decoding to each band in place.
 *
 * A band gets intensity-stereo scaling when its index lies above
 * max_band[index % 3] and its position code ist_pos[index] is below the
 * limit (7 when HDR_TEST_MPEG1(hdr) holds, otherwise 64). Any other band
 * gets the mid/side butterfly when HDR_TEST_MS_STEREO(hdr) holds, and is
 * left untouched otherwise.
 */
static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh)
{
    /* (kl, kr) gain pairs for the MPEG-1 branch, indexed by position code 0..6 */
    static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 };
    const unsigned max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64;
    unsigned band;

    for (band = 0; sfb[band]; left += sfb[band], band++)
    {
        const unsigned ipos = ist_pos[band];
        float gain, kl, kr;

        if ((int)band <= max_band[band % 3] || ipos >= max_pos)
        {
            /* Not an intensity band. */
            if (HDR_TEST_MS_STEREO(hdr))
            {
                L3_midside_stereo(left, sfb[band]);
            }
            continue;
        }

        /* Both gains are scaled by 1.41421356 when M/S is also enabled. */
        gain = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1;
        if (HDR_TEST_MPEG1(hdr))
        {
            kl = g_pan[2*ipos];
            kr = g_pan[2*ipos + 1];
        } else
        {
            const float q = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh);
            /* Odd position codes put the computed gain on the left channel, even ones on the right. */
            if (ipos & 1)
            {
                kl = q;
                kr = 1;
            } else
            {
                kl = 1;
                kr = q;
            }
        }
        L3_intensity_stereo_band(left, sfb[band], kl*gain, kr*gain);
    }
}
974
975static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr)
976{
977 int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb;
978 int i, max_blocks = gr->n_short_sfb ? 3 : 1;
979
980 L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band);
981 if (gr->n_long_sfb)
982 {
983 max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]);
984 }
985 for (i = 0; i < max_blocks; i++)
986 {
987 int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0;
988 int itop = n_sfb - max_blocks + i;
989 int prev = itop - max_blocks;
990 ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev];
991 }
992 L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1);
993}
994
/*
 * Interleave groups of three equal-width bands.
 *
 * `sfb` is read at every third entry (sfb[0], sfb[3], ...) until a zero
 * width is found. For each width w, the next 3*w input samples are treated
 * as three consecutive runs of w samples and emitted as w triples
 * (run0[k], run1[k], run2[k]). The result is assembled in `scratch` and
 * then copied back over `grbuf`.
 */
static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb)
{
    const float *in = grbuf;
    size_t produced = 0;
    int width;

    while ((width = *sfb) != 0)
    {
        int k;
        for (k = 0; k < width; k++)
        {
            scratch[produced++] = in[k];
            scratch[produced++] = in[k + width];
            scratch[produced++] = in[k + 2*width];
        }
        in += 3*width;
        sfb += 3;
    }
    memcpy(grbuf, scratch, produced*sizeof(float));
}
1011
/*
 * Butterfly across `nbands` boundaries between adjacent 18-sample blocks.
 *
 * At each boundary the eight samples just above it (grbuf[18 + i]) are
 * paired with the eight samples just below it (grbuf[17 - i]) and rotated
 * with the coefficient pair (g_aa[0][i], g_aa[1][i]). The buffer pointer
 * then advances by 18 samples.
 */
static void L3_antialias(float *grbuf, int nbands)
{
    static const float g_aa[2][8] = {
        {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f},
        {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f}
    };

    while (nbands > 0)
    {
        int tap = 0;
#if HAVE_SIMD
        if (have_simd())
        {
            /* Two vector steps cover all eight taps; the lower side is mirrored with VREV. */
            while (tap < 8)
            {
                f4 vhi = VLD(grbuf + 18 + tap);
                f4 vlo = VLD(grbuf + 14 - tap);
                f4 vcs = VLD(g_aa[0] + tap);
                f4 vca = VLD(g_aa[1] + tap);
                vlo = VREV(vlo);
                VSTORE(grbuf + 18 + tap, VSUB(VMUL(vhi, vcs), VMUL(vlo, vca)));
                vlo = VADD(VMUL(vhi, vca), VMUL(vlo, vcs));
                VSTORE(grbuf + 14 - tap, VREV(vlo));
                tap += 4;
            }
        }
#endif /* HAVE_SIMD */
#ifndef MINIMP3_ONLY_SIMD
        while (tap < 8)
        {
            const float hi = grbuf[18 + tap];
            const float lo = grbuf[17 - tap];
            grbuf[18 + tap] = hi*g_aa[0][tap] - lo*g_aa[1][tap];
            grbuf[17 - tap] = hi*g_aa[1][tap] + lo*g_aa[0][tap];
            tap++;
        }
#endif /* MINIMP3_ONLY_SIMD */
        grbuf += 18;
        nbands--;
    }
}
1046
/*
 * In-place 9-point transform of y[0..8] (a DCT-III by its name), used twice
 * per subband by L3_imdct36().
 *
 * The even-indexed inputs (y[0], y[2], y[4], y[6], y[8]) and the odd-indexed
 * inputs (y[1], y[3], y[5], y[7]) are reduced separately and then combined
 * into the nine outputs. The temporaries s0..s8 and t0/t2/t4 are reused
 * for different intermediate values as the computation proceeds, so the
 * statement order below is significant.
 */
static void L3_dct3_9(float *y)
{
    float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4;

    /* Even-indexed half. Constants: cos(20 deg), cos(40 deg), cos(80 deg). */
    s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8];
    t0 = s0 + s6*0.5f;
    s0 -= s6;
    t4 = (s4 + s2)*0.93969262f;
    t2 = (s8 + s2)*0.76604444f;
    s6 = (s4 - s8)*0.17364818f;
    s4 += s8 - s2;

    s2 = s0 - s4*0.5f;
    y[4] = s4 + s0; /* the middle output depends only on the even half */
    s8 = t0 - t2 + s6;
    s0 = t0 - t4 + t2;
    s4 = t0 + t4 - s6;

    /* Odd-indexed half. Constants: sin(60 deg), sin(80 deg), sin(20 deg), sin(40 deg). */
    s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7];

    s3 *= 0.86602540f;
    t0 = (s5 + s1)*0.98480775f;
    t4 = (s5 - s7)*0.34202014f;
    t2 = (s1 + s7)*0.64278761f;
    s1 = (s1 - s5 - s7)*0.86602540f;

    s5 = t0 - s3 - t2;
    s7 = t4 - s3 - t0;
    s3 = t4 + s3 - t2;

    /* Combine: each even-half term pairs with one odd-half term as a sum and a difference. */
    y[0] = s4 - s7;
    y[1] = s2 + s1;
    y[2] = s0 - s3;
    y[3] = s8 + s5;
    y[5] = s8 - s5;
    y[6] = s0 + s3;
    y[7] = s2 - s1;
    y[8] = s4 + s7;
}
1086
/*
 * 36-point inverse MDCT with windowing and overlap-add for `nbands`
 * consecutive subbands.
 *
 * grbuf   - 18 coefficients per subband on input, 18 output samples per
 *           subband on return (in place)
 * overlap - 9 saved values per subband; read for the overlap-add and
 *           replaced with the values for the next call
 * window  - 18 window coefficients, used as two halves of 9
 */
static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands)
{
    static const float g_twid9[18] = {
        0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f
    };
    int band;

    for (band = 0; band < nbands; band++)
    {
        float co[9], si[9];
        const float *in = grbuf;
        int q, k;

        /* Fold the 18 inputs into two 9-point sequences. */
        co[0] = -in[0];
        si[0] = in[17];
        for (q = 0; q < 4; q++, in += 4)
        {
            si[8 - 2*q] = in[1] - in[2];
            co[1 + 2*q] = in[1] + in[2];
            si[7 - 2*q] = in[4] - in[3];
            co[2 + 2*q] = -(in[3] + in[4]);
        }
        L3_dct3_9(co);
        L3_dct3_9(si);

        /* Flip the sign of the odd-indexed terms of the second sequence. */
        for (k = 1; k < 9; k += 2)
        {
            si[k] = -si[k];
        }

        k = 0;

#if HAVE_SIMD
        if (have_simd())
        {
            /* Vector path handles indices 0..7; index 8 falls through to the scalar loop. */
            for (; k < 8; k += 4)
            {
                f4 vprev = VLD(overlap + k);
                f4 vco = VLD(co + k);
                f4 vsi = VLD(si + k);
                f4 vt0 = VLD(g_twid9 + k);
                f4 vt1 = VLD(g_twid9 + 9 + k);
                f4 vwin0 = VLD(window + k);
                f4 vwin1 = VLD(window + 9 + k);
                f4 vacc = VADD(VMUL(vco, vt1), VMUL(vsi, vt0));
                VSTORE(overlap + k, VSUB(VMUL(vco, vt0), VMUL(vsi, vt1)));
                VSTORE(grbuf + k, VSUB(VMUL(vprev, vwin0), VMUL(vacc, vwin1)));
                vacc = VADD(VMUL(vprev, vwin1), VMUL(vacc, vwin0));
                VSTORE(grbuf + 14 - k, VREV(vacc));
            }
        }
#endif /* HAVE_SIMD */
        for (; k < 9; k++)
        {
            const float prev = overlap[k];
            const float sum = co[k]*g_twid9[9 + k] + si[k]*g_twid9[0 + k];
            overlap[k] = co[k]*g_twid9[0 + k] - si[k]*g_twid9[9 + k];
            grbuf[k] = prev*window[0 + k] - sum*window[9 + k];
            grbuf[17 - k] = prev*window[9 + k] + sum*window[0 + k];
        }

        grbuf += 18;
        overlap += 9;
    }
}
1143
/*
 * 3-point transform helper used by L3_imdct12().
 *
 * Writes dst[0] = (x0 - x2/2) + x1*0.86602540,
 *        dst[1] =  x0 + x2,
 *        dst[2] = (x0 - x2/2) - x1*0.86602540.
 */
static void L3_idct3(float x0, float x1, float x2, float *dst)
{
    const float even = x0 - x2*0.5f;
    const float odd = x1*0.86602540f;

    dst[0] = even + odd;
    dst[1] = x0 + x2;
    dst[2] = even - odd;
}
1152
/*
 * One 12-point inverse MDCT step for a short block.
 *
 * x       - input coefficients, read at stride 3 (x[0], x[3], ..., x[15])
 * dst     - receives six output samples
 * overlap - three saved values; consumed for the overlap-add and replaced
 *           with the values for the next step
 */
static void L3_imdct12(float *x, float *dst, float *overlap)
{
    static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f };
    float co[3], si[3];
    int k = 0;

    L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co);
    L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si);
    si[1] = -si[1];

    while (k < 3)
    {
        const float prev = overlap[k];
        const float sum = co[k]*g_twid3[3 + k] + si[k]*g_twid3[0 + k];
        overlap[k] = co[k]*g_twid3[0 + k] - si[k]*g_twid3[3 + k];
        /* The same six constants are reused, mirrored, as the window. */
        dst[k] = prev*g_twid3[2 - k] - sum*g_twid3[5 - k];
        dst[5 - k] = prev*g_twid3[5 - k] + sum*g_twid3[2 - k];
        k++;
    }
}
1172
/*
 * Short-block inverse transform for `nbands` subbands (18 samples of grbuf
 * and 9 floats of overlap per subband).
 *
 * Per subband: the first six outputs are the saved overlap[0..5]; two
 * 12-point steps produce outputs 6..11 and 12..17; a third step writes the
 * new overlap[0..5]. All three steps share overlap[6..8] as running state.
 */
static void L3_imdct_short(float *grbuf, float *overlap, int nbands)
{
    while (nbands > 0)
    {
        float coef[18];

        /* Work from a private copy because grbuf is overwritten as we go. */
        memcpy(coef, grbuf, sizeof(coef));
        memcpy(grbuf, overlap, 6*sizeof(float));
        L3_imdct12(coef, grbuf + 6, overlap + 6);
        L3_imdct12(coef + 1, grbuf + 12, overlap + 6);
        L3_imdct12(coef + 2, overlap, overlap + 6);

        grbuf += 18;
        overlap += 9;
        nbands--;
    }
}
1185
/*
 * Negate every odd-indexed sample of every odd-numbered 18-sample block
 * among the 32 blocks of a granule (blocks 1, 3, ..., 31).
 */
static void L3_change_sign(float *grbuf)
{
    int block;

    for (block = 1; block < 32; block += 2)
    {
        float *blk = grbuf + 18*block;
        int k = 1;
        while (k < 18)
        {
            blk[k] = -blk[k];
            k += 2;
        }
    }
}
1193
1194static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands)
1195{
1196 static const float g_mdct_window[2][18] = {
1197 { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f },
1198 { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f }
1199 };
1200 if (n_long_bands)
1201 {
1202 L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands);
1203 grbuf += 18*n_long_bands;
1204 overlap += 9*n_long_bands;
1205 }
1206 if (block_type == SHORT_BLOCK_TYPE)
1207 L3_imdct_short(grbuf, overlap, 32 - n_long_bands);
1208 else
1209 L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands);
1210}
1211
1212static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s)
1213{
1214 int pos = (s->bs.pos + 7)/8u;
1215 int remains = s->bs.limit/8u - pos;
1216 if (remains > MAX_BITRESERVOIR_BYTES)
1217 {
1218 pos += remains - MAX_BITRESERVOIR_BYTES;
1219 remains = MAX_BITRESERVOIR_BYTES;
1220 }
1221 if (remains > 0)
1222 {
1223 memmove(h->reserv_buf, s->maindata + pos, remains);
1224 }
1225 h->reserv = remains;
1226}
1227
1228static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin)
1229{
1230 int frame_bytes = (bs->limit - bs->pos)/8;
1231 int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin);
1232 memcpy(s->maindata, h->reserv_buf + MINIMP3_MAX(0, h->reserv - main_data_begin), MINIMP3_MIN(h->reserv, main_data_begin));
1233 memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
1234 bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
1235 return h->reserv >= main_data_begin;
1236}
1237
1238static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch)
1239{
1240 int ch;
1241
1242 for (ch = 0; ch < nch; ch++)
1243 {
1244 int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length;
1245 L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch);
1246 L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit);
1247 }
1248
1249 if (HDR_TEST_I_STEREO(h->header))
1250 {
1251 L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header);
1252 } else if (HDR_IS_MS_STEREO(h->header))
1253 {
1254 L3_midside_stereo(s->grbuf[0], 576);
1255 }
1256
1257 for (ch = 0; ch < nch; ch++, gr_info++)
1258 {
1259 int aa_bands = 31;
1260 int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2);
1261
1262 if (gr_info->n_short_sfb)
1263 {
1264 aa_bands = n_long_bands - 1;
1265 L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb);
1266 }
1267
1268 L3_antialias(s->grbuf[ch], aa_bands);
1269 L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands);
1270 L3_change_sign(s->grbuf[ch]);
1271 }
1272}
1273
/*
 * In-place 32-point transform (a DCT-II by its name) applied to the first
 * `n` columns of `grbuf`.
 *
 * Column k consists of the 32 values grbuf[k + 18*j], j = 0..31; both the
 * inputs and the outputs use this stride of 18 floats.
 *
 * Each column is processed in three stages:
 *   1. a first butterfly layer folds the 32 inputs into four groups of
 *      eight values (t[0..3][0..7]), scaled by the g_sec table;
 *   2. each group of eight goes through an 8-point butterfly network;
 *   3. the four groups are recombined and written back.
 *
 * With HAVE_SIMD and have_simd() true, four columns are processed per
 * iteration; otherwise (unless MINIMP3_ONLY_SIMD is defined) the scalar
 * loop handles one column at a time.
 */
static void mp3d_DCT_II(float *grbuf, int n)
{
    /* Three scale factors per first-stage index i: g_sec[3*i + 0..2]. */
    static const float g_sec[24] = {
        10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f
    };
    int i, k = 0;
#if HAVE_SIMD
    if (have_simd()) for (; k < n; k += 4)
    {
        f4 t[4][8], *x;
        float *y = grbuf + k;

        /* Stage 1: fold rows i, 15-i, 16+i and 31-i into t[0..3][i]. */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            f4 x0 = VLD(&y[i*18]);
            f4 x1 = VLD(&y[(15 - i)*18]);
            f4 x2 = VLD(&y[(16 + i)*18]);
            f4 x3 = VLD(&y[(31 - i)*18]);
            f4 t0 = VADD(x0, x3);
            f4 t1 = VADD(x1, x2);
            f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]);
            f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]);
            x[0] = VADD(t0, t1);
            x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]);
            x[16] = VADD(t3, t2);
            x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]);
        }
        /* Stage 2: 8-point butterfly network on each of the four groups. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = VSUB(x0, x7); x0 = VADD(x0, x7);
            x7 = VSUB(x1, x6); x1 = VADD(x1, x6);
            x6 = VSUB(x2, x5); x2 = VADD(x2, x5);
            x5 = VSUB(x3, x4); x3 = VADD(x3, x4);
            x4 = VSUB(x0, x3); x0 = VADD(x0, x3);
            x3 = VSUB(x1, x2); x1 = VADD(x1, x2);
            x[0] = VADD(x0, x1);
            x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f);
            x5 = VADD(x5, x6);
            x6 = VMUL_S(VADD(x6, x7), 0.70710677f);
            x7 = VADD(x7, xt);
            x3 = VMUL_S(VADD(x3, x4), 0.70710677f);
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */
            x7 = VADD(x7, VMUL_S(x5, 0.382683432f));
            x5 = VSUB(x5, VMUL_S(x7, 0.198912367f));
            x0 = VSUB(xt, x6); xt = VADD(xt, x6);
            x[1] = VMUL_S(VADD(xt, x7), 0.50979561f);
            x[2] = VMUL_S(VADD(x4, x3), 0.54119611f);
            x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f);
            x[5] = VMUL_S(VADD(x0, x5), 0.89997619f);
            x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f);
            x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f);
        }

        /* Stage 3: recombine and store. When fewer than four columns remain
         * (k > n - 3), only the low two lanes of each vector are written. */
        if (k > n - 3)
        {
#if HAVE_SSE
#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
#else /* HAVE_SSE */
#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v))
#endif /* HAVE_SSE */
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE2(0, t[0][i]);
                VSAVE2(1, VADD(t[2][i], s));
                VSAVE2(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE2(3, VADD(t[2][1 + i], s));
            }
            /* Last four rows have no i + 1 neighbour. */
            VSAVE2(0, t[0][7]);
            VSAVE2(1, VADD(t[2][7], t[3][7]));
            VSAVE2(2, t[1][7]);
            VSAVE2(3, t[3][7]);
        } else
        {
#define VSAVE4(i, v) VSTORE(&y[i*18], v)
            for (i = 0; i < 7; i++, y += 4*18)
            {
                f4 s = VADD(t[3][i], t[3][i + 1]);
                VSAVE4(0, t[0][i]);
                VSAVE4(1, VADD(t[2][i], s));
                VSAVE4(2, VADD(t[1][i], t[1][i + 1]));
                VSAVE4(3, VADD(t[2][1 + i], s));
            }
            /* Last four rows have no i + 1 neighbour. */
            VSAVE4(0, t[0][7]);
            VSAVE4(1, VADD(t[2][7], t[3][7]));
            VSAVE4(2, t[1][7]);
            VSAVE4(3, t[3][7]);
        }
    } else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
    {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
#else /* MINIMP3_ONLY_SIMD */
    /* Scalar path: same three stages, one column per iteration. */
    for (; k < n; k++)
    {
        float t[4][8], *x, *y = grbuf + k;

        /* Stage 1: fold rows i, 15-i, 16+i and 31-i into t[0..3][i]. */
        for (x = t[0], i = 0; i < 8; i++, x++)
        {
            float x0 = y[i*18];
            float x1 = y[(15 - i)*18];
            float x2 = y[(16 + i)*18];
            float x3 = y[(31 - i)*18];
            float t0 = x0 + x3;
            float t1 = x1 + x2;
            float t2 = (x1 - x2)*g_sec[3*i + 0];
            float t3 = (x0 - x3)*g_sec[3*i + 1];
            x[0] = t0 + t1;
            x[8] = (t0 - t1)*g_sec[3*i + 2];
            x[16] = t3 + t2;
            x[24] = (t3 - t2)*g_sec[3*i + 2];
        }
        /* Stage 2: 8-point butterfly network on each of the four groups. */
        for (x = t[0], i = 0; i < 4; i++, x += 8)
        {
            float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt;
            xt = x0 - x7; x0 += x7;
            x7 = x1 - x6; x1 += x6;
            x6 = x2 - x5; x2 += x5;
            x5 = x3 - x4; x3 += x4;
            x4 = x0 - x3; x0 += x3;
            x3 = x1 - x2; x1 += x2;
            x[0] = x0 + x1;
            x[4] = (x0 - x1)*0.70710677f;
            x5 = x5 + x6;
            x6 = (x6 + x7)*0.70710677f;
            x7 = x7 + xt;
            x3 = (x3 + x4)*0.70710677f;
            x5 -= x7*0.198912367f; /* rotate by PI/8 */
            x7 += x5*0.382683432f;
            x5 -= x7*0.198912367f;
            x0 = xt - x6; xt += x6;
            x[1] = (xt + x7)*0.50979561f;
            x[2] = (x4 + x3)*0.54119611f;
            x[3] = (x0 - x5)*0.60134488f;
            x[5] = (x0 + x5)*0.89997619f;
            x[6] = (x4 - x3)*1.30656302f;
            x[7] = (xt - x7)*2.56291556f;

        }
        /* Stage 3: recombine the four groups, four output rows per step. */
        for (i = 0; i < 7; i++, y += 4*18)
        {
            y[0*18] = t[0][i];
            y[1*18] = t[2][i] + t[3][i] + t[3][i + 1];
            y[2*18] = t[1][i] + t[1][i + 1];
            y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1];
        }
        /* Last four rows have no i + 1 neighbour. */
        y[0*18] = t[0][7];
        y[1*18] = t[2][7] + t[3][7];
        y[2*18] = t[1][7];
        y[3*18] = t[3][7];
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1428
#ifndef MINIMP3_FLOAT_OUTPUT
/*
 * Convert one synthesized sample to signed 16-bit PCM.
 *
 * The value is saturated to [-32768, 32767] and rounded to the nearest
 * integer: 0.5 is added, the result is truncated toward zero, and 1 is
 * subtracted whenever the biased value is negative.
 *
 * The sign test is made on the biased float, not on the truncated integer.
 * Testing the integer loses the sign for biased values in (-1, 0), which
 * truncate to 0, so samples in (-1.5, -0.5) would be output as 0 instead
 * of -1. Testing the float matches what the NEON path of mp3d_synth() does.
 */
static int16_t mp3d_scale_pcm(float sample)
{
#if HAVE_ARMV6
    float biased = sample + .5f;
    int32_t s32 = (int32_t)biased;
    s32 -= (biased < 0);
    int16_t s = (int16_t)minimp3_clip_int16_arm(s32);
#else
    if (sample >= 32766.5) return (int16_t) 32767;
    if (sample <= -32767.5) return (int16_t)-32768;
    float biased = sample + .5f;
    int16_t s = (int16_t)biased;
    s -= (biased < 0); /* away from zero, to be compliant */
#endif
    return s;
}
#else /* MINIMP3_FLOAT_OUTPUT */
/*
 * Float output: scale the sample by 1/32768.
 */
static float mp3d_scale_pcm(float sample)
{
    return sample*(1.f/32768.f);
}
#endif /* MINIMP3_FLOAT_OUTPUT */
1450
1451static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z)
1452{
1453 float a;
1454 a = (z[14*64] - z[ 0]) * 29;
1455 a += (z[ 1*64] + z[13*64]) * 213;
1456 a += (z[12*64] - z[ 2*64]) * 459;
1457 a += (z[ 3*64] + z[11*64]) * 2037;
1458 a += (z[10*64] - z[ 4*64]) * 5153;
1459 a += (z[ 5*64] + z[ 9*64]) * 6574;
1460 a += (z[ 8*64] - z[ 6*64]) * 37489;
1461 a += z[ 7*64] * 75038;
1462 pcm[0] = mp3d_scale_pcm(a);
1463
1464 z += 2;
1465 a = z[14*64] * 104;
1466 a += z[12*64] * 1567;
1467 a += z[10*64] * 9727;
1468 a += z[ 8*64] * 64019;
1469 a += z[ 6*64] * -9975;
1470 a += z[ 4*64] * -45;
1471 a += z[ 2*64] * 146;
1472 a += z[ 0*64] * -5;
1473 pcm[16*nch] = mp3d_scale_pcm(a);
1474}
1475
/*
 * Windowed synthesis for a pair of adjacent time slots.
 *
 * xl   - transformed samples of the first channel; read at xl[18*j] and
 *        xl[1 + 18*j] for j = 0..31
 * dstl - output for the first channel; samples are written with a stride
 *        of `nch`, at indices 0..63 times nch
 * nch  - number of channels (the second channel is read 576 floats after
 *        xl and written one sample after dstl; with nch == 1 both channel
 *        pointers coincide with the first channel)
 * lins - working buffer; this call fills rows around lins + 15*64 and reads
 *        rows up to 15*64 floats before that point
 *
 * Four samples (two slots x two channels) are packed side by side in each
 * group of four floats of `zlin`. Samples 0/16/32/48 of each channel are
 * produced by mp3d_synth_pair(); the remaining ones come from the loop,
 * which for each i = 14..0 yields eight outputs (a[0..3] and b[0..3]).
 */
static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins)
{
    int i;
    float *xr = xl + 576*(nch - 1);
    mp3d_sample_t *dstr = dstl + (nch - 1);

    /* Window coefficients: 15 rows of 16, consumed strictly in order through `w`
     * (one row per loop iteration, two coefficients per V0/V1/V2 or S0/S1/S2 step). */
    static const float g_win[] = {
        -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992,
        -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856,
        -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630,
        -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313,
        -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908,
        -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415,
        -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835,
        -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169,
        -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420,
        -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590,
        -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679,
        -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692,
        -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629,
        -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494,
        -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290
    };
    float *zlin = lins + 15*64;
    const float *w = g_win;

    /* Load the inputs needed by the four mp3d_synth_pair() calls below. */
    zlin[4*15] = xl[18*16];
    zlin[4*15 + 1] = xr[18*16];
    zlin[4*15 + 2] = xl[0];
    zlin[4*15 + 3] = xr[0];

    zlin[4*31] = xl[1 + 18*16];
    zlin[4*31 + 1] = xr[1 + 18*16];
    zlin[4*31 + 2] = xl[1];
    zlin[4*31 + 3] = xr[1];

    /* Second-channel outputs are written first; with nch == 1 the first-channel
     * calls then overwrite the same locations. */
    mp3d_synth_pair(dstr, nch, lins + 4*15 + 1);
    mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1);
    mp3d_synth_pair(dstl, nch, lins + 4*15);
    mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64);

#if HAVE_SIMD
    if (have_simd()) for (i = 14; i >= 0; i--)
    {
        /* VLOAD(k) fetches the next two window coefficients and the two rows k and
         * 15 - k (64 floats apart per row) below the current group. V0 starts the
         * accumulators a and b; V1 and V2 add to them, V2 with the sign of the
         * `a` term reversed. */
#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]);
#define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); }
#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); }
#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); }
        f4 a, b;
        /* Feed this iteration's new inputs into the working buffer. */
        zlin[4*i] = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*i + 64] = xl[1 + 18*(1 + i)];
        zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)];
        zlin[4*i - 64 + 2] = xl[18*(1 + i)];
        zlin[4*i - 64 + 3] = xr[18*(1 + i)];

        V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7)

        {
#ifndef MINIMP3_FLOAT_OUTPUT
#if HAVE_SSE
            /* Clamp to the int16 range, convert, and pack a and b into eight 16-bit lanes. */
            static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f };
            static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
            __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
            _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
            dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1);
            dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5);
            dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0);
            dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4);
            dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3);
            dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7);
            dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2);
            dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6);
#else /* HAVE_SSE */
            /* Add 0.5, convert, then add the all-ones compare mask (i.e. -1) where the
             * biased value is negative; narrowing saturates to int16. */
            int16x4_t pcma, pcmb;
            a = VADD(a, VSET(0.5f));
            b = VADD(b, VSET(0.5f));
            pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
            pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
            vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1);
            vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1);
            vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0);
            vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0);
            vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3);
            vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3);
            vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2);
            vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
#endif /* HAVE_SSE */

#else /* MINIMP3_FLOAT_OUTPUT */

// -- GODOT start --
            /* On MSVC ARM targets the vector is filled lane by lane instead of
             * using an aggregate initializer. */
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_ARM))
            static f4 g_scale;
            g_scale = vsetq_lane_f32(1.0f/32768.0f, g_scale, 0);
            g_scale = vsetq_lane_f32(1.0f/32768.0f, g_scale, 1);
            g_scale = vsetq_lane_f32(1.0f/32768.0f, g_scale, 2);
            g_scale = vsetq_lane_f32(1.0f/32768.0f, g_scale, 3);
#else
            static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
#endif
// -- GODOT end --

            /* Float output: scale by 1/32768 and store each lane separately. */
            a = VMUL(a, g_scale);
            b = VMUL(b, g_scale);
#if HAVE_SSE
            _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1)));
            _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0)));
            _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3)));
            _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2)));
            _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2)));
#else /* HAVE_SSE */
            vst1q_lane_f32(dstr + (15 - i)*nch, a, 1);
            vst1q_lane_f32(dstr + (17 + i)*nch, b, 1);
            vst1q_lane_f32(dstl + (15 - i)*nch, a, 0);
            vst1q_lane_f32(dstl + (17 + i)*nch, b, 0);
            vst1q_lane_f32(dstr + (47 - i)*nch, a, 3);
            vst1q_lane_f32(dstr + (49 + i)*nch, b, 3);
            vst1q_lane_f32(dstl + (47 - i)*nch, a, 2);
            vst1q_lane_f32(dstl + (49 + i)*nch, b, 2);
#endif /* HAVE_SSE */
#endif /* MINIMP3_FLOAT_OUTPUT */
        }
    } else
#endif /* HAVE_SIMD */
#ifdef MINIMP3_ONLY_SIMD
    {} /* for HAVE_SIMD=1, MINIMP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
#else /* MINIMP3_ONLY_SIMD */
    for (i = 14; i >= 0; i--)
    {
        /* Scalar counterparts of VLOAD/V0/V1/V2: S0 starts a[] and b[], S1 and S2
         * accumulate, S2 with the sign of the a[] term reversed. */
#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64];
#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; }
#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; }
#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; }
        float a[4], b[4];

        /* Feed this iteration's new inputs into the working buffer. */
        zlin[4*i] = xl[18*(31 - i)];
        zlin[4*i + 1] = xr[18*(31 - i)];
        zlin[4*i + 2] = xl[1 + 18*(31 - i)];
        zlin[4*i + 3] = xr[1 + 18*(31 - i)];
        zlin[4*(i + 16)] = xl[1 + 18*(1 + i)];
        zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)];
        zlin[4*(i - 16) + 2] = xl[18*(1 + i)];
        zlin[4*(i - 16) + 3] = xr[18*(1 + i)];

        S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7)

        /* Lanes 0/1 belong to the first slot (first/second channel), lanes 2/3 to
         * the second slot, which is written 32 output positions later. */
        dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]);
        dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]);
        dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]);
        dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]);
        dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]);
        dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]);
        dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]);
        dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]);
    }
#endif /* MINIMP3_ONLY_SIMD */
}
1639
1640static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins)
1641{
1642 int i;
1643 for (i = 0; i < nch; i++)
1644 {
1645 mp3d_DCT_II(grbuf + 576*i, nbands);
1646 }
1647
1648 memcpy(lins, qmf_state, sizeof(float)*15*64);
1649
1650 for (i = 0; i < nbands; i += 2)
1651 {
1652 mp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64);
1653 }
1654#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL
1655 if (nch == 1)
1656 {
1657 for (i = 0; i < 15*64; i += 2)
1658 {
1659 qmf_state[i] = lins[nbands*64 + i];
1660 }
1661 } else
1662#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */
1663 {
1664 memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);
1665 }
1666}
1667
1668static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes)
1669{
1670 int i, nmatch;
1671 for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++)
1672 {
1673 i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i);
1674 if (i + HDR_SIZE > mp3_bytes)
1675 return nmatch > 0;
1676 if (!hdr_compare(hdr, hdr + i))
1677 return 0;
1678 }
1679 return 1;
1680}
1681
1682static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes)
1683{
1684 int i, k;
1685 for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++)
1686 {
1687 if (hdr_valid(mp3))
1688 {
1689 int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes);
1690 int frame_and_padding = frame_bytes + hdr_padding(mp3);
1691
1692 for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++)
1693 {
1694 if (hdr_compare(mp3, mp3 + k))
1695 {
1696 int fb = k - hdr_padding(mp3);
1697 int nextfb = fb + hdr_padding(mp3 + k);
1698 if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb))
1699 continue;
1700 frame_and_padding = k;
1701 frame_bytes = fb;
1702 *free_format_bytes = fb;
1703 }
1704 }
1705 if ((frame_bytes && i + frame_and_padding <= mp3_bytes &&
1706 mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) ||
1707 (!i && frame_and_padding == mp3_bytes))
1708 {
1709 *ptr_frame_bytes = frame_and_padding;
1710 return i;
1711 }
1712 *free_format_bytes = 0;
1713 }
1714 }
1715 *ptr_frame_bytes = 0;
1716 return mp3_bytes;
1717}
1718
1719void mp3dec_init(mp3dec_t *dec)
1720{
1721 dec->header[0] = 0;
1722}
1723
1724int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info)
1725{
1726 int i = 0, igr, frame_size = 0, success = 1;
1727 const uint8_t *hdr;
1728 bs_t bs_frame[1];
1729 mp3dec_scratch_t scratch;
1730
1731 if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3))
1732 {
1733 frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3);
1734 if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size)))
1735 {
1736 frame_size = 0;
1737 }
1738 }
1739 if (!frame_size)
1740 {
1741 memset(dec, 0, sizeof(mp3dec_t));
1742 i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
1743 if (!frame_size || i + frame_size > mp3_bytes)
1744 {
1745 info->frame_bytes = i;
1746 return 0;
1747 }
1748 }
1749
1750 hdr = mp3 + i;
1751 memcpy(dec->header, hdr, HDR_SIZE);
1752 info->frame_bytes = i + frame_size;
1753 info->frame_offset = i;
1754 info->channels = HDR_IS_MONO(hdr) ? 1 : 2;
1755 info->hz = hdr_sample_rate_hz(hdr);
1756 info->layer = 4 - HDR_GET_LAYER(hdr);
1757 info->bitrate_kbps = hdr_bitrate_kbps(hdr);
1758
1759 if (!pcm)
1760 {
1761 return hdr_frame_samples(hdr);
1762 }
1763
1764 bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE);
1765 if (HDR_IS_CRC(hdr))
1766 {
1767 get_bits(bs_frame, 16);
1768 }
1769
1770 if (info->layer == 3)
1771 {
1772 int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr);
1773 if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
1774 {
1775 mp3dec_init(dec);
1776 return 0;
1777 }
1778 success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
1779 if (success)
1780 {
1781 for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels)
1782 {
1783 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1784 L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
1785 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]);
1786 }
1787 }
1788 L3_save_reservoir(dec, &scratch);
1789 } else
1790 {
1791#ifdef MINIMP3_ONLY_MP3
1792 return 0;
1793#else /* MINIMP3_ONLY_MP3 */
1794 L12_scale_info sci[1];
1795 L12_read_scale_info(hdr, bs_frame, sci);
1796
1797 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1798 for (i = 0, igr = 0; igr < 3; igr++)
1799 {
1800 if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
1801 {
1802 i = 0;
1803 L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
1804 mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]);
1805 memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
1806 pcm += 384*info->channels;
1807 }
1808 if (bs_frame->pos > bs_frame->limit)
1809 {
1810 mp3dec_init(dec);
1811 return 0;
1812 }
1813 }
1814#endif /* MINIMP3_ONLY_MP3 */
1815 }
1816 return success*hdr_frame_samples(dec->header);
1817}
1818
1819#ifdef MINIMP3_FLOAT_OUTPUT
/*
 * Converts num_samples float samples from `in` to 16-bit PCM in `out`.
 *
 * Each input is multiplied by 32768 and saturated to [-32768, 32767].
 * When HAVE_SIMD is set, the largest multiple of 8 samples is converted
 * with SSE or NEON intrinsics; the remaining samples (or all of them
 * without SIMD) go through the scalar loop at the end.
 *
 * Scalar rounding: 0.5 is added, the value is truncated toward zero, and 1
 * is subtracted when the biased value is negative. The sign test is made on
 * the biased float, not on the truncated integer, so values in (-1.5, -0.5)
 * become -1 rather than 0. This is the same rule the NEON branch applies.
 */
void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples)
{
    int i = 0;
#if HAVE_SIMD
    int aligned_count = num_samples & ~7;
    for(; i < aligned_count; i += 8)
    {

// -- GODOT start --
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC) || defined(_M_ARM))
        /* MSVC on ARM: the vector constant is built lane by lane instead of with a brace initializer */
        static f4 g_scale;
        g_scale = vsetq_lane_f32(32768.0f, g_scale, 0);
        g_scale = vsetq_lane_f32(32768.0f, g_scale, 1);
        g_scale = vsetq_lane_f32(32768.0f, g_scale, 2);
        g_scale = vsetq_lane_f32(32768.0f, g_scale, 3);
#else
        static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f };
#endif
// -- GODOT end --

        f4 a = VMUL(VLD(&in[i  ]), g_scale);
        f4 b = VMUL(VLD(&in[i+4]), g_scale);
#if HAVE_SSE
        /* clamp in float, convert to int32, then pack two vectors into eight int16 */
        static const f4 g_max = {  32767.0f,  32767.0f,  32767.0f,  32767.0f };
        static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f };
        __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)),
                                       _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min)));
        out[i  ] = _mm_extract_epi16(pcm8, 0);
        out[i+1] = _mm_extract_epi16(pcm8, 1);
        out[i+2] = _mm_extract_epi16(pcm8, 2);
        out[i+3] = _mm_extract_epi16(pcm8, 3);
        out[i+4] = _mm_extract_epi16(pcm8, 4);
        out[i+5] = _mm_extract_epi16(pcm8, 5);
        out[i+6] = _mm_extract_epi16(pcm8, 6);
        out[i+7] = _mm_extract_epi16(pcm8, 7);
#else /* HAVE_SSE */
        int16x4_t pcma, pcmb;
        a = VADD(a, VSET(0.5f));
        b = VADD(b, VSET(0.5f));
        /* the compare mask is all-ones (-1) for negative biased lanes; adding it subtracts 1 */
        pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0)))));
        pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0)))));
        vst1_lane_s16(out+i  , pcma, 0);
        vst1_lane_s16(out+i+1, pcma, 1);
        vst1_lane_s16(out+i+2, pcma, 2);
        vst1_lane_s16(out+i+3, pcma, 3);
        vst1_lane_s16(out+i+4, pcmb, 0);
        vst1_lane_s16(out+i+5, pcmb, 1);
        vst1_lane_s16(out+i+6, pcmb, 2);
        vst1_lane_s16(out+i+7, pcmb, 3);
#endif /* HAVE_SSE */
    }
#endif /* HAVE_SIMD */
    for(; i < num_samples; i++)
    {
        float sample = in[i] * 32768.0f;
        if (sample >= 32766.5)
            out[i] = (int16_t) 32767;
        else if (sample <= -32767.5)
            out[i] = (int16_t)-32768;
        else
        {
            /*
             * Decide the correction from the biased float: testing the
             * truncated integer would miss biased values in (-1, 0), which
             * truncate to 0 and would then never be adjusted.
             */
            float biased = sample + .5f;
            int16_t s = (int16_t)biased;
            s -= (biased < 0);  /* away from zero, to be compliant */
            out[i] = s;
        }
    }
}
1887#endif /* MINIMP3_FLOAT_OUTPUT */
1888#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */
1889