1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#include "SDL_internal.h"
22
23#include "SDL_sysaudio.h"
24
25#include "SDL_audioresample.h"
26
27// SDL's resampler uses a "bandlimited interpolation" algorithm:
28// https://ccrma.stanford.edu/~jos/resample/
29
// TODO: Support changing this at runtime?
// Number of sinc zero crossings on each side of the filter's center.
#if !defined(SDL_SSE_INTRINSICS) && !defined(SDL_NEON_INTRINSICS)
#define RESAMPLER_ZERO_CROSSINGS 5
#else
// SIMD builds (SSE is basically mandatory these days) use one more crossing so that
// RESAMPLER_SAMPLES_PER_FRAME is a multiple of 4, which keeps the vector code simple.
#define RESAMPLER_ZERO_CROSSINGS 6
#endif

// Number of input frames (filter taps) combined to produce one output frame.
#define RESAMPLER_SAMPLES_PER_FRAME (RESAMPLER_ZERO_CROSSINGS * 2)

// For a given srcpos, the frames `srcpos + frame` are sampled,
// where `-RESAMPLER_ZERO_CROSSINGS < frame <= RESAMPLER_ZERO_CROSSINGS`.
// When upsampling, sampling may also start from `srcpos = -1`, hence the extra frame.
#define RESAMPLER_MAX_PADDING_FRAMES (RESAMPLER_ZERO_CROSSINGS + 1)

// Each zero crossing is split into (1 << bits) filter positions.
// More bits gives more precision, at the cost of a larger table.
#define RESAMPLER_BITS_PER_ZERO_CROSSING 3
#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING (1 << RESAMPLER_BITS_PER_ZERO_CROSSING)

// The remaining bits of the 32-bit source fraction interpolate within one filter position.
#define RESAMPLER_FILTER_INTERP_BITS (32 - RESAMPLER_BITS_PER_ZERO_CROSSING)
#define RESAMPLER_FILTER_INTERP_RANGE (1 << RESAMPLER_FILTER_INTERP_BITS)
50
51// ResampleFrame is just a vector/matrix/matrix multiplication.
52// It performs cubic interpolation of the filter, then multiplies that with the input.
53// dst = [1, frac, frac^2, frac^3] * filter * src
54
55// Cubic Polynomial
// Coefficients of a cubic polynomial: v[0] + v[1]*x + v[2]*x^2 + v[3]*x^3.
// On SIMD builds the same four floats can also be accessed as a single vector,
// which additionally gives the union vector alignment.
typedef union Cubic
{
    // Scalar view of the four coefficients
    float v[4];

#if defined(SDL_SSE_INTRINSICS)
    // Aligned loads can be used directly as memory operands for mul/add
    __m128 v128;
#endif

#if defined(SDL_NEON_INTRINSICS)
    float32x4_t v128;
#endif

} Cubic;
70
71static void ResampleFrame_Generic(const float *src, float *dst, const Cubic *filter, float frac, int chans)
72{
73 const float frac2 = frac * frac;
74 const float frac3 = frac * frac2;
75
76 int i, chan;
77 float scales[RESAMPLER_SAMPLES_PER_FRAME];
78
79 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
80 scales[i] = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
81 }
82
83 for (chan = 0; chan < chans; ++chan) {
84 float out = 0.0f;
85
86 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i) {
87 out += src[i * chans + chan] * scales[i];
88 }
89
90 dst[chan] = out;
91 }
92}
93
94static void ResampleFrame_Mono(const float *src, float *dst, const Cubic *filter, float frac, int chans)
95{
96 const float frac2 = frac * frac;
97 const float frac3 = frac * frac2;
98
99 int i;
100 float out = 0.0f;
101
102 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
103 // Interpolate between the nearest two filters
104 const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
105
106 out += src[i] * scale;
107 }
108
109 dst[0] = out;
110}
111
112static void ResampleFrame_Stereo(const float *src, float *dst, const Cubic *filter, float frac, int chans)
113{
114 const float frac2 = frac * frac;
115 const float frac3 = frac * frac2;
116
117 int i;
118 float out0 = 0.0f;
119 float out1 = 0.0f;
120
121 for (i = 0; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
122 // Interpolate between the nearest two filters
123 const float scale = filter->v[0] + (filter->v[1] * frac) + (filter->v[2] * frac2) + (filter->v[3] * frac3);
124
125 out0 += src[i * 2 + 0] * scale;
126 out1 += src[i * 2 + 1] * scale;
127 }
128
129 dst[0] = out0;
130 dst[1] = out1;
131}
132
#ifdef SDL_SSE_INTRINSICS
// Not-so-fused multiply-add: a + (b * c), built from a separate multiply and add
#define sdl_madd_ps(a, b, c) _mm_add_ps(a, _mm_mul_ps(b, c))

// SSE kernel: produces one output frame for any channel count.
//
// src    - 12 interleaved input frames of `chans` floats each
// dst    - receives `chans` output samples
// filter - 12 cubics whose coefficients were transposed in groups of 4 by
//          SetupAudioResampler, so filter[4n + k] holds coefficient k of taps 4n..4n+3
// frac   - position at which each tap's cubic is evaluated
// chans  - number of interleaved channels
static void SDL_TARGETING("sse") ResampleFrame_Generic_SSE(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    // Interpolated filter weights for taps 0-3, 4-7 and 8-11
    __m128 w0, w1, w2;

    {
        const __m128 t1 = _mm_set1_ps(frac);
        const __m128 t2 = _mm_mul_ps(t1, t1);
        const __m128 t3 = _mm_mul_ps(t1, t2);

// Evaluate four cubics at once: w = c0 + c1*t + c2*t^2 + c3*t^3
// Explicitly use _mm_load_ps to workaround ICE in GCC 4.9.4 accessing Cubic.v128
#define EVAL_FOUR_TAPS(w)                                 \
    w = _mm_load_ps(filter[0].v);                         \
    w = sdl_madd_ps(w, t1, _mm_load_ps(filter[1].v));     \
    w = sdl_madd_ps(w, t2, _mm_load_ps(filter[2].v));     \
    w = sdl_madd_ps(w, t3, _mm_load_ps(filter[3].v));     \
    filter += 4

        EVAL_FOUR_TAPS(w0);
        EVAL_FOUR_TAPS(w1);
        EVAL_FOUR_TAPS(w2);

#undef EVAL_FOUR_TAPS
    }

    if (chans == 1) {
        // Mono: the 12 samples are contiguous, so multiply them directly by the weights
        __m128 acc = _mm_mul_ps(w0, _mm_loadu_ps(src + 0));
        acc = sdl_madd_ps(acc, w1, _mm_loadu_ps(src + 4));
        acc = sdl_madd_ps(acc, w2, _mm_loadu_ps(src + 8));

        // Horizontal sum of the four lanes into lane 0
        __m128 swapped = _mm_shuffle_ps(acc, acc, _MM_SHUFFLE(2, 3, 0, 1));
        acc = _mm_add_ps(acc, swapped);
        acc = _mm_add_ss(acc, _mm_movehl_ps(swapped, acc));

        _mm_store_ss(dst, acc);
        return;
    }

    if (chans == 2) {
        // Stereo: duplicate each weight so it lines up with an interleaved sample pair.
        // Two accumulators are used to improve throughput.
        __m128 even = _mm_mul_ps(_mm_loadu_ps(src + 0), _mm_unpacklo_ps(w0, w0));
        __m128 odd = _mm_mul_ps(_mm_loadu_ps(src + 4), _mm_unpackhi_ps(w0, w0));
        even = sdl_madd_ps(even, _mm_loadu_ps(src + 8), _mm_unpacklo_ps(w1, w1));
        odd = sdl_madd_ps(odd, _mm_loadu_ps(src + 12), _mm_unpackhi_ps(w1, w1));
        even = sdl_madd_ps(even, _mm_loadu_ps(src + 16), _mm_unpacklo_ps(w2, w2));
        odd = sdl_madd_ps(odd, _mm_loadu_ps(src + 20), _mm_unpackhi_ps(w2, w2));

        // Merge the accumulators, then fold the upper pair onto the lower pair
        __m128 total = _mm_add_ps(even, odd);
        total = _mm_add_ps(total, _mm_movehl_ps(total, total));

        // Store the two low lanes
        _mm_storel_pi((__m64 *)dst, total);
        return;
    }

    int chan = 0;

    // Process 4 channels at once: each tap's weight is broadcast across the four channels
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        __m128 acc0 = _mm_setzero_ps();
        __m128 acc1 = _mm_setzero_ps();

// Accumulate one input frame scaled by lane `lane` of `w`, then step to the next frame
#define MAC_TAP(w, lane, acc)                                                                            \
    acc = sdl_madd_ps(acc, _mm_loadu_ps(in), _mm_shuffle_ps(w, w, _MM_SHUFFLE(lane, lane, lane, lane))); \
    in += chans

// Accumulate four consecutive frames, alternating between the two accumulators
#define MAC_FOUR_TAPS(w) \
    MAC_TAP(w, 0, acc0); \
    MAC_TAP(w, 1, acc1); \
    MAC_TAP(w, 2, acc0); \
    MAC_TAP(w, 3, acc1)

        MAC_FOUR_TAPS(w0);
        MAC_FOUR_TAPS(w1);
        MAC_FOUR_TAPS(w2);

#undef MAC_TAP
#undef MAC_FOUR_TAPS

        // Merge the accumulators and store four output channels
        __m128 total = _mm_add_ps(acc0, acc1);

        _mm_storeu_ps(&dst[chan], total);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    // Without vgatherdps (AVX2), this gets quite messy.
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        __m128 s0, s1, s2;

// Gather this channel's samples from four consecutive frames into one vector
#define GATHER_FOUR(vec)                                                                 \
    vec = _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans));                     \
    in += chans + chans;                                                                 \
    vec = _mm_movelh_ps(vec, _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans))); \
    in += chans + chans

        GATHER_FOUR(s0);
        GATHER_FOUR(s1);
        GATHER_FOUR(s2);

#undef GATHER_FOUR

        __m128 acc = _mm_mul_ps(w0, s0);
        acc = sdl_madd_ps(acc, w1, s1);
        acc = sdl_madd_ps(acc, w2, s2);

        // Horizontal sum of the four lanes into lane 0
        __m128 swapped = _mm_shuffle_ps(acc, acc, _MM_SHUFFLE(2, 3, 0, 1));
        acc = _mm_add_ps(acc, swapped);
        acc = _mm_add_ss(acc, _mm_movehl_ps(swapped, acc));

        _mm_store_ss(&dst[chan], acc);
    }
}

#undef sdl_madd_ps
#endif
266
#ifdef SDL_NEON_INTRINSICS
// NEON kernel: produces one output frame for any channel count.
//
// src    - 12 interleaved input frames of `chans` floats each
// dst    - receives `chans` output samples
// filter - 12 cubics whose coefficients were transposed in groups of 4 by
//          SetupAudioResampler, so filter[4n + k] holds coefficient k of taps 4n..4n+3
// frac   - position at which each tap's cubic is evaluated
// chans  - number of interleaved channels
static void ResampleFrame_Generic_NEON(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    // Interpolated filter weights for taps 0-3, 4-7 and 8-11
    float32x4_t w0, w1, w2;

    {
        const float32x4_t t1 = vdupq_n_f32(frac);
        const float32x4_t t2 = vmulq_f32(t1, t1);
        const float32x4_t t3 = vmulq_f32(t1, t2);

// Evaluate four cubics at once: w = ((c0 + c1*t) + c2*t^2) + c3*t^3
#define EVAL_FOUR_TAPS(w)                              \
    w = vmlaq_f32(filter[0].v128, filter[1].v128, t1); \
    w = vmlaq_f32(w, filter[2].v128, t2);              \
    w = vmlaq_f32(w, filter[3].v128, t3);              \
    filter += 4

        EVAL_FOUR_TAPS(w0);
        EVAL_FOUR_TAPS(w1);
        EVAL_FOUR_TAPS(w2);

#undef EVAL_FOUR_TAPS
    }

    if (chans == 1) {
        // Mono: the 12 samples are contiguous, so multiply them directly by the weights
        float32x4_t acc = vmulq_f32(w0, vld1q_f32(src + 0));
        acc = vmlaq_f32(acc, w1, vld1q_f32(src + 4));
        acc = vmlaq_f32(acc, w2, vld1q_f32(src + 8));

        // Horizontal sum of the four lanes
        float32x2_t folded = vadd_f32(vget_low_f32(acc), vget_high_f32(acc));
        folded = vpadd_f32(folded, folded);

        vst1_lane_f32(dst, folded, 0);
        return;
    }

    if (chans == 2) {
        // Stereo: duplicate each weight so it lines up with an interleaved sample pair
        float32x4x2_t dup0 = vzipq_f32(w0, w0);
        float32x4x2_t dup1 = vzipq_f32(w1, w1);
        float32x4x2_t dup2 = vzipq_f32(w2, w2);

        // Two accumulators are used to improve throughput
        float32x4_t even = vmulq_f32(vld1q_f32(src + 0), dup0.val[0]);
        float32x4_t odd = vmulq_f32(vld1q_f32(src + 4), dup0.val[1]);
        even = vmlaq_f32(even, vld1q_f32(src + 8), dup1.val[0]);
        odd = vmlaq_f32(odd, vld1q_f32(src + 12), dup1.val[1]);
        even = vmlaq_f32(even, vld1q_f32(src + 16), dup2.val[0]);
        odd = vmlaq_f32(odd, vld1q_f32(src + 20), dup2.val[1]);

        // Merge the accumulators, then fold the upper pair onto the lower pair
        even = vaddq_f32(even, odd);
        float32x2_t frame = vadd_f32(vget_low_f32(even), vget_high_f32(even));

        // Store both output channels
        vst1_f32(dst, frame);
        return;
    }

    int chan = 0;

    // Process 4 channels at once: each tap's weight is broadcast across the four channels
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        float32x4_t acc0 = vdupq_n_f32(0);
        float32x4_t acc1 = vdupq_n_f32(0);

// Accumulate one input frame scaled by lane `lane` of `half`, then step to the next frame
#define MAC_TAP(half, lane, acc)                                  \
    acc = vmlaq_f32(acc, vld1q_f32(in), vdupq_lane_f32(half, lane)); \
    in += chans

// Accumulate four consecutive frames, alternating between the two accumulators
#define MAC_FOUR_TAPS(w)                 \
    MAC_TAP(vget_low_f32(w), 0, acc0);   \
    MAC_TAP(vget_low_f32(w), 1, acc1);   \
    MAC_TAP(vget_high_f32(w), 0, acc0);  \
    MAC_TAP(vget_high_f32(w), 1, acc1)

        MAC_FOUR_TAPS(w0);
        MAC_FOUR_TAPS(w1);
        MAC_FOUR_TAPS(w2);

#undef MAC_TAP
#undef MAC_FOUR_TAPS

        // Merge the accumulators and store four output channels
        float32x4_t total = vaddq_f32(acc0, acc1);

        vst1q_f32(&dst[chan], total);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        float32x4_t s0, s1, s2;

// Gather this channel's samples from four consecutive frames into one vector
#define GATHER_FOUR(vec)              \
    vec = vld1q_dup_f32(in);          \
    in += chans;                      \
    vec = vld1q_lane_f32(in, vec, 1); \
    in += chans;                      \
    vec = vld1q_lane_f32(in, vec, 2); \
    in += chans;                      \
    vec = vld1q_lane_f32(in, vec, 3); \
    in += chans

        GATHER_FOUR(s0);
        GATHER_FOUR(s1);
        GATHER_FOUR(s2);

#undef GATHER_FOUR

        float32x4_t acc = vmulq_f32(w0, s0);
        acc = vmlaq_f32(acc, w1, s1);
        acc = vmlaq_f32(acc, w2, s2);

        // Horizontal sum of the four lanes
        float32x2_t folded = vadd_f32(vget_low_f32(acc), vget_high_f32(acc));
        folded = vpadd_f32(folded, folded);

        vst1_lane_f32(&dst[chan], folded, 0);
    }
}
#endif
397
398// Calculate the cubic equation which passes through all four points.
399// https://en.wikipedia.org/wiki/Ordinary_least_squares
400// https://en.wikipedia.org/wiki/Polynomial_regression
401static void CubicLeastSquares(Cubic *coeffs, float y0, float y1, float y2, float y3)
402{
403 // Least squares matrix for xs = [0, 1/3, 2/3, 1]
404 // [ 1.0 0.0 0.0 0.0 ]
405 // [ -5.5 9.0 -4.5 1.0 ]
406 // [ 9.0 -22.5 18.0 -4.5 ]
407 // [ -4.5 13.5 -13.5 4.5 ]
408
409 coeffs->v[0] = y0;
410 coeffs->v[1] = -5.5f * y0 + 9.0f * y1 - 4.5f * y2 + y3;
411 coeffs->v[2] = 9.0f * y0 - 22.5f * y1 + 18.0f * y2 - 4.5f * y3;
412 coeffs->v[3] = -4.5f * y0 + 13.5f * y1 - 13.5f * y2 + 4.5f * y3;
413}
414
415// Zeroth-order modified Bessel function of the first kind
416// https://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html
417static float BesselI0(float x)
418{
419 float sum = 0.0f;
420 float i = 1.0f;
421 float t = 1.0f;
422 x *= x * 0.25f;
423
424 while (t >= sum * SDL_FLT_EPSILON) {
425 sum += t;
426 t *= x / (i * i);
427 ++i;
428 }
429
430 return sum;
431}
432
433// Pre-calculate 180 degrees of sin(pi * x) / pi
434// The speedup from this isn't huge, but it also avoids precision issues.
435// If sinf isn't available, SDL_sinf just calls SDL_sin.
436// Know what SDL_sin(SDL_PI_F) equals? Not quite zero.
437static void SincTable(float *table, int len)
438{
439 int i;
440
441 for (i = 0; i < len; ++i) {
442 table[i] = SDL_sinf(i * (SDL_PI_F / len)) / SDL_PI_F;
443 }
444}
445
// Calculate Sinc(x/y), using a lookup table
//
// `table` holds `y` entries covering one half period of the sine (see SincTable).
// The sign alternates with each half period, so it is flipped for odd x / y.
// x must be non-zero, as it is used as the divisor.
static float Sinc(const float *table, int x, int y)
{
    const int half_period = x / y;
    const int phase = x % y;
    float value = table[phase];

    if (half_period & 1) {
        value = -value;
    }

    return (value * y) / x;
}
453
454static Cubic ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING][RESAMPLER_SAMPLES_PER_FRAME];
455
456static void GenerateResamplerFilter(void)
457{
458 enum
459 {
460 // Generate samples at 3x the target resolution, so that we have samples at [0, 1/3, 2/3, 1] of each position
461 TABLE_SAMPLES_PER_ZERO_CROSSING = RESAMPLER_SAMPLES_PER_ZERO_CROSSING * 3,
462 TABLE_SIZE = RESAMPLER_ZERO_CROSSINGS * TABLE_SAMPLES_PER_ZERO_CROSSING,
463 };
464
465 // if dB > 50, beta=(0.1102 * (dB - 8.7)), according to Matlab.
466 const float dB = 80.0f;
467 const float beta = 0.1102f * (dB - 8.7f);
468 const float bessel_beta = BesselI0(beta);
469 const float lensqr = TABLE_SIZE * TABLE_SIZE;
470
471 int i, j;
472
473 float sinc[TABLE_SAMPLES_PER_ZERO_CROSSING];
474 SincTable(sinc, TABLE_SAMPLES_PER_ZERO_CROSSING);
475
476 // Generate one wing of the filter
477 // https://en.wikipedia.org/wiki/Kaiser_window
478 // https://en.wikipedia.org/wiki/Whittaker%E2%80%93Shannon_interpolation_formula
479 float filter[TABLE_SIZE + 1];
480 filter[0] = 1.0f;
481
482 for (i = 1; i <= TABLE_SIZE; ++i) {
483 float b = BesselI0(beta * SDL_sqrtf((lensqr - (i * i)) / lensqr)) / bessel_beta;
484 float s = Sinc(sinc, i, TABLE_SAMPLES_PER_ZERO_CROSSING);
485 filter[i] = b * s;
486 }
487
488 // Generate the coefficients for each point
489 // When interpolating, the fraction represents how far we are between input samples,
490 // so we need to align the filter by "moving" it to the right.
491 //
492 // For the left wing, this means interpolating "forwards" (away from the center)
493 // For the right wing, this means interpolating "backwards" (towards the center)
494 //
495 // The center of the filter is at the end of the left wing (RESAMPLER_ZERO_CROSSINGS - 1)
496 // The left wing is the filter, but reversed
497 // The right wing is the filter, but offset by 1
498 //
499 // Since the right wing is offset by 1, this just means we interpolate backwards
500 // between the same points, instead of forwards
501 // interp(p[n], p[n+1], t) = interp(p[n+1], p[n+1-1], 1 - t) = interp(p[n+1], p[n], 1 - t)
502 for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
503 for (j = 0; j < RESAMPLER_ZERO_CROSSINGS; ++j) {
504 const float *ys = &filter[((j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING) + i) * 3];
505
506 Cubic *fwd = &ResamplerFilter[i][RESAMPLER_ZERO_CROSSINGS - j - 1];
507 Cubic *rev = &ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING - i - 1][RESAMPLER_ZERO_CROSSINGS + j];
508
509 // Calculate the cubic equation of the 4 points
510 CubicLeastSquares(fwd, ys[0], ys[1], ys[2], ys[3]);
511 CubicLeastSquares(rev, ys[3], ys[2], ys[1], ys[0]);
512 }
513 }
514}
515
516typedef void (*ResampleFrameFunc)(const float *src, float *dst, const Cubic *filter, float frac, int chans);
517static ResampleFrameFunc ResampleFrame[8];
518
519// Transpose 4x4 floats
520static void Transpose4x4(Cubic *data)
521{
522 int i, j;
523
524 Cubic temp[4] = { data[0], data[1], data[2], data[3] };
525
526 for (i = 0; i < 4; ++i) {
527 for (j = 0; j < 4; ++j) {
528 data[i].v[j] = temp[j].v[i];
529 }
530 }
531}
532
533static void SetupAudioResampler(void)
534{
535 int i, j;
536 bool transpose = false;
537
538 GenerateResamplerFilter();
539
540#ifdef SDL_SSE_INTRINSICS
541 if (SDL_HasSSE()) {
542 for (i = 0; i < 8; ++i) {
543 ResampleFrame[i] = ResampleFrame_Generic_SSE;
544 }
545 transpose = true;
546 } else
547#endif
548#ifdef SDL_NEON_INTRINSICS
549 if (SDL_HasNEON()) {
550 for (i = 0; i < 8; ++i) {
551 ResampleFrame[i] = ResampleFrame_Generic_NEON;
552 }
553 transpose = true;
554 } else
555#endif
556 {
557 for (i = 0; i < 8; ++i) {
558 ResampleFrame[i] = ResampleFrame_Generic;
559 }
560
561 ResampleFrame[0] = ResampleFrame_Mono;
562 ResampleFrame[1] = ResampleFrame_Stereo;
563 }
564
565 if (transpose) {
566 // Transpose each set of 4 coefficients, to reduce work when resampling
567 for (i = 0; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
568 for (j = 0; j + 4 <= RESAMPLER_SAMPLES_PER_FRAME; j += 4) {
569 Transpose4x4(&ResamplerFilter[i][j]);
570 }
571 }
572 }
573}
574
575void SDL_SetupAudioResampler(void)
576{
577 static SDL_InitState init;
578
579 if (SDL_ShouldInit(&init)) {
580 SetupAudioResampler();
581 SDL_SetInitialized(&init, true);
582 }
583}
584
585Sint64 SDL_GetResampleRate(int src_rate, int dst_rate)
586{
587 SDL_assert(src_rate > 0);
588 SDL_assert(dst_rate > 0);
589
590 Sint64 numerator = (Sint64)src_rate << 32;
591 Sint64 denominator = (Sint64)dst_rate;
592
593 // Generally it's expected that `dst_frames = (src_frames * dst_rate) / src_rate`
594 // To match this as closely as possible without infinite precision, always round up the resample rate.
595 // For example, without rounding up, a sample ratio of 2:3 would have `sample_rate = 0xAAAAAAAA`
596 // After 3 frames, the position would be 0x1.FFFFFFFE, meaning we haven't fully consumed the second input frame.
597 // By rounding up to 0xAAAAAAAB, we would instead reach 0x2.00000001, fulling consuming the second frame.
598 // Technically you could say this is kicking the can 0x100000000 steps down the road, but I'm fine with that :)
599 // sample_rate = div_ceil(numerator, denominator)
600 Sint64 sample_rate = ((numerator - 1) / denominator) + 1;
601
602 SDL_assert(sample_rate > 0);
603
604 return sample_rate;
605}
606
607int SDL_GetResamplerHistoryFrames(void)
608{
609 // Even if we aren't currently resampling, make sure to keep enough history in case we need to later.
610
611 return RESAMPLER_MAX_PADDING_FRAMES;
612}
613
614int SDL_GetResamplerPaddingFrames(Sint64 resample_rate)
615{
616 // This must always be <= SDL_GetResamplerHistoryFrames()
617
618 return resample_rate ? RESAMPLER_MAX_PADDING_FRAMES : 0;
619}
620
621// These are not general purpose. They do not check for all possible underflow/overflow
622SDL_FORCE_INLINE bool ResamplerAdd(Sint64 a, Sint64 b, Sint64 *ret)
623{
624 if ((b > 0) && (a > SDL_MAX_SINT64 - b)) {
625 return false;
626 }
627
628 *ret = a + b;
629 return true;
630}
631
632SDL_FORCE_INLINE bool ResamplerMul(Sint64 a, Sint64 b, Sint64 *ret)
633{
634 if ((b > 0) && (a > SDL_MAX_SINT64 / b)) {
635 return false;
636 }
637
638 *ret = a * b;
639 return true;
640}
641
642Sint64 SDL_GetResamplerInputFrames(Sint64 output_frames, Sint64 resample_rate, Sint64 resample_offset)
643{
644 // Calculate the index of the last input frame, then add 1.
645 // ((((output_frames - 1) * resample_rate) + resample_offset) >> 32) + 1
646
647 Sint64 output_offset;
648 if (!ResamplerMul(output_frames, resample_rate, &output_offset) ||
649 !ResamplerAdd(output_offset, -resample_rate + resample_offset + 0x100000000, &output_offset)) {
650 output_offset = SDL_MAX_SINT64;
651 }
652
653 Sint64 input_frames = (Sint64)(Sint32)(output_offset >> 32);
654 input_frames = SDL_max(input_frames, 0);
655
656 return input_frames;
657}
658
659Sint64 SDL_GetResamplerOutputFrames(Sint64 input_frames, Sint64 resample_rate, Sint64 *inout_resample_offset)
660{
661 Sint64 resample_offset = *inout_resample_offset;
662
663 // input_offset = (input_frames << 32) - resample_offset;
664 Sint64 input_offset;
665 if (!ResamplerMul(input_frames, 0x100000000, &input_offset) ||
666 !ResamplerAdd(input_offset, -resample_offset, &input_offset)) {
667 input_offset = SDL_MAX_SINT64;
668 }
669
670 // output_frames = div_ceil(input_offset, resample_rate)
671 Sint64 output_frames = (input_offset > 0) ? ((input_offset - 1) / resample_rate) + 1 : 0;
672
673 *inout_resample_offset = (output_frames * resample_rate) - input_offset;
674
675 return output_frames;
676}
677
678void SDL_ResampleAudio(int chans, const float *src, int inframes, float *dst, int outframes,
679 Sint64 resample_rate, Sint64 *inout_resample_offset)
680{
681 int i;
682 Sint64 srcpos = *inout_resample_offset;
683 ResampleFrameFunc resample_frame = ResampleFrame[chans - 1];
684
685 SDL_assert(resample_rate > 0);
686
687 src -= (RESAMPLER_ZERO_CROSSINGS - 1) * chans;
688
689 for (i = 0; i < outframes; ++i) {
690 int srcindex = (int)(Sint32)(srcpos >> 32);
691 Uint32 srcfraction = (Uint32)(srcpos & 0xFFFFFFFF);
692 srcpos += resample_rate;
693
694 SDL_assert(srcindex >= -1 && srcindex < inframes);
695
696 const Cubic *filter = ResamplerFilter[srcfraction >> RESAMPLER_FILTER_INTERP_BITS];
697 const float frac = (float)(srcfraction & (RESAMPLER_FILTER_INTERP_RANGE - 1)) * (1.0f / RESAMPLER_FILTER_INTERP_RANGE);
698
699 const float *frame = &src[srcindex * chans];
700 resample_frame(frame, dst, filter, frac, chans);
701
702 dst += chans;
703 }
704
705 *inout_resample_offset = srcpos - ((Sint64)inframes << 32);
706}
707