SDL_audioresample.c source code [SDL/src/audio/SDL_audioresample.c]

1	/*
2	Simple DirectMedia Layer
3	Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5	This software is provided 'as-is', without any express or implied
6	warranty. In no event will the authors be held liable for any damages
7	arising from the use of this software.
8
9	Permission is granted to anyone to use this software for any purpose,
10	including commercial applications, and to alter it and redistribute it
11	freely, subject to the following restrictions:
12
13	1. The origin of this software must not be misrepresented; you must not
14	claim that you wrote the original software. If you use this software
15	in a product, an acknowledgment in the product documentation would be
16	appreciated but is not required.
17	2. Altered source versions must be plainly marked as such, and must not be
18	misrepresented as being the original software.
19	3. This notice may not be removed or altered from any source distribution.
20	*/
21	#include "SDL_internal.h"
22
23	#include "SDL_sysaudio.h"
24
25	#include "SDL_audioresample.h"
26
27	// SDL's resampler uses a "bandlimited interpolation" algorithm:
28	// https://ccrma.stanford.edu/~jos/resample/
29
// Compile-time configuration of the resampling filter.
//
// RESAMPLER_ZERO_CROSSINGS is the number of sinc zero crossings kept on each
// side of the filter center; each output frame therefore reads
// RESAMPLER_SAMPLES_PER_FRAME (= 2 * RESAMPLER_ZERO_CROSSINGS) input frames.

// TODO: Support changing this at runtime?
#if defined(SDL_SSE_INTRINSICS) || defined(SDL_NEON_INTRINSICS)
// In <current year>, SSE is basically mandatory anyway
// We want RESAMPLER_SAMPLES_PER_FRAME to be a multiple of 4, to make SIMD easier
#define RESAMPLER_ZERO_CROSSINGS 6
#else
#define RESAMPLER_ZERO_CROSSINGS 5
#endif

#define RESAMPLER_SAMPLES_PER_FRAME (RESAMPLER_ZERO_CROSSINGS * 2)

// For a given srcpos, `srcpos + frame` are sampled, where `-RESAMPLER_ZERO_CROSSINGS < frame <= RESAMPLER_ZERO_CROSSINGS`.
// Note, when upsampling, it is also possible to start sampling from `srcpos = -1`.
#define RESAMPLER_MAX_PADDING_FRAMES (RESAMPLER_ZERO_CROSSINGS + 1)

// More bits gives more precision, at the cost of a larger table.
#define RESAMPLER_BITS_PER_ZERO_CROSSING 3
#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING (1 << RESAMPLER_BITS_PER_ZERO_CROSSING)
// The top RESAMPLER_BITS_PER_ZERO_CROSSING bits of the 32-bit source fraction select a
// filter row; the remaining RESAMPLER_FILTER_INTERP_BITS bits interpolate within that row.
#define RESAMPLER_FILTER_INTERP_BITS (32 - RESAMPLER_BITS_PER_ZERO_CROSSING)
#define RESAMPLER_FILTER_INTERP_RANGE (1 << RESAMPLER_FILTER_INTERP_BITS)
50
51	// ResampleFrame is just a vector/matrix/matrix multiplication.
52	// It performs cubic interpolation of the filter, then multiplies that with the input.
// dst = [1, frac, frac^2, frac^3] * filter * src
54
// Cubic Polynomial
// Four float coefficients. The scalar ResampleFrame functions evaluate them as
// v[0] + v[1]*frac + v[2]*frac^2 + v[3]*frac^3.
// When SIMD intrinsics are available, the same storage is also exposed as one
// 128-bit vector so the coefficients can be loaded in a single operation.
typedef union Cubic
{
    float v[4];

#ifdef SDL_SSE_INTRINSICS
    // Aligned loads can be used directly as memory operands for mul/add
    __m128 v128;
#endif

#ifdef SDL_NEON_INTRINSICS
    float32x4_t v128;
#endif

} Cubic;
70
71	static void ResampleFrame_Generic(const float src, float* dst, const* Cubic filter, float* frac, int chans)
72	{
73	const float frac2 = frac * frac;
74	const float frac3 = frac * frac2;
75
76	int i, chan;
77	float scales[RESAMPLER_SAMPLES_PER_FRAME];
78
79	for (i = `0`; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
80	scales[i] = filter->v[`0`] + (filter->v[`1`] * frac) + (filter->v[`2`] * frac2) + (filter->v[`3`] * frac3);
81	}
82
83	for (chan = `0`; chan < chans; ++chan) {
84	float out = `0.0f`;
85
86	for (i = `0`; i < RESAMPLER_SAMPLES_PER_FRAME; ++i) {
87	out += src[i * chans + chan] * scales[i];
88	}
89
90	dst[chan] = out;
91	}
92	}
93
94	static void ResampleFrame_Mono(const float src, float* dst, const* Cubic filter, float* frac, int chans)
95	{
96	const float frac2 = frac * frac;
97	const float frac3 = frac * frac2;
98
99	int i;
100	float out = `0.0f`;
101
102	for (i = `0`; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
103	// Interpolate between the nearest two filters
104	const float scale = filter->v[`0`] + (filter->v[`1`] * frac) + (filter->v[`2`] * frac2) + (filter->v[`3`] * frac3);
105
106	out += src[i] * scale;
107	}
108
109	dst[`0`] = out;
110	}
111
112	static void ResampleFrame_Stereo(const float src, float* dst, const* Cubic filter, float* frac, int chans)
113	{
114	const float frac2 = frac * frac;
115	const float frac3 = frac * frac2;
116
117	int i;
118	float out0 = `0.0f`;
119	float out1 = `0.0f`;
120
121	for (i = `0`; i < RESAMPLER_SAMPLES_PER_FRAME; ++i, ++filter) {
122	// Interpolate between the nearest two filters
123	const float scale = filter->v[`0`] + (filter->v[`1`] * frac) + (filter->v[`2`] * frac2) + (filter->v[`3`] * frac3);
124
125	out0 += src[i * `2` + `0`] * scale;
126	out1 += src[i * `2` + `1`] * scale;
127	}
128
129	dst[`0`] = out0;
130	dst[`1`] = out1;
131	}
132
#ifdef SDL_SSE_INTRINSICS
#define sdl_madd_ps(a, b, c) _mm_add_ps(a, _mm_mul_ps(b, c)) // Not-so-fused multiply-add

// SSE resampling of one output frame for any channel count.
//
// Same contract as ResampleFrame_Generic, except that `filter` must already be
// transposed in groups of four (see SetupAudioResampler): within each group,
// filter[k] holds coefficient k for four consecutive taps. The filter is read
// with aligned loads. The 12 tap weights are computed into three vectors
// (f0, f1, f2), then applied with a layout-specific path per channel count.
static void SDL_TARGETING("sse") ResampleFrame_Generic_SSE(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    __m128 f0, f1, f2;

    {
        const __m128 frac1 = _mm_set1_ps(frac);
        const __m128 frac2 = _mm_mul_ps(frac1, frac1);
        const __m128 frac3 = _mm_mul_ps(frac1, frac2);

        // Transposed in SetupAudioResampler
        // Explicitly use _mm_load_ps to workaround ICE in GCC 4.9.4 accessing Cubic.v128
        // Each X() evaluates the cubic for four taps at once and advances to the next group
#define X(out)                                               \
    out = _mm_load_ps(filter[0].v);                          \
    out = sdl_madd_ps(out, frac1, _mm_load_ps(filter[1].v)); \
    out = sdl_madd_ps(out, frac2, _mm_load_ps(filter[2].v)); \
    out = sdl_madd_ps(out, frac3, _mm_load_ps(filter[3].v)); \
    filter += 4

        X(f0);
        X(f1);
        X(f2);

#undef X
    }

    if (chans == 2) {
        // Duplicate each of the filter elements and multiply by the input
        // Use two accumulators to improve throughput
        __m128 out0 = _mm_mul_ps(_mm_loadu_ps(src + 0), _mm_unpacklo_ps(f0, f0));
        __m128 out1 = _mm_mul_ps(_mm_loadu_ps(src + 4), _mm_unpackhi_ps(f0, f0));
        out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 8), _mm_unpacklo_ps(f1, f1));
        out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 12), _mm_unpackhi_ps(f1, f1));
        out0 = sdl_madd_ps(out0, _mm_loadu_ps(src + 16), _mm_unpacklo_ps(f2, f2));
        out1 = sdl_madd_ps(out1, _mm_loadu_ps(src + 20), _mm_unpackhi_ps(f2, f2));

        // Add the accumulators together
        __m128 out = _mm_add_ps(out0, out1);

        // Add the lower and upper pairs together
        out = _mm_add_ps(out, _mm_movehl_ps(out, out));

        // Store the result
        _mm_storel_pi((__m64 *)dst, out);
        return;
    }

    if (chans == 1) {
        // Multiply the filter by the input
        __m128 out = _mm_mul_ps(f0, _mm_loadu_ps(src + 0));
        out = sdl_madd_ps(out, f1, _mm_loadu_ps(src + 4));
        out = sdl_madd_ps(out, f2, _mm_loadu_ps(src + 8));

        // Horizontal sum
        __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1));
        out = _mm_add_ps(out, shuf);
        out = _mm_add_ss(out, _mm_movehl_ps(shuf, out));

        _mm_store_ss(dst, out);
        return;
    }

    int chan = 0;

    // Process 4 channels at once
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        __m128 out0 = _mm_setzero_ps();
        __m128 out1 = _mm_setzero_ps();

        // X() broadcasts weight `b` of vector `a`, multiplies it with four channels of
        // the current input frame, then steps to the next frame.
#define X(a, b, out)                                                                         \
    out = sdl_madd_ps(out, _mm_loadu_ps(in), _mm_shuffle_ps(a, a, _MM_SHUFFLE(b, b, b, b))); \
    in += chans

        // Y() applies the four weights of one vector, alternating between the two accumulators
#define Y(a)       \
    X(a, 0, out0); \
    X(a, 1, out1); \
    X(a, 2, out0); \
    X(a, 3, out1)

        Y(f0);
        Y(f1);
        Y(f2);

#undef X
#undef Y

        // Add the accumulators together
        __m128 out = _mm_add_ps(out0, out1);

        _mm_storeu_ps(&dst[chan], out);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    // Without vgatherdps (AVX2), this gets quite messy.
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        __m128 v0, v1, v2;

        // X() gathers this channel's samples from four consecutive frames into one vector
#define X(x)                                                                         \
    x = _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans));                   \
    in += chans + chans;                                                             \
    x = _mm_movelh_ps(x, _mm_unpacklo_ps(_mm_load_ss(in), _mm_load_ss(in + chans))); \
    in += chans + chans

        X(v0);
        X(v1);
        X(v2);

#undef X

        __m128 out = _mm_mul_ps(f0, v0);
        out = sdl_madd_ps(out, f1, v1);
        out = sdl_madd_ps(out, f2, v2);

        // Horizontal sum
        __m128 shuf = _mm_shuffle_ps(out, out, _MM_SHUFFLE(2, 3, 0, 1));
        out = _mm_add_ps(out, shuf);
        out = _mm_add_ss(out, _mm_movehl_ps(shuf, out));

        _mm_store_ss(&dst[chan], out);
    }
}

#undef sdl_madd_ps
#endif
266
#ifdef SDL_NEON_INTRINSICS
// NEON resampling of one output frame for any channel count.
//
// Same contract as ResampleFrame_Generic, except that `filter` must already be
// transposed in groups of four (see SetupAudioResampler): within each group,
// filter[k] holds coefficient k for four consecutive taps. The 12 tap weights
// are computed into three vectors (f0, f1, f2), then applied with a
// layout-specific path per channel count.
static void ResampleFrame_Generic_NEON(const float *src, float *dst, const Cubic *filter, float frac, int chans)
{
#if RESAMPLER_SAMPLES_PER_FRAME != 12
#error Invalid samples per frame
#endif

    float32x4_t f0, f1, f2;

    {
        const float32x4_t frac1 = vdupq_n_f32(frac);
        const float32x4_t frac2 = vmulq_f32(frac1, frac1);
        const float32x4_t frac3 = vmulq_f32(frac1, frac2);

        // Transposed in SetupAudioResampler
        // Each X() evaluates the cubic for four taps at once and advances to the next group
#define X(out)                                                                                                                  \
    out = vmlaq_f32(vmlaq_f32(vmlaq_f32(filter[0].v128, filter[1].v128, frac1), filter[2].v128, frac2), filter[3].v128, frac3); \
    filter += 4

        X(f0);
        X(f1);
        X(f2);

#undef X
    }

    if (chans == 2) {
        float32x4x2_t g0 = vzipq_f32(f0, f0);
        float32x4x2_t g1 = vzipq_f32(f1, f1);
        float32x4x2_t g2 = vzipq_f32(f2, f2);

        // Duplicate each of the filter elements and multiply by the input
        // Use two accumulators to improve throughput
        float32x4_t out0 = vmulq_f32(vld1q_f32(src + 0), g0.val[0]);
        float32x4_t out1 = vmulq_f32(vld1q_f32(src + 4), g0.val[1]);
        out0 = vmlaq_f32(out0, vld1q_f32(src + 8), g1.val[0]);
        out1 = vmlaq_f32(out1, vld1q_f32(src + 12), g1.val[1]);
        out0 = vmlaq_f32(out0, vld1q_f32(src + 16), g2.val[0]);
        out1 = vmlaq_f32(out1, vld1q_f32(src + 20), g2.val[1]);

        // Add the accumulators together
        out0 = vaddq_f32(out0, out1);

        // Add the lower and upper pairs together
        float32x2_t out = vadd_f32(vget_low_f32(out0), vget_high_f32(out0));

        // Store the result
        vst1_f32(dst, out);
        return;
    }

    if (chans == 1) {
        // Multiply the filter by the input
        float32x4_t out = vmulq_f32(f0, vld1q_f32(src + 0));
        out = vmlaq_f32(out, f1, vld1q_f32(src + 4));
        out = vmlaq_f32(out, f2, vld1q_f32(src + 8));

        // Horizontal sum
        float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out));
        sum = vpadd_f32(sum, sum);

        vst1_lane_f32(dst, sum, 0);
        return;
    }

    int chan = 0;

    // Process 4 channels at once
    for (; chan + 4 <= chans; chan += 4) {
        const float *in = &src[chan];
        float32x4_t out0 = vdupq_n_f32(0);
        float32x4_t out1 = vdupq_n_f32(0);

        // X() broadcasts lane `b` of the half-vector `a`, multiplies it with four channels
        // of the current input frame, then steps to the next frame.
#define X(a, b, out)                                           \
    out = vmlaq_f32(out, vld1q_f32(in), vdupq_lane_f32(a, b)); \
    in += chans

        // Y() applies the four weights of one vector, alternating between the two accumulators
#define Y(a)                      \
    X(vget_low_f32(a), 0, out0);  \
    X(vget_low_f32(a), 1, out1);  \
    X(vget_high_f32(a), 0, out0); \
    X(vget_high_f32(a), 1, out1)

        Y(f0);
        Y(f1);
        Y(f2);

#undef X
#undef Y

        // Add the accumulators together
        float32x4_t out = vaddq_f32(out0, out1);

        vst1q_f32(&dst[chan], out);
    }

    // Process the remaining channels one at a time.
    // Channel counts 1,2,4,8 are already handled above, leaving 3,5,6,7 to deal with (looping 3,1,2,3 times).
    for (; chan < chans; ++chan) {
        const float *in = &src[chan];
        float32x4_t v0, v1, v2;

        // X() gathers this channel's samples from four consecutive frames into one vector
#define X(x)                      \
    x = vld1q_dup_f32(in);        \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 1); \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 2); \
    in += chans;                  \
    x = vld1q_lane_f32(in, x, 3); \
    in += chans

        X(v0);
        X(v1);
        X(v2);

#undef X

        float32x4_t out = vmulq_f32(f0, v0);
        out = vmlaq_f32(out, f1, v1);
        out = vmlaq_f32(out, f2, v2);

        // Horizontal sum
        float32x2_t sum = vadd_f32(vget_low_f32(out), vget_high_f32(out));
        sum = vpadd_f32(sum, sum);

        vst1_lane_f32(&dst[chan], sum, 0);
    }
}
#endif
397
398	// Calculate the cubic equation which passes through all four points.
399	// https://en.wikipedia.org/wiki/Ordinary_least_squares
400	// https://en.wikipedia.org/wiki/Polynomial_regression
401	static void CubicLeastSquares(Cubic coeffs, float* y0, float y1, float y2, float y3)
402	{
403	// Least squares matrix for xs = [0, 1/3, 2/3, 1]
404	// [ 1.0 0.0 0.0 0.0 ]
405	// [ -5.5 9.0 -4.5 1.0 ]
406	// [ 9.0 -22.5 18.0 -4.5 ]
407	// [ -4.5 13.5 -13.5 4.5 ]
408
409	coeffs->v[`0`] = y0;
410	coeffs->v[`1`] = -`5.5f` * y0 + `9.0f` * y1 - `4.5f` * y2 + y3;
411	coeffs->v[`2`] = `9.0f` * y0 - `22.5f` * y1 + `18.0f` * y2 - `4.5f` * y3;
412	coeffs->v[`3`] = -`4.5f` * y0 + `13.5f` * y1 - `13.5f` * y2 + `4.5f` * y3;
413	}
414
415	// Zeroth-order modified Bessel function of the first kind
416	// https://mathworld.wolfram.com/ModifiedBesselFunctionoftheFirstKind.html
417	static float BesselI0(float x)
418	{
419	float sum = `0.0f`;
420	float i = `1.0f`;
421	float t = `1.0f`;
422	x = x `0.25f`;
423
424	while (t >= sum * SDL_FLT_EPSILON) {
425	sum += t;
426	t = x / (i i);
427	++i;
428	}
429
430	return sum;
431	}
432
433	// Pre-calculate 180 degrees of sin(pi x) / pi*
434	// The speedup from this isn't huge, but it also avoids precision issues.
435	// If sinf isn't available, SDL_sinf just calls SDL_sin.
436	// Know what SDL_sin(SDL_PI_F) equals? Not quite zero.
437	static void SincTable(float table, int* len)
438	{
439	int i;
440
441	for (i = `0`; i < len; ++i) {
442	table[i] = SDL_sinf(i * (SDL_PI_F / len)) / SDL_PI_F;
443	}
444	}
445
// Calculate Sinc(x/y), using a lookup table
//
// table: `y` entries covering one half period, as produced by SincTable.
// x:     numerator of the position; must be non-zero, since the result is divided by x.
// y:     number of table entries per half period.
static float Sinc(const float *table, int x, int y)
{
    float s = table[x % y];
    // The sine changes sign every half period, so negate on odd half periods
    s = ((x / y) & 1) ? -s : s;
    return (s * y) / x;
}
453
454	static Cubic ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING][RESAMPLER_SAMPLES_PER_FRAME];
455
456	static void GenerateResamplerFilter(void)
457	{
458	enum
459	{
460	// Generate samples at 3x the target resolution, so that we have samples at [0, 1/3, 2/3, 1] of each position
461	TABLE_SAMPLES_PER_ZERO_CROSSING = RESAMPLER_SAMPLES_PER_ZERO_CROSSING * `3`,
462	TABLE_SIZE = RESAMPLER_ZERO_CROSSINGS * TABLE_SAMPLES_PER_ZERO_CROSSING,
463	};
464
465	// if dB > 50, beta=(0.1102 (dB - 8.7)), according to Matlab.*
466	const float dB = `80.0f`;
467	const float beta = `0.1102f` * (dB - `8.7f`);
468	const float bessel_beta = BesselI0(beta);
469	const float lensqr = TABLE_SIZE * TABLE_SIZE;
470
471	int i, j;
472
473	float sinc[TABLE_SAMPLES_PER_ZERO_CROSSING];
474	SincTable(sinc, TABLE_SAMPLES_PER_ZERO_CROSSING);
475
476	// Generate one wing of the filter
477	// https://en.wikipedia.org/wiki/Kaiser_window
478	// https://en.wikipedia.org/wiki/Whittaker%E2%80%93Shannon_interpolation_formula
479	float filter[TABLE_SIZE + `1`];
480	filter[`0`] = `1.0f`;
481
482	for (i = `1`; i <= TABLE_SIZE; ++i) {
483	float b = BesselI0(beta * SDL_sqrtf((lensqr - (i * i)) / lensqr)) / bessel_beta;
484	float s = Sinc(sinc, i, TABLE_SAMPLES_PER_ZERO_CROSSING);
485	filter[i] = b * s;
486	}
487
488	// Generate the coefficients for each point
489	// When interpolating, the fraction represents how far we are between input samples,
490	// so we need to align the filter by "moving" it to the right.
491	//
492	// For the left wing, this means interpolating "forwards" (away from the center)
493	// For the right wing, this means interpolating "backwards" (towards the center)
494	//
495	// The center of the filter is at the end of the left wing (RESAMPLER_ZERO_CROSSINGS - 1)
496	// The left wing is the filter, but reversed
497	// The right wing is the filter, but offset by 1
498	//
499	// Since the right wing is offset by 1, this just means we interpolate backwards
500	// between the same points, instead of forwards
501	// interp(p[n], p[n+1], t) = interp(p[n+1], p[n+1-1], 1 - t) = interp(p[n+1], p[n], 1 - t)
502	for (i = `0`; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
503	for (j = `0`; j < RESAMPLER_ZERO_CROSSINGS; ++j) {
504	const float ys = &filter[((j RESAMPLER_SAMPLES_PER_ZERO_CROSSING) + i) * `3`];
505
506	Cubic *fwd = &ResamplerFilter[i][RESAMPLER_ZERO_CROSSINGS - j - `1`];
507	Cubic *rev = &ResamplerFilter[RESAMPLER_SAMPLES_PER_ZERO_CROSSING - i - `1`][RESAMPLER_ZERO_CROSSINGS + j];
508
509	// Calculate the cubic equation of the 4 points
510	CubicLeastSquares(fwd, ys[`0`], ys[`1`], ys[`2`], ys[`3`]);
511	CubicLeastSquares(rev, ys[`3`], ys[`2`], ys[`1`], ys[`0`]);
512	}
513	}
514	}
515
516	typedef void (ResampleFrameFunc)(const* float src, float* dst, const* Cubic filter, float* frac, int chans);
517	static ResampleFrameFunc ResampleFrame[`8`];
518
519	// Transpose 4x4 floats
520	static void Transpose4x4(Cubic *data)
521	{
522	int i, j;
523
524	Cubic temp[`4`] = { data[`0`], data[`1`], data[`2`], data[`3`] };
525
526	for (i = `0`; i < `4`; ++i) {
527	for (j = `0`; j < `4`; ++j) {
528	data[i].v[j] = temp[j].v[i];
529	}
530	}
531	}
532
533	static void SetupAudioResampler(void)
534	{
535	int i, j;
536	bool transpose = false;
537
538	GenerateResamplerFilter();
539
540	#ifdef SDL_SSE_INTRINSICS
541	if (SDL_HasSSE()) {
542	for (i = `0`; i < `8`; ++i) {
543	ResampleFrame[i] = ResampleFrame_Generic_SSE;
544	}
545	transpose = true;
546	} else
547	#endif
548	#ifdef SDL_NEON_INTRINSICS
549	if (SDL_HasNEON()) {
550	for (i = `0`; i < `8`; ++i) {
551	ResampleFrame[i] = ResampleFrame_Generic_NEON;
552	}
553	transpose = true;
554	} else
555	#endif
556	{
557	for (i = `0`; i < `8`; ++i) {
558	ResampleFrame[i] = ResampleFrame_Generic;
559	}
560
561	ResampleFrame[`0`] = ResampleFrame_Mono;
562	ResampleFrame[`1`] = ResampleFrame_Stereo;
563	}
564
565	if (transpose) {
566	// Transpose each set of 4 coefficients, to reduce work when resampling
567	for (i = `0`; i < RESAMPLER_SAMPLES_PER_ZERO_CROSSING; ++i) {
568	for (j = `0`; j + `4` <= RESAMPLER_SAMPLES_PER_FRAME; j += `4`) {
569	Transpose4x4(&ResamplerFilter[i][j]);
570	}
571	}
572	}
573	}
574
575	void SDL_SetupAudioResampler(void)
576	{
577	static SDL_InitState init;
578
579	if (SDL_ShouldInit(&init)) {
580	SetupAudioResampler();
581	SDL_SetInitialized(&init, true);
582	}
583	}
584
585	Sint64 SDL_GetResampleRate(int src_rate, int dst_rate)
586	{
587	SDL_assert(src_rate > `0`);
588	SDL_assert(dst_rate > `0`);
589
590	Sint64 numerator = (Sint64)src_rate << `32`;
591	Sint64 denominator = (Sint64)dst_rate;
592
593	// Generally it's expected that `dst_frames = (src_frames dst_rate) / src_rate`*
594	// To match this as closely as possible without infinite precision, always round up the resample rate.
595	// For example, without rounding up, a sample ratio of 2:3 would have `sample_rate = 0xAAAAAAAA`
596	// After 3 frames, the position would be 0x1.FFFFFFFE, meaning we haven't fully consumed the second input frame.
597	// By rounding up to 0xAAAAAAAB, we would instead reach 0x2.00000001, fulling consuming the second frame.
598	// Technically you could say this is kicking the can 0x100000000 steps down the road, but I'm fine with that :)
599	// sample_rate = div_ceil(numerator, denominator)
600	Sint64 sample_rate = ((numerator - `1`) / denominator) + `1`;
601
602	SDL_assert(sample_rate > `0`);
603
604	return sample_rate;
605	}
606
607	int SDL_GetResamplerHistoryFrames(void)
608	{
609	// Even if we aren't currently resampling, make sure to keep enough history in case we need to later.
610
611	return RESAMPLER_MAX_PADDING_FRAMES;
612	}
613
614	int SDL_GetResamplerPaddingFrames(Sint64 resample_rate)
615	{
616	// This must always be <= SDL_GetResamplerHistoryFrames()
617
618	return resample_rate ? RESAMPLER_MAX_PADDING_FRAMES : `0`;
619	}
620
621	// These are not general purpose. They do not check for all possible underflow/overflow
622	SDL_FORCE_INLINE bool ResamplerAdd(Sint64 a, Sint64 b, Sint64 *ret)
623	{
624	if ((b > `0`) && (a > SDL_MAX_SINT64 - b)) {
625	return false;
626	}
627
628	*ret = a + b;
629	return true;
630	}
631
632	SDL_FORCE_INLINE bool ResamplerMul(Sint64 a, Sint64 b, Sint64 *ret)
633	{
634	if ((b > `0`) && (a > SDL_MAX_SINT64 / b)) {
635	return false;
636	}
637
638	ret = a b;
639	return true;
640	}
641
642	Sint64 SDL_GetResamplerInputFrames(Sint64 output_frames, Sint64 resample_rate, Sint64 resample_offset)
643	{
644	// Calculate the index of the last input frame, then add 1.
645	// ((((output_frames - 1) resample_rate) + resample_offset) >> 32) + 1*
646
647	Sint64 output_offset;
648	if (!ResamplerMul(output_frames, resample_rate, &output_offset) \|\|
649	!ResamplerAdd(output_offset, -resample_rate + resample_offset + `0x100000000`, &output_offset)) {
650	output_offset = SDL_MAX_SINT64;
651	}
652
653	Sint64 input_frames = (Sint64)(Sint32)(output_offset >> `32`);
654	input_frames = SDL_max(input_frames, `0`);
655
656	return input_frames;
657	}
658
659	Sint64 SDL_GetResamplerOutputFrames(Sint64 input_frames, Sint64 resample_rate, Sint64 *inout_resample_offset)
660	{
661	Sint64 resample_offset = *inout_resample_offset;
662
663	// input_offset = (input_frames << 32) - resample_offset;
664	Sint64 input_offset;
665	if (!ResamplerMul(input_frames, `0x100000000`, &input_offset) \|\|
666	!ResamplerAdd(input_offset, -resample_offset, &input_offset)) {
667	input_offset = SDL_MAX_SINT64;
668	}
669
670	// output_frames = div_ceil(input_offset, resample_rate)
671	Sint64 output_frames = (input_offset > `0`) ? ((input_offset - `1`) / resample_rate) + `1` : `0`;
672
673	inout_resample_offset = (output_frames resample_rate) - input_offset;
674
675	return output_frames;
676	}
677
678	void SDL_ResampleAudio(int chans, const float src, int* inframes, float dst, int* outframes,
679	Sint64 resample_rate, Sint64 *inout_resample_offset)
680	{
681	int i;
682	Sint64 srcpos = *inout_resample_offset;
683	ResampleFrameFunc resample_frame = ResampleFrame[chans - `1`];
684
685	SDL_assert(resample_rate > `0`);
686
687	src -= (RESAMPLER_ZERO_CROSSINGS - `1`) * chans;
688
689	for (i = `0`; i < outframes; ++i) {
690	int srcindex = (int)(Sint32)(srcpos >> `32`);
691	Uint32 srcfraction = (Uint32)(srcpos & `0xFFFFFFFF`);
692	srcpos += resample_rate;
693
694	SDL_assert(srcindex >= -`1` && srcindex < inframes);
695
696	const Cubic *filter = ResamplerFilter[srcfraction >> RESAMPLER_FILTER_INTERP_BITS];
697	const float frac = (float)(srcfraction & (RESAMPLER_FILTER_INTERP_RANGE - `1`)) * (`1.0f` / RESAMPLER_FILTER_INTERP_RANGE);
698
699	const float frame = &src[srcindex chans];
700	resample_frame(frame, dst, filter, frac, chans);
701
702	dst += chans;
703	}
704
705	*inout_resample_offset = srcpos - ((Sint64)inframes << `32`);
706	}
707

Browse the source code of SDL/src/audio/SDL_audioresample.c