SDL_audiotypecvt.c source code [SDL/src/audio/SDL_audiotypecvt.c]

1	/*
2	Simple DirectMedia Layer
3	Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5	This software is provided 'as-is', without any express or implied
6	warranty. In no event will the authors be held liable for any damages
7	arising from the use of this software.
8
9	Permission is granted to anyone to use this software for any purpose,
10	including commercial applications, and to alter it and redistribute it
11	freely, subject to the following restrictions:
12
13	1. The origin of this software must not be misrepresented; you must not
14	claim that you wrote the original software. If you use this software
15	in a product, an acknowledgment in the product documentation would be
16	appreciated but is not required.
17	2. Altered source versions must be plainly marked as such, and must not be
18	misrepresented as being the original software.
19	3. This notice may not be removed or altered from any source distribution.
20	*/
21	#include "SDL_internal.h"
22
23	#include "SDL_sysaudio.h"
24
25	#define DIVBY2147483648 0.0000000004656612873077392578125f // 0x1p-31f
26
27	// start fallback scalar converters
28
29	// This code requires that floats are in the IEEE-754 binary32 format
30	SDL_COMPILE_TIME_ASSERT(float_bits, sizeof(float) == sizeof(Uint32));
31
32	union float_bits {
33	Uint32 u32;
34	float f32;
35	};
36
37	static void SDL_Convert_S8_to_F32_Scalar(float dst, const* Sint8 src, int* num_samples)
38	{
39	int i;
40
41	LOG_DEBUG_AUDIO_CONVERT("S8", "F32");
42
43	for (i = num_samples - `1`; i >= `0`; --i) {
44	/ 1) Construct a float in the range [65536.0, 65538.0)*
45	* 2) Shift the float range to [-1.0, 1.0) */
46	union float_bits x;
47	x.u32 = (Uint8)src[i] ^ `0x47800080u`;
48	dst[i] = x.f32 - `65537.0f`;
49	}
50	}
51
52	static void SDL_Convert_U8_to_F32_Scalar(float dst, const* Uint8 src, int* num_samples)
53	{
54	int i;
55
56	LOG_DEBUG_AUDIO_CONVERT("U8", "F32");
57
58	for (i = num_samples - `1`; i >= `0`; --i) {
59	/ 1) Construct a float in the range [65536.0, 65538.0)*
60	* 2) Shift the float range to [-1.0, 1.0) */
61	union float_bits x;
62	x.u32 = src[i] ^ `0x47800000u`;
63	dst[i] = x.f32 - `65537.0f`;
64	}
65	}
66
67	static void SDL_Convert_S16_to_F32_Scalar(float dst, const* Sint16 src, int* num_samples)
68	{
69	int i;
70
71	LOG_DEBUG_AUDIO_CONVERT("S16", "F32");
72
73	for (i = num_samples - `1`; i >= `0`; --i) {
74	/ 1) Construct a float in the range [256.0, 258.0)*
75	* 2) Shift the float range to [-1.0, 1.0) */
76	union float_bits x;
77	x.u32 = (Uint16)src[i] ^ `0x43808000u`;
78	dst[i] = x.f32 - `257.0f`;
79	}
80	}
81
82	static void SDL_Convert_S32_to_F32_Scalar(float dst, const* Sint32 src, int* num_samples)
83	{
84	int i;
85
86	LOG_DEBUG_AUDIO_CONVERT("S32", "F32");
87
88	for (i = num_samples - `1`; i >= `0`; --i) {
89	dst[i] = (float)src[i] * DIVBY2147483648;
90	}
91	}
92
// Create a bit-mask based on the sign-bit. Should optimize to a single arithmetic-shift-right
// Evaluates to 0xFFFFFFFF when bit 31 of x is set and to 0 otherwise.
#define SIGNMASK(x) (Uint32)(0u - ((Uint32)(x) >> 31))
95
96	static void SDL_Convert_F32_to_S8_Scalar(Sint8 dst, const* float src, int* num_samples)
97	{
98	int i;
99
100	LOG_DEBUG_AUDIO_CONVERT("F32", "S8");
101
102	for (i = `0`; i < num_samples; ++i) {
103	/ 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]*
104	* 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
105	* 3) Clamp the value to [-128, 127] */
106	union float_bits x;
107	x.f32 = src[i] + `98304.0f`;
108
109	Uint32 y = x.u32 - `0x47C00000u`;
110	Uint32 z = `0x7Fu` - (y ^ SIGNMASK(y));
111	y = y ^ (z & SIGNMASK(z));
112
113	dst[i] = (Sint8)(y & `0xFF`);
114	}
115	}
116
117	static void SDL_Convert_F32_to_U8_Scalar(Uint8 dst, const* float src, int* num_samples)
118	{
119	int i;
120
121	LOG_DEBUG_AUDIO_CONVERT("F32", "U8");
122
123	for (i = `0`; i < num_samples; ++i) {
124	/ 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]*
125	* 2) Shift the integer range from [0x47BFFF80, 0x47C00080] to [-128, 128]
126	* 3) Clamp the value to [-128, 127]
127	* 4) Shift the integer range from [-128, 127] to [0, 255] */
128	union float_bits x;
129	x.f32 = src[i] + `98304.0f`;
130
131	Uint32 y = x.u32 - `0x47C00000u`;
132	Uint32 z = `0x7Fu` - (y ^ SIGNMASK(y));
133	y = (y ^ `0x80u`) ^ (z & SIGNMASK(z));
134
135	dst[i] = (Uint8)(y & `0xFF`);
136	}
137	}
138
139	static void SDL_Convert_F32_to_S16_Scalar(Sint16 dst, const* float src, int* num_samples)
140	{
141	int i;
142
143	LOG_DEBUG_AUDIO_CONVERT("F32", "S16");
144
145	for (i = `0`; i < num_samples; ++i) {
146	/ 1) Shift the float range from [-1.0, 1.0] to [383.0, 385.0]*
147	* 2) Shift the integer range from [0x43BF8000, 0x43C08000] to [-32768, 32768]
148	* 3) Clamp values outside the [-32768, 32767] range */
149	union float_bits x;
150	x.f32 = src[i] + `384.0f`;
151
152	Uint32 y = x.u32 - `0x43C00000u`;
153	Uint32 z = `0x7FFFu` - (y ^ SIGNMASK(y));
154	y = y ^ (z & SIGNMASK(z));
155
156	dst[i] = (Sint16)(y & `0xFFFF`);
157	}
158	}
159
160	static void SDL_Convert_F32_to_S32_Scalar(Sint32 dst, const* float src, int* num_samples)
161	{
162	int i;
163
164	LOG_DEBUG_AUDIO_CONVERT("F32", "S32");
165
166	for (i = `0`; i < num_samples; ++i) {
167	/ 1) Shift the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0]*
168	* 2) Set values outside the [-2147483648.0, 2147483647.0] range to -2147483648.0
169	* 3) Convert the float to an integer, and fixup values outside the valid range */
170	union float_bits x;
171	x.f32 = src[i];
172
173	Uint32 y = x.u32 + `0x0F800000u`;
174	Uint32 z = y - `0xCF000000u`;
175	z &= SIGNMASK(y ^ z);
176	x.u32 = y - z;
177
178	dst[i] = (Sint32)x.f32 ^ (Sint32)SIGNMASK(z);
179	}
180	}
181
182	#undef SIGNMASK
183
184	static void SDL_Convert_Swap16_Scalar(Uint16* dst, const Uint16* src, int num_samples)
185	{
186	int i;
187
188	for (i = `0`; i < num_samples; ++i) {
189	dst[i] = SDL_Swap16(src[i]);
190	}
191	}
192
193	static void SDL_Convert_Swap32_Scalar(Uint32* dst, const Uint32* src, int num_samples)
194	{
195	int i;
196
197	for (i = `0`; i < num_samples; ++i) {
198	dst[i] = SDL_Swap32(src[i]);
199	}
200	}
201
202	// end fallback scalar converters
203
// Convert forwards, when sizeof(src) >= sizeof(dst)
//
// Expands to a loop skeleton that expects `dst` and `num_samples` in scope and
// declares `int i` in the enclosing block.
//   CVT1  - statement(s) converting the single sample at index i
//   CVT16 - statement(s) converting the 16 samples starting at index i
// With at least 16 samples, CVT1 runs until &dst[i] is 16-byte aligned, then
// CVT16 runs on whole groups of 16; CVT1 finishes whatever is left.
#define CONVERT_16_FWD(CVT1, CVT16)                          \
    int i = 0;                                               \
    if (num_samples >= 16) {                                 \
        while ((uintptr_t)(&dst[i]) & 15) { CVT1 ++i; }      \
        while ((i + 16) <= num_samples) { CVT16 i += 16; }   \
    }                                                        \
    while (i < num_samples) { CVT1 ++i; }
212
// Convert backwards, when sizeof(src) <= sizeof(dst)
//
// Expands to a loop skeleton that expects `dst` and `num_samples` in scope and
// declares `int i` in the enclosing block. i starts at num_samples and is
// decremented BEFORE each conversion step.
//   CVT1  - statement(s) converting the single sample at index i
//   CVT16 - statement(s) converting the 16 samples starting at index i
// With at least 16 samples, CVT1 runs until &dst[i] is 16-byte aligned, then
// CVT16 runs on whole groups of 16; CVT1 finishes whatever is left.
#define CONVERT_16_REV(CVT1, CVT16)                          \
    int i = num_samples;                                     \
    if (i >= 16) {                                           \
        while ((uintptr_t)(&dst[i]) & 15) { --i; CVT1 }      \
        while (i >= 16) { i -= 16; CVT16 }                   \
    }                                                        \
    while (i > 0) { --i; CVT1 }
221
222	#ifdef SDL_SSE2_INTRINSICS
223	static void SDL_TARGETING("sse2") SDL_Convert_S8_to_F32_SSE2(float dst, const* Sint8 src, int* num_samples)
224	{
225	/ 1) Flip the sign bit to convert from S8 to U8 format*
226	* 2) Construct a float in the range [65536.0, 65538.0)
227	* 3) Shift the float range to [-1.0, 1.0)
228	* dst[i] = i2f((src[i] ^ 0x80) \| 0x47800000) - 65537.0 */
229	const __m128i zero = _mm_setzero_si128();
230	const __m128i flipper = _mm_set1_epi8(-`0x80`);
231	const __m128i caster = _mm_set1_epi16(`0x4780` / 0x47800000 = f2i(65536.0) /);
232	const __m128 offset = _mm_set1_ps(-`65537.0`);
233
234	LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using SSE2)");
235
236	CONVERT_16_REV({
237	_mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ `0x47800080u`)), offset));
238	}, {
239	const __m128i bytes = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper);
240
241	const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero);
242	const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero);
243
244	const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
245	const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
246	const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
247	const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
248
249	_mm_store_ps(&dst[i], floats0);
250	_mm_store_ps(&dst[i + `4`], floats1);
251	_mm_store_ps(&dst[i + `8`], floats2);
252	_mm_store_ps(&dst[i + `12`], floats3);
253	})
254	}
255
256	static void SDL_TARGETING("sse2") SDL_Convert_U8_to_F32_SSE2(float dst, const* Uint8 src, int* num_samples)
257	{
258	/ 1) Construct a float in the range [65536.0, 65538.0)*
259	* 2) Shift the float range to [-1.0, 1.0)
260	* dst[i] = i2f(src[i] \| 0x47800000) - 65537.0 */
261	const __m128i zero = _mm_setzero_si128();
262	const __m128i caster = _mm_set1_epi16(`0x4780` / 0x47800000 = f2i(65536.0) /);
263	const __m128 offset = _mm_set1_ps(-`65537.0`);
264
265	LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using SSE2)");
266
267	CONVERT_16_REV({
268	_mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint8)src[i] ^ `0x47800000u`)), offset));
269	}, {
270	const __m128i bytes = _mm_loadu_si128((const __m128i *)&src[i]);
271
272	const __m128i shorts0 = _mm_unpacklo_epi8(bytes, zero);
273	const __m128i shorts1 = _mm_unpackhi_epi8(bytes, zero);
274
275	const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
276	const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
277	const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
278	const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
279
280	_mm_store_ps(&dst[i], floats0);
281	_mm_store_ps(&dst[i + `4`], floats1);
282	_mm_store_ps(&dst[i + `8`], floats2);
283	_mm_store_ps(&dst[i + `12`], floats3);
284	})
285	}
286
287	static void SDL_TARGETING("sse2") SDL_Convert_S16_to_F32_SSE2(float dst, const* Sint16 src, int* num_samples)
288	{
289	/ 1) Flip the sign bit to convert from S16 to U16 format*
290	* 2) Construct a float in the range [256.0, 258.0)
291	* 3) Shift the float range to [-1.0, 1.0)
292	* dst[i] = i2f((src[i] ^ 0x8000) \| 0x43800000) - 257.0 */
293	const __m128i flipper = _mm_set1_epi16(-`0x8000`);
294	const __m128i caster = _mm_set1_epi16(`0x4380` / 0x43800000 = f2i(256.0) /);
295	const __m128 offset = _mm_set1_ps(-`257.0f`);
296
297	LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using SSE2)");
298
299	CONVERT_16_REV({
300	_mm_store_ss(&dst[i], _mm_add_ss(_mm_castsi128_ps(_mm_cvtsi32_si128((Uint16)src[i] ^ `0x43808000u`)), offset));
301	}, {
302	const __m128i shorts0 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i]), flipper);
303	const __m128i shorts1 = _mm_xor_si128(_mm_loadu_si128((const __m128i *)&src[i + `8`]), flipper);
304
305	const __m128 floats0 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts0, caster)), offset);
306	const __m128 floats1 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts0, caster)), offset);
307	const __m128 floats2 = _mm_add_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(shorts1, caster)), offset);
308	const __m128 floats3 = _mm_add_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(shorts1, caster)), offset);
309
310	_mm_store_ps(&dst[i], floats0);
311	_mm_store_ps(&dst[i + `4`], floats1);
312	_mm_store_ps(&dst[i + `8`], floats2);
313	_mm_store_ps(&dst[i + `12`], floats3);
314	})
315	}
316
317	static void SDL_TARGETING("sse2") SDL_Convert_S32_to_F32_SSE2(float dst, const* Sint32 src, int* num_samples)
318	{
319	// dst[i] = f32(src[i]) / f32(0x80000000)
320	const __m128 scaler = _mm_set1_ps(DIVBY2147483648);
321
322	LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using SSE2)");
323
324	CONVERT_16_FWD({
325	_mm_store_ss(&dst[i], _mm_mul_ss(_mm_cvt_si2ss(_mm_setzero_ps(), src[i]), scaler));
326	}, {
327	const __m128i ints0 = _mm_loadu_si128((const __m128i *)&src[i]);
328	const __m128i ints1 = _mm_loadu_si128((const __m128i *)&src[i + `4`]);
329	const __m128i ints2 = _mm_loadu_si128((const __m128i *)&src[i + `8`]);
330	const __m128i ints3 = _mm_loadu_si128((const __m128i *)&src[i + `12`]);
331
332	const __m128 floats0 = _mm_mul_ps(_mm_cvtepi32_ps(ints0), scaler);
333	const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(ints1), scaler);
334	const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(ints2), scaler);
335	const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(ints3), scaler);
336
337	_mm_store_ps(&dst[i], floats0);
338	_mm_store_ps(&dst[i + `4`], floats1);
339	_mm_store_ps(&dst[i + `8`], floats2);
340	_mm_store_ps(&dst[i + `12`], floats3);
341	})
342	}
343
344	static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S8_SSE2(Sint8 dst, const* float src, int* num_samples)
345	{
346	/ 1) Shift the float range from [-1.0, 1.0] to [98303.0, 98305.0]*
347	* 2) Extract the lowest 16 bits and clamp to [-128, 127]
348	* Overflow is correctly handled for inputs between roughly [-255.0, 255.0]
349	* dst[i] = clamp(i16(f2i(src[i] + 98304.0) & 0xFFFF), -128, 127) */
350	const __m128 offset = _mm_set1_ps(`98304.0f`);
351	const __m128i mask = _mm_set1_epi16(`0xFF`);
352
353	LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using SSE2)");
354
355	CONVERT_16_FWD({
356	const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset));
357	dst[i] = (Sint8)(_mm_cvtsi128_si32(_mm_packs_epi16(ints, ints)) & `0xFF`);
358	}, {
359	const __m128 floats0 = _mm_loadu_ps(&src[i]);
360	const __m128 floats1 = _mm_loadu_ps(&src[i + `4`]);
361	const __m128 floats2 = _mm_loadu_ps(&src[i + `8`]);
362	const __m128 floats3 = _mm_loadu_ps(&src[i + `12`]);
363
364	const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset));
365	const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset));
366	const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset));
367	const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset));
368
369	const __m128i shorts0 = _mm_and_si128(_mm_packs_epi16(ints0, ints1), mask);
370	const __m128i shorts1 = _mm_and_si128(_mm_packs_epi16(ints2, ints3), mask);
371
372	const __m128i bytes = _mm_packus_epi16(shorts0, shorts1);
373
374	_mm_store_si128((__m128i*)&dst[i], bytes);
375	})
376	}
377
378	static void SDL_TARGETING("sse2") SDL_Convert_F32_to_U8_SSE2(Uint8 dst, const* float src, int* num_samples)
379	{
380	/ 1) Shift the float range from [-1.0, 1.0] to [98304.0, 98306.0]*
381	* 2) Extract the lowest 16 bits and clamp to [0, 255]
382	* Overflow is correctly handled for inputs between roughly [-254.0, 254.0]
383	* dst[i] = clamp(i16(f2i(src[i] + 98305.0) & 0xFFFF), 0, 255) */
384	const __m128 offset = _mm_set1_ps(`98305.0f`);
385	const __m128i mask = _mm_set1_epi16(`0xFF`);
386
387	LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using SSE2)");
388
389	CONVERT_16_FWD({
390	const __m128i ints = _mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset));
391	dst[i] = (Uint8)(_mm_cvtsi128_si32(_mm_packus_epi16(ints, ints)) & `0xFF`);
392	}, {
393	const __m128 floats0 = _mm_loadu_ps(&src[i]);
394	const __m128 floats1 = _mm_loadu_ps(&src[i + `4`]);
395	const __m128 floats2 = _mm_loadu_ps(&src[i + `8`]);
396	const __m128 floats3 = _mm_loadu_ps(&src[i + `12`]);
397
398	const __m128i ints0 = _mm_castps_si128(_mm_add_ps(floats0, offset));
399	const __m128i ints1 = _mm_castps_si128(_mm_add_ps(floats1, offset));
400	const __m128i ints2 = _mm_castps_si128(_mm_add_ps(floats2, offset));
401	const __m128i ints3 = _mm_castps_si128(_mm_add_ps(floats3, offset));
402
403	const __m128i shorts0 = _mm_and_si128(_mm_packus_epi16(ints0, ints1), mask);
404	const __m128i shorts1 = _mm_and_si128(_mm_packus_epi16(ints2, ints3), mask);
405
406	const __m128i bytes = _mm_packus_epi16(shorts0, shorts1);
407
408	_mm_store_si128((__m128i*)&dst[i], bytes);
409	})
410	}
411
412	static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S16_SSE2(Sint16 dst, const* float src, int* num_samples)
413	{
414	/ 1) Shift the float range from [-1.0, 1.0] to [256.0, 258.0]*
415	* 2) Shift the int range from [0x43800000, 0x43810000] to [-32768,32768]
416	* 3) Clamp to range [-32768,32767]
417	* Overflow is correctly handled for inputs between roughly [-257.0, +inf)
418	* dst[i] = clamp(f2i(src[i] + 257.0) - 0x43808000, -32768, 32767) */
419	const __m128 offset = _mm_set1_ps(`257.0f`);
420
421	LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using SSE2)");
422
423	CONVERT_16_FWD({
424	const __m128i ints = _mm_sub_epi32(_mm_castps_si128(_mm_add_ss(_mm_load_ss(&src[i]), offset)), _mm_castps_si128(offset));
425	dst[i] = (Sint16)(_mm_cvtsi128_si32(_mm_packs_epi32(ints, ints)) & `0xFFFF`);
426	}, {
427	const __m128 floats0 = _mm_loadu_ps(&src[i]);
428	const __m128 floats1 = _mm_loadu_ps(&src[i + `4`]);
429	const __m128 floats2 = _mm_loadu_ps(&src[i + `8`]);
430	const __m128 floats3 = _mm_loadu_ps(&src[i + `12`]);
431
432	const __m128i ints0 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats0, offset)), _mm_castps_si128(offset));
433	const __m128i ints1 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats1, offset)), _mm_castps_si128(offset));
434	const __m128i ints2 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats2, offset)), _mm_castps_si128(offset));
435	const __m128i ints3 = _mm_sub_epi32(_mm_castps_si128(_mm_add_ps(floats3, offset)), _mm_castps_si128(offset));
436
437	const __m128i shorts0 = _mm_packs_epi32(ints0, ints1);
438	const __m128i shorts1 = _mm_packs_epi32(ints2, ints3);
439
440	_mm_store_si128((__m128i*)&dst[i], shorts0);
441	_mm_store_si128((__m128i*)&dst[i + `8`], shorts1);
442	})
443	}
444
445	static void SDL_TARGETING("sse2") SDL_Convert_F32_to_S32_SSE2(Sint32 dst, const* float src, int* num_samples)
446	{
447	/ 1) Scale the float range from [-1.0, 1.0] to [-2147483648.0, 2147483648.0]*
448	* 2) Convert to integer (values too small/large become 0x80000000 = -2147483648)
449	* 3) Fixup values which were too large (0x80000000 ^ 0xFFFFFFFF = 2147483647)
450	* dst[i] = i32(src[i] * 2147483648.0) ^ ((src[i] >= 2147483648.0) ? 0xFFFFFFFF : 0x00000000) */
451	const __m128 limit = _mm_set1_ps(`2147483648.0f`);
452
453	LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using SSE2)");
454
455	CONVERT_16_FWD({
456	const __m128 floats = _mm_load_ss(&src[i]);
457	const __m128 values = _mm_mul_ss(floats, limit);
458	const __m128i ints = _mm_xor_si128(_mm_cvttps_epi32(values), _mm_castps_si128(_mm_cmpge_ss(values, limit)));
459	dst[i] = (Sint32)_mm_cvtsi128_si32(ints);
460	}, {
461	const __m128 floats0 = _mm_loadu_ps(&src[i]);
462	const __m128 floats1 = _mm_loadu_ps(&src[i + `4`]);
463	const __m128 floats2 = _mm_loadu_ps(&src[i + `8`]);
464	const __m128 floats3 = _mm_loadu_ps(&src[i + `12`]);
465
466	const __m128 values1 = _mm_mul_ps(floats0, limit);
467	const __m128 values2 = _mm_mul_ps(floats1, limit);
468	const __m128 values3 = _mm_mul_ps(floats2, limit);
469	const __m128 values4 = _mm_mul_ps(floats3, limit);
470
471	const __m128i ints0 = _mm_xor_si128(_mm_cvttps_epi32(values1), _mm_castps_si128(_mm_cmpge_ps(values1, limit)));
472	const __m128i ints1 = _mm_xor_si128(_mm_cvttps_epi32(values2), _mm_castps_si128(_mm_cmpge_ps(values2, limit)));
473	const __m128i ints2 = _mm_xor_si128(_mm_cvttps_epi32(values3), _mm_castps_si128(_mm_cmpge_ps(values3, limit)));
474	const __m128i ints3 = _mm_xor_si128(_mm_cvttps_epi32(values4), _mm_castps_si128(_mm_cmpge_ps(values4, limit)));
475
476	_mm_store_si128((__m128i*)&dst[i], ints0);
477	_mm_store_si128((__m128i*)&dst[i + `4`], ints1);
478	_mm_store_si128((__m128i*)&dst[i + `8`], ints2);
479	_mm_store_si128((__m128i*)&dst[i + `12`], ints3);
480	})
481	}
482	#endif
483
484	// FIXME: SDL doesn't have SSSE3 detection, so use the next one up
485	#ifdef SDL_SSE4_1_INTRINSICS
486	static void SDL_TARGETING("ssse3") SDL_Convert_Swap16_SSSE3(Uint16* dst, const Uint16* src, int num_samples)
487	{
488	const __m128i shuffle = _mm_set_epi8(`14`, `15`, `12`, `13`, `10`, `11`, `8`, `9`, `6`, `7`, `4`, `5`, `2`, `3`, `0`, `1`);
489
490	CONVERT_16_FWD({
491	dst[i] = SDL_Swap16(src[i]);
492	}, {
493	__m128i ints0 = _mm_loadu_si128((const __m128i*)&src[i]);
494	__m128i ints1 = _mm_loadu_si128((const __m128i*)&src[i + `8`]);
495
496	ints0 = _mm_shuffle_epi8(ints0, shuffle);
497	ints1 = _mm_shuffle_epi8(ints1, shuffle);
498
499	_mm_store_si128((__m128i*)&dst[i], ints0);
500	_mm_store_si128((__m128i*)&dst[i + `8`], ints1);
501	})
502	}
503
504	static void SDL_TARGETING("ssse3") SDL_Convert_Swap32_SSSE3(Uint32* dst, const Uint32* src, int num_samples)
505	{
506	const __m128i shuffle = _mm_set_epi8(`12`, `13`, `14`, `15`, `8`, `9`, `10`, `11`, `4`, `5`, `6`, `7`, `0`, `1`, `2`, `3`);
507
508	CONVERT_16_FWD({
509	dst[i] = SDL_Swap32(src[i]);
510	}, {
511	__m128i ints0 = _mm_loadu_si128((const __m128i*)&src[i]);
512	__m128i ints1 = _mm_loadu_si128((const __m128i*)&src[i + `4`]);
513	__m128i ints2 = _mm_loadu_si128((const __m128i*)&src[i + `8`]);
514	__m128i ints3 = _mm_loadu_si128((const __m128i*)&src[i + `12`]);
515
516	ints0 = _mm_shuffle_epi8(ints0, shuffle);
517	ints1 = _mm_shuffle_epi8(ints1, shuffle);
518	ints2 = _mm_shuffle_epi8(ints2, shuffle);
519	ints3 = _mm_shuffle_epi8(ints3, shuffle);
520
521	_mm_store_si128((__m128i*)&dst[i], ints0);
522	_mm_store_si128((__m128i*)&dst[i + `4`], ints1);
523	_mm_store_si128((__m128i*)&dst[i + `8`], ints2);
524	_mm_store_si128((__m128i*)&dst[i + `12`], ints3);
525	})
526	}
527	#endif
528
529	#ifdef SDL_NEON_INTRINSICS
530	static void SDL_Convert_S8_to_F32_NEON(float dst, const* Sint8 src, int* num_samples)
531	{
532	LOG_DEBUG_AUDIO_CONVERT("S8", "F32 (using NEON)");
533
534	CONVERT_16_REV({
535	vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), `7`), `0`);
536	}, {
537	int8x16_t bytes = vld1q_s8(&src[i]);
538
539	int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes));
540	int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes));
541
542	float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), `7`);
543	float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), `7`);
544	float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), `7`);
545	float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), `7`);
546
547	vst1q_f32(&dst[i], floats0);
548	vst1q_f32(&dst[i + `4`], floats1);
549	vst1q_f32(&dst[i + `8`], floats2);
550	vst1q_f32(&dst[i + `12`], floats3);
551	})
552	}
553
554	static void SDL_Convert_U8_to_F32_NEON(float dst, const* Uint8 src, int* num_samples)
555	{
556	LOG_DEBUG_AUDIO_CONVERT("U8", "F32 (using NEON)");
557
558	uint8x16_t flipper = vdupq_n_u8(`0x80`);
559
560	CONVERT_16_REV({
561	vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32((Sint8)(src[i] ^ `0x80`)), `7`), `0`);
562	}, {
563	int8x16_t bytes = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(&src[i]), flipper));
564
565	int16x8_t shorts0 = vmovl_s8(vget_low_s8(bytes));
566	int16x8_t shorts1 = vmovl_s8(vget_high_s8(bytes));
567
568	float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), `7`);
569	float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), `7`);
570	float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), `7`);
571	float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), `7`);
572
573	vst1q_f32(&dst[i], floats0);
574	vst1q_f32(&dst[i + `4`], floats1);
575	vst1q_f32(&dst[i + `8`], floats2);
576	vst1q_f32(&dst[i + `12`], floats3);
577	})
578	}
579
580	static void SDL_Convert_S16_to_F32_NEON(float dst, const* Sint16 src, int* num_samples)
581	{
582	LOG_DEBUG_AUDIO_CONVERT("S16", "F32 (using NEON)");
583
584	CONVERT_16_REV({
585	vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vdup_n_s32(src[i]), `15`), `0`);
586	}, {
587	int16x8_t shorts0 = vld1q_s16(&src[i]);
588	int16x8_t shorts1 = vld1q_s16(&src[i + `8`]);
589
590	float32x4_t floats0 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts0)), `15`);
591	float32x4_t floats1 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts0)), `15`);
592	float32x4_t floats2 = vcvtq_n_f32_s32(vmovl_s16(vget_low_s16(shorts1)), `15`);
593	float32x4_t floats3 = vcvtq_n_f32_s32(vmovl_s16(vget_high_s16(shorts1)), `15`);
594
595	vst1q_f32(&dst[i], floats0);
596	vst1q_f32(&dst[i + `4`], floats1);
597	vst1q_f32(&dst[i + `8`], floats2);
598	vst1q_f32(&dst[i + `12`], floats3);
599	})
600	}
601
602	static void SDL_Convert_S32_to_F32_NEON(float dst, const* Sint32 src, int* num_samples)
603	{
604	LOG_DEBUG_AUDIO_CONVERT("S32", "F32 (using NEON)");
605
606	CONVERT_16_FWD({
607	vst1_lane_f32(&dst[i], vcvt_n_f32_s32(vld1_dup_s32(&src[i]), `31`), `0`);
608	}, {
609	int32x4_t ints0 = vld1q_s32(&src[i]);
610	int32x4_t ints1 = vld1q_s32(&src[i + `4`]);
611	int32x4_t ints2 = vld1q_s32(&src[i + `8`]);
612	int32x4_t ints3 = vld1q_s32(&src[i + `12`]);
613
614	float32x4_t floats0 = vcvtq_n_f32_s32(ints0, `31`);
615	float32x4_t floats1 = vcvtq_n_f32_s32(ints1, `31`);
616	float32x4_t floats2 = vcvtq_n_f32_s32(ints2, `31`);
617	float32x4_t floats3 = vcvtq_n_f32_s32(ints3, `31`);
618
619	vst1q_f32(&dst[i], floats0);
620	vst1q_f32(&dst[i + `4`], floats1);
621	vst1q_f32(&dst[i + `8`], floats2);
622	vst1q_f32(&dst[i + `12`], floats3);
623	})
624	}
625
626	static void SDL_Convert_F32_to_S8_NEON(Sint8 dst, const* float src, int* num_samples)
627	{
628	LOG_DEBUG_AUDIO_CONVERT("F32", "S8 (using NEON)");
629
630	CONVERT_16_FWD({
631	vst1_lane_s8(&dst[i], vreinterpret_s8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), `31`)), `3`);
632	}, {
633	float32x4_t floats0 = vld1q_f32(&src[i]);
634	float32x4_t floats1 = vld1q_f32(&src[i + `4`]);
635	float32x4_t floats2 = vld1q_f32(&src[i + `8`]);
636	float32x4_t floats3 = vld1q_f32(&src[i + `12`]);
637
638	int32x4_t ints0 = vcvtq_n_s32_f32(floats0, `31`);
639	int32x4_t ints1 = vcvtq_n_s32_f32(floats1, `31`);
640	int32x4_t ints2 = vcvtq_n_s32_f32(floats2, `31`);
641	int32x4_t ints3 = vcvtq_n_s32_f32(floats3, `31`);
642
643	int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, `16`), vshrn_n_s32(ints1, `16`));
644	int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, `16`), vshrn_n_s32(ints3, `16`));
645
646	int8x16_t bytes = vcombine_s8(vshrn_n_s16(shorts0, `8`), vshrn_n_s16(shorts1, `8`));
647
648	vst1q_s8(&dst[i], bytes);
649	})
650	}
651
652	static void SDL_Convert_F32_to_U8_NEON(Uint8 dst, const* float src, int* num_samples)
653	{
654	LOG_DEBUG_AUDIO_CONVERT("F32", "U8 (using NEON)");
655
656	uint8x16_t flipper = vdupq_n_u8(`0x80`);
657
658	CONVERT_16_FWD({
659	vst1_lane_u8(&dst[i],
660	veor_u8(vreinterpret_u8_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), `31`)),
661	vget_low_u8(flipper)), `3`);
662	}, {
663	float32x4_t floats0 = vld1q_f32(&src[i]);
664	float32x4_t floats1 = vld1q_f32(&src[i + `4`]);
665	float32x4_t floats2 = vld1q_f32(&src[i + `8`]);
666	float32x4_t floats3 = vld1q_f32(&src[i + `12`]);
667
668	int32x4_t ints0 = vcvtq_n_s32_f32(floats0, `31`);
669	int32x4_t ints1 = vcvtq_n_s32_f32(floats1, `31`);
670	int32x4_t ints2 = vcvtq_n_s32_f32(floats2, `31`);
671	int32x4_t ints3 = vcvtq_n_s32_f32(floats3, `31`);
672
673	int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, `16`), vshrn_n_s32(ints1, `16`));
674	int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, `16`), vshrn_n_s32(ints3, `16`));
675
676	uint8x16_t bytes = veorq_u8(vreinterpretq_u8_s8(
677	vcombine_s8(vshrn_n_s16(shorts0, `8`), vshrn_n_s16(shorts1, `8`))),
678	flipper);
679
680	vst1q_u8(&dst[i], bytes);
681	})
682	}
683
684	static void SDL_Convert_F32_to_S16_NEON(Sint16 dst, const* float src, int* num_samples)
685	{
686	LOG_DEBUG_AUDIO_CONVERT("F32", "S16 (using NEON)");
687
688	CONVERT_16_FWD({
689	vst1_lane_s16(&dst[i], vreinterpret_s16_s32(vcvt_n_s32_f32(vld1_dup_f32(&src[i]), `31`)), `1`);
690	}, {
691	float32x4_t floats0 = vld1q_f32(&src[i]);
692	float32x4_t floats1 = vld1q_f32(&src[i + `4`]);
693	float32x4_t floats2 = vld1q_f32(&src[i + `8`]);
694	float32x4_t floats3 = vld1q_f32(&src[i + `12`]);
695
696	int32x4_t ints0 = vcvtq_n_s32_f32(floats0, `31`);
697	int32x4_t ints1 = vcvtq_n_s32_f32(floats1, `31`);
698	int32x4_t ints2 = vcvtq_n_s32_f32(floats2, `31`);
699	int32x4_t ints3 = vcvtq_n_s32_f32(floats3, `31`);
700
701	int16x8_t shorts0 = vcombine_s16(vshrn_n_s32(ints0, `16`), vshrn_n_s32(ints1, `16`));
702	int16x8_t shorts1 = vcombine_s16(vshrn_n_s32(ints2, `16`), vshrn_n_s32(ints3, `16`));
703
704	vst1q_s16(&dst[i], shorts0);
705	vst1q_s16(&dst[i + `8`], shorts1);
706	})
707	}
708
709	static void SDL_Convert_F32_to_S32_NEON(Sint32 dst, const* float src, int* num_samples)
710	{
711	LOG_DEBUG_AUDIO_CONVERT("F32", "S32 (using NEON)");
712
713	CONVERT_16_FWD({
714	vst1_lane_s32(&dst[i], vcvt_n_s32_f32(vld1_dup_f32(&src[i]), `31`), `0`);
715	}, {
716	float32x4_t floats0 = vld1q_f32(&src[i]);
717	float32x4_t floats1 = vld1q_f32(&src[i + `4`]);
718	float32x4_t floats2 = vld1q_f32(&src[i + `8`]);
719	float32x4_t floats3 = vld1q_f32(&src[i + `12`]);
720
721	int32x4_t ints0 = vcvtq_n_s32_f32(floats0, `31`);
722	int32x4_t ints1 = vcvtq_n_s32_f32(floats1, `31`);
723	int32x4_t ints2 = vcvtq_n_s32_f32(floats2, `31`);
724	int32x4_t ints3 = vcvtq_n_s32_f32(floats3, `31`);
725
726	vst1q_s32(&dst[i], ints0);
727	vst1q_s32(&dst[i + `4`], ints1);
728	vst1q_s32(&dst[i + `8`], ints2);
729	vst1q_s32(&dst[i + `12`], ints3);
730	})
731	}
732
733	static void SDL_Convert_Swap16_NEON(Uint16* dst, const Uint16* src, int num_samples)
734	{
735	CONVERT_16_FWD({
736	dst[i] = SDL_Swap16(src[i]);
737	}, {
738	uint8x16_t ints0 = vld1q_u8((const Uint8*)&src[i]);
739	uint8x16_t ints1 = vld1q_u8((const Uint8*)&src[i + `8`]);
740
741	ints0 = vrev16q_u8(ints0);
742	ints1 = vrev16q_u8(ints1);
743
744	vst1q_u8((Uint8*)&dst[i], ints0);
745	vst1q_u8((Uint8*)&dst[i + `8`], ints1);
746	})
747	}
748
749	static void SDL_Convert_Swap32_NEON(Uint32* dst, const Uint32* src, int num_samples)
750	{
751	CONVERT_16_FWD({
752	dst[i] = SDL_Swap32(src[i]);
753	}, {
754	uint8x16_t ints0 = vld1q_u8((const Uint8*)&src[i]);
755	uint8x16_t ints1 = vld1q_u8((const Uint8*)&src[i + `4`]);
756	uint8x16_t ints2 = vld1q_u8((const Uint8*)&src[i + `8`]);
757	uint8x16_t ints3 = vld1q_u8((const Uint8*)&src[i + `12`]);
758
759	ints0 = vrev32q_u8(ints0);
760	ints1 = vrev32q_u8(ints1);
761	ints2 = vrev32q_u8(ints2);
762	ints3 = vrev32q_u8(ints3);
763
764	vst1q_u8((Uint8*)&dst[i], ints0);
765	vst1q_u8((Uint8*)&dst[i + `4`], ints1);
766	vst1q_u8((Uint8*)&dst[i + `8`], ints2);
767	vst1q_u8((Uint8*)&dst[i + `12`], ints3);
768	})
769	}
770	#endif
771
772	#undef CONVERT_16_FWD
773	#undef CONVERT_16_REV
774
775	// Function pointers set to a CPU-specific implementation.
776	static void (SDL_Convert_S8_to_F32)(float* dst, const* Sint8 src, int* num_samples) = NULL;
777	static void (SDL_Convert_U8_to_F32)(float* dst, const* Uint8 src, int* num_samples) = NULL;
778	static void (SDL_Convert_S16_to_F32)(float* dst, const* Sint16 src, int* num_samples) = NULL;
779	static void (SDL_Convert_S32_to_F32)(float* dst, const* Sint32 src, int* num_samples) = NULL;
780	static void (SDL_Convert_F32_to_S8)(Sint8 dst, const float src, int* num_samples) = NULL;
781	static void (SDL_Convert_F32_to_U8)(Uint8 dst, const float src, int* num_samples) = NULL;
782	static void (SDL_Convert_F32_to_S16)(Sint16 dst, const float src, int* num_samples) = NULL;
783	static void (SDL_Convert_F32_to_S32)(Sint32 dst, const float src, int* num_samples) = NULL;
784
785	static void (SDL_Convert_Swap16)(Uint16 dst, const Uint16* src, int num_samples) = NULL;
786	static void (SDL_Convert_Swap32)(Uint32 dst, const Uint32* src, int num_samples) = NULL;
787
788	void ConvertAudioToFloat(float dst, const* void src, int* num_samples, SDL_AudioFormat src_fmt)
789	{
790	switch (src_fmt) {
791	case SDL_AUDIO_S8:
792	SDL_Convert_S8_to_F32(dst, (const Sint8 *) src, num_samples);
793	break;
794
795	case SDL_AUDIO_U8:
796	SDL_Convert_U8_to_F32(dst, (const Uint8 *) src, num_samples);
797	break;
798
799	case SDL_AUDIO_S16:
800	SDL_Convert_S16_to_F32(dst, (const Sint16 *) src, num_samples);
801	break;
802
803	case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
804	SDL_Convert_Swap16((Uint16) dst, (const* Uint16*) src, num_samples);
805	SDL_Convert_S16_to_F32(dst, (const Sint16 *) dst, num_samples);
806	break;
807
808	case SDL_AUDIO_S32:
809	SDL_Convert_S32_to_F32(dst, (const Sint32 *) src, num_samples);
810	break;
811
812	case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
813	SDL_Convert_Swap32((Uint32) dst, (const* Uint32*) src, num_samples);
814	SDL_Convert_S32_to_F32(dst, (const Sint32 *) dst, num_samples);
815	break;
816
817	case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
818	SDL_Convert_Swap32((Uint32) dst, (const* Uint32*) src, num_samples);
819	break;
820
821	default: SDL_assert(!"Unexpected audio format!"); break;
822	}
823	}
824
825	void ConvertAudioFromFloat(void dst, const* float src, int* num_samples, SDL_AudioFormat dst_fmt)
826	{
827	switch (dst_fmt) {
828	case SDL_AUDIO_S8:
829	SDL_Convert_F32_to_S8((Sint8 *) dst, src, num_samples);
830	break;
831
832	case SDL_AUDIO_U8:
833	SDL_Convert_F32_to_U8((Uint8 *) dst, src, num_samples);
834	break;
835
836	case SDL_AUDIO_S16:
837	SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
838	break;
839
840	case SDL_AUDIO_S16 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
841	SDL_Convert_F32_to_S16((Sint16 *) dst, src, num_samples);
842	SDL_Convert_Swap16((Uint16) dst, (const* Uint16*) dst, num_samples);
843	break;
844
845	case SDL_AUDIO_S32:
846	SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
847	break;
848
849	case SDL_AUDIO_S32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
850	SDL_Convert_F32_to_S32((Sint32 *) dst, src, num_samples);
851	SDL_Convert_Swap32((Uint32) dst, (const* Uint32*) dst, num_samples);
852	break;
853
854	case SDL_AUDIO_F32 ^ SDL_AUDIO_MASK_BIG_ENDIAN:
855	SDL_Convert_Swap32((Uint32) dst, (const* Uint32*) src, num_samples);
856	break;
857
858	default: SDL_assert(!"Unexpected audio format!"); break;
859	}
860	}
861
862	void ConvertAudioSwapEndian(void* dst, const void* src, int num_samples, int bitsize)
863	{
864	switch (bitsize) {
865	case `16`: SDL_Convert_Swap16((Uint16) dst, (const* Uint16) src, num_samples); break*;
866	case `32`: SDL_Convert_Swap32((Uint32) dst, (const* Uint32) src, num_samples); break*;
867	default: SDL_assert(!"Unexpected audio format!"); break;
868	}
869	}
870
871	void SDL_ChooseAudioConverters(void)
872	{
873	static bool converters_chosen = false;
874	if (converters_chosen) {
875	return;
876	}
877
878	#define SET_CONVERTER_FUNCS(fntype) \
879	SDL_Convert_Swap16 = SDL_Convert_Swap16_##fntype; \
880	SDL_Convert_Swap32 = SDL_Convert_Swap32_##fntype;
881
882	#ifdef SDL_SSE4_1_INTRINSICS
883	if (SDL_HasSSE41()) {
884	SET_CONVERTER_FUNCS(SSSE3);
885	} else
886	#endif
887	#ifdef SDL_NEON_INTRINSICS
888	if (SDL_HasNEON()) {
889	SET_CONVERTER_FUNCS(NEON);
890	} else
891	#endif
892	{
893	SET_CONVERTER_FUNCS(Scalar);
894	}
895
896	#undef SET_CONVERTER_FUNCS
897
898	#define SET_CONVERTER_FUNCS(fntype) \
899	SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
900	SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
901	SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
902	SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
903	SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
904	SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
905	SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
906	SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
907
908	#ifdef SDL_SSE2_INTRINSICS
909	if (SDL_HasSSE2()) {
910	SET_CONVERTER_FUNCS(SSE2);
911	} else
912	#endif
913	#ifdef SDL_NEON_INTRINSICS
914	if (SDL_HasNEON()) {
915	SET_CONVERTER_FUNCS(NEON);
916	} else
917	#endif
918	{
919	SET_CONVERTER_FUNCS(Scalar);
920	}
921
922	#undef SET_CONVERTER_FUNCS
923
924	converters_chosen = true;
925	}
926

Browse the source code of SDL/src/audio/SDL_audiotypecvt.c