1 | /* blip_buf $vers. http://www.slack.net/~ant/ */ |
2 | |
3 | #include "blip_buf.h" |
4 | |
5 | #include <assert.h> |
6 | #include <limits.h> |
7 | #include <string.h> |
8 | #include <stdlib.h> |
9 | |
10 | /* Library Copyright (C) 2003-2009 Shay Green. This library is free software; |
11 | you can redistribute it and/or modify it under the terms of the GNU Lesser |
12 | General Public License as published by the Free Software Foundation; either |
13 | version 2.1 of the License, or (at your option) any later version. This |
14 | library is distributed in the hope that it will be useful, but WITHOUT ANY |
15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
16 | A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
17 | details. You should have received a copy of the GNU Lesser General Public |
18 | License along with this module; if not, write to the Free Software Foundation, |
19 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ |
20 | |
21 | #if defined (BLARGG_TEST) && BLARGG_TEST |
22 | #include "blargg_test.h" |
23 | #endif |
24 | |
25 | /* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000. |
26 | Avoids constants that don't fit in 32 bits. */ |
27 | #if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF |
28 | typedef unsigned long fixed_t; |
29 | enum { pre_shift = 32 }; |
30 | |
31 | #elif defined(ULLONG_MAX) |
32 | typedef unsigned long long fixed_t; |
33 | enum { pre_shift = 32 }; |
34 | |
35 | #else |
36 | typedef unsigned fixed_t; |
37 | enum { pre_shift = 0 }; |
38 | |
39 | #endif |
40 | |
41 | enum { time_bits = pre_shift + 20 }; |
42 | |
43 | static fixed_t const time_unit = (fixed_t) 1 << time_bits; |
44 | |
45 | enum { bass_shift = 9 }; /* affects high-pass filter breakpoint frequency */ |
46 | enum { = 2 }; /* allows deltas slightly after frame length */ |
47 | |
48 | enum { half_width = 8 }; |
49 | enum { = half_width*2 + end_frame_extra }; |
50 | enum { phase_bits = 5 }; |
51 | enum { phase_count = 1 << phase_bits }; |
52 | enum { delta_bits = 15 }; |
53 | enum { delta_unit = 1 << delta_bits }; |
54 | enum { frac_bits = time_bits - pre_shift }; |
55 | |
56 | /* We could eliminate avail and encode whole samples in offset, but that would |
57 | limit the total buffered samples to blip_max_frame. That could only be |
58 | increased by decreasing time_bits, which would reduce resample ratio accuracy. |
59 | */ |
60 | |
61 | /** Sample buffer that resamples to output rate and accumulates samples |
62 | until they're read out */ |
63 | struct blip_t |
64 | { |
65 | fixed_t factor; |
66 | fixed_t offset; |
67 | int avail; |
68 | int size; |
69 | int integrator; |
70 | }; |
71 | |
72 | typedef int buf_t; |
73 | |
74 | /* probably not totally portable */ |
75 | #define SAMPLES( buf ) ((buf_t*) ((buf) + 1)) |
76 | |
77 | /* Arithmetic (sign-preserving) right shift */ |
78 | #define ARITH_SHIFT( n, shift ) \ |
79 | ((n) >> (shift)) |
80 | |
81 | enum { max_sample = +32767 }; |
82 | enum { min_sample = -32768 }; |
83 | |
84 | #define CLAMP( n ) \ |
85 | {\ |
86 | if ( (short) n != n )\ |
87 | n = ARITH_SHIFT( n, 16 ) ^ max_sample;\ |
88 | } |
89 | |
90 | static void check_assumptions( void ) |
91 | { |
92 | int n; |
93 | |
94 | #if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF |
95 | #error "int must be at least 32 bits" |
96 | #endif |
97 | |
98 | assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */ |
99 | |
100 | n = max_sample * 2; |
101 | CLAMP( n ); |
102 | assert( n == max_sample ); |
103 | |
104 | n = min_sample * 2; |
105 | CLAMP( n ); |
106 | assert( n == min_sample ); |
107 | |
108 | assert( blip_max_ratio <= time_unit ); |
109 | assert( blip_max_frame <= (fixed_t) -1 >> time_bits ); |
110 | } |
111 | |
112 | blip_t* blip_new( int size ) |
113 | { |
114 | blip_t* m; |
115 | assert( size >= 0 ); |
116 | |
117 | m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) ); |
118 | if ( m ) |
119 | { |
120 | m->factor = time_unit / blip_max_ratio; |
121 | m->size = size; |
122 | blip_clear( m ); |
123 | check_assumptions(); |
124 | } |
125 | return m; |
126 | } |
127 | |
128 | void blip_delete( blip_t* m ) |
129 | { |
130 | if ( m != NULL ) |
131 | { |
132 | /* Clear fields in case user tries to use after freeing */ |
133 | memset( m, 0, sizeof *m ); |
134 | free( m ); |
135 | } |
136 | } |
137 | |
138 | void blip_set_rates( blip_t* m, double clock_rate, double sample_rate ) |
139 | { |
140 | double factor = time_unit * sample_rate / clock_rate; |
141 | m->factor = (fixed_t) factor; |
142 | |
143 | /* Fails if clock_rate exceeds maximum, relative to sample_rate */ |
144 | assert( 0 <= factor - m->factor && factor - m->factor < 1 ); |
145 | |
146 | /* Avoid requiring math.h. Equivalent to |
147 | m->factor = (int) ceil( factor ) */ |
148 | if ( m->factor < factor ) |
149 | m->factor++; |
150 | |
151 | /* At this point, factor is most likely rounded up, but could still |
152 | have been rounded down in the floating-point calculation. */ |
153 | } |
154 | |
155 | void blip_clear( blip_t* m ) |
156 | { |
157 | /* We could set offset to 0, factor/2, or factor-1. 0 is suitable if |
158 | factor is rounded up. factor-1 is suitable if factor is rounded down. |
159 | Since we don't know rounding direction, factor/2 accommodates either, |
160 | with the slight loss of showing an error in half the time. Since for |
161 | a 64-bit factor this is years, the halving isn't a problem. */ |
162 | |
163 | m->offset = m->factor / 2; |
164 | m->avail = 0; |
165 | m->integrator = 0; |
166 | memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) ); |
167 | } |
168 | |
169 | int blip_clocks_needed( const blip_t* m, int samples ) |
170 | { |
171 | fixed_t needed; |
172 | |
173 | /* Fails if buffer can't hold that many more samples */ |
174 | assert( samples >= 0 && m->avail + samples <= m->size ); |
175 | |
176 | needed = (fixed_t) samples * time_unit; |
177 | if ( needed < m->offset ) |
178 | return 0; |
179 | |
180 | return (needed - m->offset + m->factor - 1) / m->factor; |
181 | } |
182 | |
183 | void blip_end_frame( blip_t* m, unsigned t ) |
184 | { |
185 | fixed_t off = t * m->factor + m->offset; |
186 | m->avail += off >> time_bits; |
187 | m->offset = off & (time_unit - 1); |
188 | |
189 | /* Fails if buffer size was exceeded */ |
190 | assert( m->avail <= m->size ); |
191 | } |
192 | |
193 | int blip_samples_avail( const blip_t* m ) |
194 | { |
195 | return m->avail; |
196 | } |
197 | |
198 | static void remove_samples( blip_t* m, int count ) |
199 | { |
200 | buf_t* buf = SAMPLES( m ); |
201 | int remain = m->avail + buf_extra - count; |
202 | m->avail -= count; |
203 | |
204 | memmove( &buf [0], &buf [count], remain * sizeof buf [0] ); |
205 | memset( &buf [remain], 0, count * sizeof buf [0] ); |
206 | } |
207 | |
208 | int blip_read_samples( blip_t* m, short out [], int count, int stereo ) |
209 | { |
210 | assert( count >= 0 ); |
211 | |
212 | if ( count > m->avail ) |
213 | count = m->avail; |
214 | |
215 | if ( count ) |
216 | { |
217 | int const step = stereo ? 2 : 1; |
218 | buf_t const* in = SAMPLES( m ); |
219 | buf_t const* end = in + count; |
220 | int sum = m->integrator; |
221 | do |
222 | { |
223 | /* Eliminate fraction */ |
224 | int s = ARITH_SHIFT( sum, delta_bits ); |
225 | |
226 | sum += *in++; |
227 | |
228 | CLAMP( s ); |
229 | |
230 | *out = s; |
231 | out += step; |
232 | |
233 | /* High-pass filter */ |
234 | sum -= s << (delta_bits - bass_shift); |
235 | } |
236 | while ( in != end ); |
237 | m->integrator = sum; |
238 | |
239 | remove_samples( m, count ); |
240 | } |
241 | |
242 | return count; |
243 | } |
244 | |
245 | /* Things that didn't help performance on x86: |
246 | __attribute__((aligned(128))) |
247 | #define short int |
248 | restrict |
249 | */ |
250 | |
251 | /* Sinc_Generator( 0.9, 0.55, 4.5 ) */ |
252 | static short const bl_step [phase_count + 1] [half_width] = |
253 | { |
254 | { 43, -115, 350, -488, 1136, -914, 5861,21022}, |
255 | { 44, -118, 348, -473, 1076, -799, 5274,21001}, |
256 | { 45, -121, 344, -454, 1011, -677, 4706,20936}, |
257 | { 46, -122, 336, -431, 942, -549, 4156,20829}, |
258 | { 47, -123, 327, -404, 868, -418, 3629,20679}, |
259 | { 47, -122, 316, -375, 792, -285, 3124,20488}, |
260 | { 47, -120, 303, -344, 714, -151, 2644,20256}, |
261 | { 46, -117, 289, -310, 634, -17, 2188,19985}, |
262 | { 46, -114, 273, -275, 553, 117, 1758,19675}, |
263 | { 44, -108, 255, -237, 471, 247, 1356,19327}, |
264 | { 43, -103, 237, -199, 390, 373, 981,18944}, |
265 | { 42, -98, 218, -160, 310, 495, 633,18527}, |
266 | { 40, -91, 198, -121, 231, 611, 314,18078}, |
267 | { 38, -84, 178, -81, 153, 722, 22,17599}, |
268 | { 36, -76, 157, -43, 80, 824, -241,17092}, |
269 | { 34, -68, 135, -3, 8, 919, -476,16558}, |
270 | { 32, -61, 115, 34, -60, 1006, -683,16001}, |
271 | { 29, -52, 94, 70, -123, 1083, -862,15422}, |
272 | { 27, -44, 73, 106, -184, 1152,-1015,14824}, |
273 | { 25, -36, 53, 139, -239, 1211,-1142,14210}, |
274 | { 22, -27, 34, 170, -290, 1261,-1244,13582}, |
275 | { 20, -20, 16, 199, -335, 1301,-1322,12942}, |
276 | { 18, -12, -3, 226, -375, 1331,-1376,12293}, |
277 | { 15, -4, -19, 250, -410, 1351,-1408,11638}, |
278 | { 13, 3, -35, 272, -439, 1361,-1419,10979}, |
279 | { 11, 9, -49, 292, -464, 1362,-1410,10319}, |
280 | { 9, 16, -63, 309, -483, 1354,-1383, 9660}, |
281 | { 7, 22, -75, 322, -496, 1337,-1339, 9005}, |
282 | { 6, 26, -85, 333, -504, 1312,-1280, 8355}, |
283 | { 4, 31, -94, 341, -507, 1278,-1205, 7713}, |
284 | { 3, 35, -102, 347, -506, 1238,-1119, 7082}, |
285 | { 1, 40, -110, 350, -499, 1190,-1021, 6464}, |
286 | { 0, 43, -115, 350, -488, 1136, -914, 5861} |
287 | }; |
288 | |
289 | /* Shifting by pre_shift allows calculation using unsigned int rather than |
290 | possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient. |
291 | And by having pre_shift 32, a 32-bit platform can easily do the shift by |
292 | simply ignoring the low half. */ |
293 | |
294 | void blip_add_delta( blip_t* m, unsigned time, int delta ) |
295 | { |
296 | unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); |
297 | buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits); |
298 | |
299 | int const phase_shift = frac_bits - phase_bits; |
300 | int phase = fixed >> phase_shift & (phase_count - 1); |
301 | short const* in = bl_step [phase]; |
302 | short const* rev = bl_step [phase_count - phase]; |
303 | |
304 | int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1); |
305 | int delta2 = (delta * interp) >> delta_bits; |
306 | delta -= delta2; |
307 | |
308 | /* Fails if buffer size was exceeded */ |
309 | assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] ); |
310 | |
311 | out [0] += in[0]*delta + in[half_width+0]*delta2; |
312 | out [1] += in[1]*delta + in[half_width+1]*delta2; |
313 | out [2] += in[2]*delta + in[half_width+2]*delta2; |
314 | out [3] += in[3]*delta + in[half_width+3]*delta2; |
315 | out [4] += in[4]*delta + in[half_width+4]*delta2; |
316 | out [5] += in[5]*delta + in[half_width+5]*delta2; |
317 | out [6] += in[6]*delta + in[half_width+6]*delta2; |
318 | out [7] += in[7]*delta + in[half_width+7]*delta2; |
319 | |
320 | in = rev; |
321 | out [ 8] += in[7]*delta + in[7-half_width]*delta2; |
322 | out [ 9] += in[6]*delta + in[6-half_width]*delta2; |
323 | out [10] += in[5]*delta + in[5-half_width]*delta2; |
324 | out [11] += in[4]*delta + in[4-half_width]*delta2; |
325 | out [12] += in[3]*delta + in[3-half_width]*delta2; |
326 | out [13] += in[2]*delta + in[2-half_width]*delta2; |
327 | out [14] += in[1]*delta + in[1-half_width]*delta2; |
328 | out [15] += in[0]*delta + in[0-half_width]*delta2; |
329 | } |
330 | |
331 | void blip_add_delta_fast( blip_t* m, unsigned time, int delta ) |
332 | { |
333 | unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); |
334 | buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits); |
335 | |
336 | int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1); |
337 | int delta2 = delta * interp; |
338 | |
339 | /* Fails if buffer size was exceeded */ |
340 | assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] ); |
341 | |
342 | out [7] += delta * delta_unit - delta2; |
343 | out [8] += delta2; |
344 | } |
345 | |