1/* blip_buf $vers. http://www.slack.net/~ant/ */
2
3#include "blip_buf.h"
4
5#include <assert.h>
6#include <limits.h>
7#include <string.h>
8#include <stdlib.h>
9
10/* Library Copyright (C) 2003-2009 Shay Green. This library is free software;
11you can redistribute it and/or modify it under the terms of the GNU Lesser
12General Public License as published by the Free Software Foundation; either
13version 2.1 of the License, or (at your option) any later version. This
14library is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
17details. You should have received a copy of the GNU Lesser General Public
18License along with this module; if not, write to the Free Software Foundation,
19Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
20
21#if defined (BLARGG_TEST) && BLARGG_TEST
22 #include "blargg_test.h"
23#endif
24
25/* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000.
26Avoids constants that don't fit in 32 bits. */
27#if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF
28 typedef unsigned long fixed_t;
29 enum { pre_shift = 32 };
30
31#elif defined(ULLONG_MAX)
32 typedef unsigned long long fixed_t;
33 enum { pre_shift = 32 };
34
35#else
36 typedef unsigned fixed_t;
37 enum { pre_shift = 0 };
38
39#endif
40
41enum { time_bits = pre_shift + 20 };
42
43static fixed_t const time_unit = (fixed_t) 1 << time_bits;
44
45enum { bass_shift = 9 }; /* affects high-pass filter breakpoint frequency */
46enum { end_frame_extra = 2 }; /* allows deltas slightly after frame length */
47
48enum { half_width = 8 };
49enum { buf_extra = half_width*2 + end_frame_extra };
50enum { phase_bits = 5 };
51enum { phase_count = 1 << phase_bits };
52enum { delta_bits = 15 };
53enum { delta_unit = 1 << delta_bits };
54enum { frac_bits = time_bits - pre_shift };
55
56/* We could eliminate avail and encode whole samples in offset, but that would
57limit the total buffered samples to blip_max_frame. That could only be
58increased by decreasing time_bits, which would reduce resample ratio accuracy.
59*/
60
61/** Sample buffer that resamples to output rate and accumulates samples
62until they're read out */
63struct blip_t
64{
65 fixed_t factor;
66 fixed_t offset;
67 int avail;
68 int size;
69 int integrator;
70};
71
72typedef int buf_t;
73
74/* probably not totally portable */
75#define SAMPLES( buf ) ((buf_t*) ((buf) + 1))
76
77/* Arithmetic (sign-preserving) right shift */
78#define ARITH_SHIFT( n, shift ) \
79 ((n) >> (shift))
80
81enum { max_sample = +32767 };
82enum { min_sample = -32768 };
83
84#define CLAMP( n ) \
85 {\
86 if ( (short) n != n )\
87 n = ARITH_SHIFT( n, 16 ) ^ max_sample;\
88 }
89
90static void check_assumptions( void )
91{
92 int n;
93
94 #if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF
95 #error "int must be at least 32 bits"
96 #endif
97
98 assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */
99
100 n = max_sample * 2;
101 CLAMP( n );
102 assert( n == max_sample );
103
104 n = min_sample * 2;
105 CLAMP( n );
106 assert( n == min_sample );
107
108 assert( blip_max_ratio <= time_unit );
109 assert( blip_max_frame <= (fixed_t) -1 >> time_bits );
110}
111
112blip_t* blip_new( int size )
113{
114 blip_t* m;
115 assert( size >= 0 );
116
117 m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) );
118 if ( m )
119 {
120 m->factor = time_unit / blip_max_ratio;
121 m->size = size;
122 blip_clear( m );
123 check_assumptions();
124 }
125 return m;
126}
127
128void blip_delete( blip_t* m )
129{
130 if ( m != NULL )
131 {
132 /* Clear fields in case user tries to use after freeing */
133 memset( m, 0, sizeof *m );
134 free( m );
135 }
136}
137
138void blip_set_rates( blip_t* m, double clock_rate, double sample_rate )
139{
140 double factor = time_unit * sample_rate / clock_rate;
141 m->factor = (fixed_t) factor;
142
143 /* Fails if clock_rate exceeds maximum, relative to sample_rate */
144 assert( 0 <= factor - m->factor && factor - m->factor < 1 );
145
146 /* Avoid requiring math.h. Equivalent to
147 m->factor = (int) ceil( factor ) */
148 if ( m->factor < factor )
149 m->factor++;
150
151 /* At this point, factor is most likely rounded up, but could still
152 have been rounded down in the floating-point calculation. */
153}
154
155void blip_clear( blip_t* m )
156{
157 /* We could set offset to 0, factor/2, or factor-1. 0 is suitable if
158 factor is rounded up. factor-1 is suitable if factor is rounded down.
159 Since we don't know rounding direction, factor/2 accommodates either,
160 with the slight loss of showing an error in half the time. Since for
161 a 64-bit factor this is years, the halving isn't a problem. */
162
163 m->offset = m->factor / 2;
164 m->avail = 0;
165 m->integrator = 0;
166 memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) );
167}
168
169int blip_clocks_needed( const blip_t* m, int samples )
170{
171 fixed_t needed;
172
173 /* Fails if buffer can't hold that many more samples */
174 assert( samples >= 0 && m->avail + samples <= m->size );
175
176 needed = (fixed_t) samples * time_unit;
177 if ( needed < m->offset )
178 return 0;
179
180 return (needed - m->offset + m->factor - 1) / m->factor;
181}
182
183void blip_end_frame( blip_t* m, unsigned t )
184{
185 fixed_t off = t * m->factor + m->offset;
186 m->avail += off >> time_bits;
187 m->offset = off & (time_unit - 1);
188
189 /* Fails if buffer size was exceeded */
190 assert( m->avail <= m->size );
191}
192
193int blip_samples_avail( const blip_t* m )
194{
195 return m->avail;
196}
197
198static void remove_samples( blip_t* m, int count )
199{
200 buf_t* buf = SAMPLES( m );
201 int remain = m->avail + buf_extra - count;
202 m->avail -= count;
203
204 memmove( &buf [0], &buf [count], remain * sizeof buf [0] );
205 memset( &buf [remain], 0, count * sizeof buf [0] );
206}
207
208int blip_read_samples( blip_t* m, short out [], int count, int stereo )
209{
210 assert( count >= 0 );
211
212 if ( count > m->avail )
213 count = m->avail;
214
215 if ( count )
216 {
217 int const step = stereo ? 2 : 1;
218 buf_t const* in = SAMPLES( m );
219 buf_t const* end = in + count;
220 int sum = m->integrator;
221 do
222 {
223 /* Eliminate fraction */
224 int s = ARITH_SHIFT( sum, delta_bits );
225
226 sum += *in++;
227
228 CLAMP( s );
229
230 *out = s;
231 out += step;
232
233 /* High-pass filter */
234 sum -= s << (delta_bits - bass_shift);
235 }
236 while ( in != end );
237 m->integrator = sum;
238
239 remove_samples( m, count );
240 }
241
242 return count;
243}
244
245/* Things that didn't help performance on x86:
246 __attribute__((aligned(128)))
247 #define short int
248 restrict
249*/
250
251/* Sinc_Generator( 0.9, 0.55, 4.5 ) */
252static short const bl_step [phase_count + 1] [half_width] =
253{
254{ 43, -115, 350, -488, 1136, -914, 5861,21022},
255{ 44, -118, 348, -473, 1076, -799, 5274,21001},
256{ 45, -121, 344, -454, 1011, -677, 4706,20936},
257{ 46, -122, 336, -431, 942, -549, 4156,20829},
258{ 47, -123, 327, -404, 868, -418, 3629,20679},
259{ 47, -122, 316, -375, 792, -285, 3124,20488},
260{ 47, -120, 303, -344, 714, -151, 2644,20256},
261{ 46, -117, 289, -310, 634, -17, 2188,19985},
262{ 46, -114, 273, -275, 553, 117, 1758,19675},
263{ 44, -108, 255, -237, 471, 247, 1356,19327},
264{ 43, -103, 237, -199, 390, 373, 981,18944},
265{ 42, -98, 218, -160, 310, 495, 633,18527},
266{ 40, -91, 198, -121, 231, 611, 314,18078},
267{ 38, -84, 178, -81, 153, 722, 22,17599},
268{ 36, -76, 157, -43, 80, 824, -241,17092},
269{ 34, -68, 135, -3, 8, 919, -476,16558},
270{ 32, -61, 115, 34, -60, 1006, -683,16001},
271{ 29, -52, 94, 70, -123, 1083, -862,15422},
272{ 27, -44, 73, 106, -184, 1152,-1015,14824},
273{ 25, -36, 53, 139, -239, 1211,-1142,14210},
274{ 22, -27, 34, 170, -290, 1261,-1244,13582},
275{ 20, -20, 16, 199, -335, 1301,-1322,12942},
276{ 18, -12, -3, 226, -375, 1331,-1376,12293},
277{ 15, -4, -19, 250, -410, 1351,-1408,11638},
278{ 13, 3, -35, 272, -439, 1361,-1419,10979},
279{ 11, 9, -49, 292, -464, 1362,-1410,10319},
280{ 9, 16, -63, 309, -483, 1354,-1383, 9660},
281{ 7, 22, -75, 322, -496, 1337,-1339, 9005},
282{ 6, 26, -85, 333, -504, 1312,-1280, 8355},
283{ 4, 31, -94, 341, -507, 1278,-1205, 7713},
284{ 3, 35, -102, 347, -506, 1238,-1119, 7082},
285{ 1, 40, -110, 350, -499, 1190,-1021, 6464},
286{ 0, 43, -115, 350, -488, 1136, -914, 5861}
287};
288
289/* Shifting by pre_shift allows calculation using unsigned int rather than
290possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient.
291And by having pre_shift 32, a 32-bit platform can easily do the shift by
292simply ignoring the low half. */
293
294void blip_add_delta( blip_t* m, unsigned time, int delta )
295{
296 unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
297 buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
298
299 int const phase_shift = frac_bits - phase_bits;
300 int phase = fixed >> phase_shift & (phase_count - 1);
301 short const* in = bl_step [phase];
302 short const* rev = bl_step [phase_count - phase];
303
304 int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1);
305 int delta2 = (delta * interp) >> delta_bits;
306 delta -= delta2;
307
308 /* Fails if buffer size was exceeded */
309 assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
310
311 out [0] += in[0]*delta + in[half_width+0]*delta2;
312 out [1] += in[1]*delta + in[half_width+1]*delta2;
313 out [2] += in[2]*delta + in[half_width+2]*delta2;
314 out [3] += in[3]*delta + in[half_width+3]*delta2;
315 out [4] += in[4]*delta + in[half_width+4]*delta2;
316 out [5] += in[5]*delta + in[half_width+5]*delta2;
317 out [6] += in[6]*delta + in[half_width+6]*delta2;
318 out [7] += in[7]*delta + in[half_width+7]*delta2;
319
320 in = rev;
321 out [ 8] += in[7]*delta + in[7-half_width]*delta2;
322 out [ 9] += in[6]*delta + in[6-half_width]*delta2;
323 out [10] += in[5]*delta + in[5-half_width]*delta2;
324 out [11] += in[4]*delta + in[4-half_width]*delta2;
325 out [12] += in[3]*delta + in[3-half_width]*delta2;
326 out [13] += in[2]*delta + in[2-half_width]*delta2;
327 out [14] += in[1]*delta + in[1-half_width]*delta2;
328 out [15] += in[0]*delta + in[0-half_width]*delta2;
329}
330
331void blip_add_delta_fast( blip_t* m, unsigned time, int delta )
332{
333 unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift);
334 buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits);
335
336 int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1);
337 int delta2 = delta * interp;
338
339 /* Fails if buffer size was exceeded */
340 assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] );
341
342 out [7] += delta * delta_unit - delta2;
343 out [8] += delta2;
344}
345