| 1 | /* blip_buf $vers. http://www.slack.net/~ant/ */ | 
|---|
| 2 |  | 
|---|
| 3 | #include "blip_buf.h" | 
|---|
| 4 |  | 
|---|
| 5 | #include <assert.h> | 
|---|
| 6 | #include <limits.h> | 
|---|
| 7 | #include <string.h> | 
|---|
| 8 | #include <stdlib.h> | 
|---|
| 9 |  | 
|---|
| 10 | /* Library Copyright (C) 2003-2009 Shay Green. This library is free software; | 
|---|
| 11 | you can redistribute it and/or modify it under the terms of the GNU Lesser | 
|---|
| 12 | General Public License as published by the Free Software Foundation; either | 
|---|
| 13 | version 2.1 of the License, or (at your option) any later version. This | 
|---|
| 14 | library is distributed in the hope that it will be useful, but WITHOUT ANY | 
|---|
| 15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR | 
|---|
| 16 | A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more | 
|---|
| 17 | details. You should have received a copy of the GNU Lesser General Public | 
|---|
| 18 | License along with this module; if not, write to the Free Software Foundation, | 
|---|
| 19 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ | 
|---|
| 20 |  | 
|---|
| 21 | #if defined (BLARGG_TEST) && BLARGG_TEST | 
|---|
| 22 | #include "blargg_test.h" | 
|---|
| 23 | #endif | 
|---|
| 24 |  | 
|---|
| 25 | /* Equivalent to ULONG_MAX >= 0xFFFFFFFF00000000. | 
|---|
| 26 | Avoids constants that don't fit in 32 bits. */ | 
|---|
| 27 | #if ULONG_MAX/0xFFFFFFFF > 0xFFFFFFFF | 
|---|
| 28 | typedef unsigned long fixed_t; | 
|---|
| 29 | enum { pre_shift = 32 }; | 
|---|
| 30 |  | 
|---|
| 31 | #elif defined(ULLONG_MAX) | 
|---|
| 32 | typedef unsigned long long fixed_t; | 
|---|
| 33 | enum { pre_shift = 32 }; | 
|---|
| 34 |  | 
|---|
| 35 | #else | 
|---|
| 36 | typedef unsigned fixed_t; | 
|---|
| 37 | enum { pre_shift = 0 }; | 
|---|
| 38 |  | 
|---|
| 39 | #endif | 
|---|
| 40 |  | 
|---|
| 41 | enum { time_bits = pre_shift + 20 }; | 
|---|
| 42 |  | 
|---|
| 43 | static fixed_t const time_unit = (fixed_t) 1 << time_bits; | 
|---|
| 44 |  | 
|---|
| 45 | enum { bass_shift  = 9 }; /* affects high-pass filter breakpoint frequency */ | 
|---|
| 46 | enum {  = 2 }; /* allows deltas slightly after frame length */ | 
|---|
| 47 |  | 
|---|
| 48 | enum { half_width  = 8 }; | 
|---|
| 49 | enum {    = half_width*2 + end_frame_extra }; | 
|---|
| 50 | enum { phase_bits  = 5 }; | 
|---|
| 51 | enum { phase_count = 1 << phase_bits }; | 
|---|
| 52 | enum { delta_bits  = 15 }; | 
|---|
| 53 | enum { delta_unit  = 1 << delta_bits }; | 
|---|
| 54 | enum { frac_bits = time_bits - pre_shift }; | 
|---|
| 55 |  | 
|---|
| 56 | /* We could eliminate avail and encode whole samples in offset, but that would | 
|---|
| 57 | limit the total buffered samples to blip_max_frame. That could only be | 
|---|
| 58 | increased by decreasing time_bits, which would reduce resample ratio accuracy. | 
|---|
| 59 | */ | 
|---|
| 60 |  | 
|---|
| 61 | /** Sample buffer that resamples to output rate and accumulates samples | 
|---|
| 62 | until they're read out */ | 
|---|
| 63 | struct blip_t | 
|---|
| 64 | { | 
|---|
| 65 | fixed_t factor; | 
|---|
| 66 | fixed_t offset; | 
|---|
| 67 | int avail; | 
|---|
| 68 | int size; | 
|---|
| 69 | int integrator; | 
|---|
| 70 | }; | 
|---|
| 71 |  | 
|---|
| 72 | typedef int buf_t; | 
|---|
| 73 |  | 
|---|
| 74 | /* probably not totally portable */ | 
|---|
| 75 | #define SAMPLES( buf ) ((buf_t*) ((buf) + 1)) | 
|---|
| 76 |  | 
|---|
| 77 | /* Arithmetic (sign-preserving) right shift */ | 
|---|
| 78 | #define ARITH_SHIFT( n, shift ) \ | 
|---|
| 79 | ((n) >> (shift)) | 
|---|
| 80 |  | 
|---|
| 81 | enum { max_sample = +32767 }; | 
|---|
| 82 | enum { min_sample = -32768 }; | 
|---|
| 83 |  | 
|---|
| 84 | #define CLAMP( n ) \ | 
|---|
| 85 | {\ | 
|---|
| 86 | if ( (short) n != n )\ | 
|---|
| 87 | n = ARITH_SHIFT( n, 16 ) ^ max_sample;\ | 
|---|
| 88 | } | 
|---|
| 89 |  | 
|---|
| 90 | static void check_assumptions( void ) | 
|---|
| 91 | { | 
|---|
| 92 | int n; | 
|---|
| 93 |  | 
|---|
| 94 | #if INT_MAX < 0x7FFFFFFF || UINT_MAX < 0xFFFFFFFF | 
|---|
| 95 | #error "int must be at least 32 bits" | 
|---|
| 96 | #endif | 
|---|
| 97 |  | 
|---|
| 98 | assert( (-3 >> 1) == -2 ); /* right shift must preserve sign */ | 
|---|
| 99 |  | 
|---|
| 100 | n = max_sample * 2; | 
|---|
| 101 | CLAMP( n ); | 
|---|
| 102 | assert( n == max_sample ); | 
|---|
| 103 |  | 
|---|
| 104 | n = min_sample * 2; | 
|---|
| 105 | CLAMP( n ); | 
|---|
| 106 | assert( n == min_sample ); | 
|---|
| 107 |  | 
|---|
| 108 | assert( blip_max_ratio <= time_unit ); | 
|---|
| 109 | assert( blip_max_frame <= (fixed_t) -1 >> time_bits ); | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | blip_t* blip_new( int size ) | 
|---|
| 113 | { | 
|---|
| 114 | blip_t* m; | 
|---|
| 115 | assert( size >= 0 ); | 
|---|
| 116 |  | 
|---|
| 117 | m = (blip_t*) malloc( sizeof *m + (size + buf_extra) * sizeof (buf_t) ); | 
|---|
| 118 | if ( m ) | 
|---|
| 119 | { | 
|---|
| 120 | m->factor = time_unit / blip_max_ratio; | 
|---|
| 121 | m->size   = size; | 
|---|
| 122 | blip_clear( m ); | 
|---|
| 123 | check_assumptions(); | 
|---|
| 124 | } | 
|---|
| 125 | return m; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | void blip_delete( blip_t* m ) | 
|---|
| 129 | { | 
|---|
| 130 | if ( m != NULL ) | 
|---|
| 131 | { | 
|---|
| 132 | /* Clear fields in case user tries to use after freeing */ | 
|---|
| 133 | memset( m, 0, sizeof *m ); | 
|---|
| 134 | free( m ); | 
|---|
| 135 | } | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | void blip_set_rates( blip_t* m, double clock_rate, double sample_rate ) | 
|---|
| 139 | { | 
|---|
| 140 | double factor = time_unit * sample_rate / clock_rate; | 
|---|
| 141 | m->factor = (fixed_t) factor; | 
|---|
| 142 |  | 
|---|
| 143 | /* Fails if clock_rate exceeds maximum, relative to sample_rate */ | 
|---|
| 144 | assert( 0 <= factor - m->factor && factor - m->factor < 1 ); | 
|---|
| 145 |  | 
|---|
| 146 | /* Avoid requiring math.h. Equivalent to | 
|---|
| 147 | m->factor = (int) ceil( factor ) */ | 
|---|
| 148 | if ( m->factor < factor ) | 
|---|
| 149 | m->factor++; | 
|---|
| 150 |  | 
|---|
| 151 | /* At this point, factor is most likely rounded up, but could still | 
|---|
| 152 | have been rounded down in the floating-point calculation. */ | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | void blip_clear( blip_t* m ) | 
|---|
| 156 | { | 
|---|
| 157 | /* We could set offset to 0, factor/2, or factor-1. 0 is suitable if | 
|---|
| 158 | factor is rounded up. factor-1 is suitable if factor is rounded down. | 
|---|
| 159 | Since we don't know rounding direction, factor/2 accommodates either, | 
|---|
| 160 | with the slight loss of showing an error in half the time. Since for | 
|---|
| 161 | a 64-bit factor this is years, the halving isn't a problem. */ | 
|---|
| 162 |  | 
|---|
| 163 | m->offset     = m->factor / 2; | 
|---|
| 164 | m->avail      = 0; | 
|---|
| 165 | m->integrator = 0; | 
|---|
| 166 | memset( SAMPLES( m ), 0, (m->size + buf_extra) * sizeof (buf_t) ); | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | int blip_clocks_needed( const blip_t* m, int samples ) | 
|---|
| 170 | { | 
|---|
| 171 | fixed_t needed; | 
|---|
| 172 |  | 
|---|
| 173 | /* Fails if buffer can't hold that many more samples */ | 
|---|
| 174 | assert( samples >= 0 && m->avail + samples <= m->size ); | 
|---|
| 175 |  | 
|---|
| 176 | needed = (fixed_t) samples * time_unit; | 
|---|
| 177 | if ( needed < m->offset ) | 
|---|
| 178 | return 0; | 
|---|
| 179 |  | 
|---|
| 180 | return (needed - m->offset + m->factor - 1) / m->factor; | 
|---|
| 181 | } | 
|---|
| 182 |  | 
|---|
| 183 | void blip_end_frame( blip_t* m, unsigned t ) | 
|---|
| 184 | { | 
|---|
| 185 | fixed_t off = t * m->factor + m->offset; | 
|---|
| 186 | m->avail += off >> time_bits; | 
|---|
| 187 | m->offset = off & (time_unit - 1); | 
|---|
| 188 |  | 
|---|
| 189 | /* Fails if buffer size was exceeded */ | 
|---|
| 190 | assert( m->avail <= m->size ); | 
|---|
| 191 | } | 
|---|
| 192 |  | 
|---|
| 193 | int blip_samples_avail( const blip_t* m ) | 
|---|
| 194 | { | 
|---|
| 195 | return m->avail; | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | static void remove_samples( blip_t* m, int count ) | 
|---|
| 199 | { | 
|---|
| 200 | buf_t* buf = SAMPLES( m ); | 
|---|
| 201 | int remain = m->avail + buf_extra - count; | 
|---|
| 202 | m->avail -= count; | 
|---|
| 203 |  | 
|---|
| 204 | memmove( &buf [0], &buf [count], remain * sizeof buf [0] ); | 
|---|
| 205 | memset( &buf [remain], 0, count * sizeof buf [0] ); | 
|---|
| 206 | } | 
|---|
| 207 |  | 
|---|
| 208 | int blip_read_samples( blip_t* m, short out [], int count, int stereo ) | 
|---|
| 209 | { | 
|---|
| 210 | assert( count >= 0 ); | 
|---|
| 211 |  | 
|---|
| 212 | if ( count > m->avail ) | 
|---|
| 213 | count = m->avail; | 
|---|
| 214 |  | 
|---|
| 215 | if ( count ) | 
|---|
| 216 | { | 
|---|
| 217 | int const step = stereo ? 2 : 1; | 
|---|
| 218 | buf_t const* in  = SAMPLES( m ); | 
|---|
| 219 | buf_t const* end = in + count; | 
|---|
| 220 | int sum = m->integrator; | 
|---|
| 221 | do | 
|---|
| 222 | { | 
|---|
| 223 | /* Eliminate fraction */ | 
|---|
| 224 | int s = ARITH_SHIFT( sum, delta_bits ); | 
|---|
| 225 |  | 
|---|
| 226 | sum += *in++; | 
|---|
| 227 |  | 
|---|
| 228 | CLAMP( s ); | 
|---|
| 229 |  | 
|---|
| 230 | *out = s; | 
|---|
| 231 | out += step; | 
|---|
| 232 |  | 
|---|
| 233 | /* High-pass filter */ | 
|---|
| 234 | sum -= s << (delta_bits - bass_shift); | 
|---|
| 235 | } | 
|---|
| 236 | while ( in != end ); | 
|---|
| 237 | m->integrator = sum; | 
|---|
| 238 |  | 
|---|
| 239 | remove_samples( m, count ); | 
|---|
| 240 | } | 
|---|
| 241 |  | 
|---|
| 242 | return count; | 
|---|
| 243 | } | 
|---|
| 244 |  | 
|---|
| 245 | /* Things that didn't help performance on x86: | 
|---|
| 246 | __attribute__((aligned(128))) | 
|---|
| 247 | #define short int | 
|---|
| 248 | restrict | 
|---|
| 249 | */ | 
|---|
| 250 |  | 
|---|
| 251 | /* Sinc_Generator( 0.9, 0.55, 4.5 ) */ | 
|---|
| 252 | static short const bl_step [phase_count + 1] [half_width] = | 
|---|
| 253 | { | 
|---|
| 254 | {   43, -115,  350, -488, 1136, -914, 5861,21022}, | 
|---|
| 255 | {   44, -118,  348, -473, 1076, -799, 5274,21001}, | 
|---|
| 256 | {   45, -121,  344, -454, 1011, -677, 4706,20936}, | 
|---|
| 257 | {   46, -122,  336, -431,  942, -549, 4156,20829}, | 
|---|
| 258 | {   47, -123,  327, -404,  868, -418, 3629,20679}, | 
|---|
| 259 | {   47, -122,  316, -375,  792, -285, 3124,20488}, | 
|---|
| 260 | {   47, -120,  303, -344,  714, -151, 2644,20256}, | 
|---|
| 261 | {   46, -117,  289, -310,  634,  -17, 2188,19985}, | 
|---|
| 262 | {   46, -114,  273, -275,  553,  117, 1758,19675}, | 
|---|
| 263 | {   44, -108,  255, -237,  471,  247, 1356,19327}, | 
|---|
| 264 | {   43, -103,  237, -199,  390,  373,  981,18944}, | 
|---|
| 265 | {   42,  -98,  218, -160,  310,  495,  633,18527}, | 
|---|
| 266 | {   40,  -91,  198, -121,  231,  611,  314,18078}, | 
|---|
| 267 | {   38,  -84,  178,  -81,  153,  722,   22,17599}, | 
|---|
| 268 | {   36,  -76,  157,  -43,   80,  824, -241,17092}, | 
|---|
| 269 | {   34,  -68,  135,   -3,    8,  919, -476,16558}, | 
|---|
| 270 | {   32,  -61,  115,   34,  -60, 1006, -683,16001}, | 
|---|
| 271 | {   29,  -52,   94,   70, -123, 1083, -862,15422}, | 
|---|
| 272 | {   27,  -44,   73,  106, -184, 1152,-1015,14824}, | 
|---|
| 273 | {   25,  -36,   53,  139, -239, 1211,-1142,14210}, | 
|---|
| 274 | {   22,  -27,   34,  170, -290, 1261,-1244,13582}, | 
|---|
| 275 | {   20,  -20,   16,  199, -335, 1301,-1322,12942}, | 
|---|
| 276 | {   18,  -12,   -3,  226, -375, 1331,-1376,12293}, | 
|---|
| 277 | {   15,   -4,  -19,  250, -410, 1351,-1408,11638}, | 
|---|
| 278 | {   13,    3,  -35,  272, -439, 1361,-1419,10979}, | 
|---|
| 279 | {   11,    9,  -49,  292, -464, 1362,-1410,10319}, | 
|---|
| 280 | {    9,   16,  -63,  309, -483, 1354,-1383, 9660}, | 
|---|
| 281 | {    7,   22,  -75,  322, -496, 1337,-1339, 9005}, | 
|---|
| 282 | {    6,   26,  -85,  333, -504, 1312,-1280, 8355}, | 
|---|
| 283 | {    4,   31,  -94,  341, -507, 1278,-1205, 7713}, | 
|---|
| 284 | {    3,   35, -102,  347, -506, 1238,-1119, 7082}, | 
|---|
| 285 | {    1,   40, -110,  350, -499, 1190,-1021, 6464}, | 
|---|
| 286 | {    0,   43, -115,  350, -488, 1136, -914, 5861} | 
|---|
| 287 | }; | 
|---|
| 288 |  | 
|---|
| 289 | /* Shifting by pre_shift allows calculation using unsigned int rather than | 
|---|
| 290 | possibly-wider fixed_t. On 32-bit platforms, this is likely more efficient. | 
|---|
| 291 | And by having pre_shift 32, a 32-bit platform can easily do the shift by | 
|---|
| 292 | simply ignoring the low half. */ | 
|---|
| 293 |  | 
|---|
| 294 | void blip_add_delta( blip_t* m, unsigned time, int delta ) | 
|---|
| 295 | { | 
|---|
| 296 | unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); | 
|---|
| 297 | buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits); | 
|---|
| 298 |  | 
|---|
| 299 | int const phase_shift = frac_bits - phase_bits; | 
|---|
| 300 | int phase = fixed >> phase_shift & (phase_count - 1); | 
|---|
| 301 | short const* in  = bl_step [phase]; | 
|---|
| 302 | short const* rev = bl_step [phase_count - phase]; | 
|---|
| 303 |  | 
|---|
| 304 | int interp = fixed >> (phase_shift - delta_bits) & (delta_unit - 1); | 
|---|
| 305 | int delta2 = (delta * interp) >> delta_bits; | 
|---|
| 306 | delta -= delta2; | 
|---|
| 307 |  | 
|---|
| 308 | /* Fails if buffer size was exceeded */ | 
|---|
| 309 | assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] ); | 
|---|
| 310 |  | 
|---|
| 311 | out [0] += in[0]*delta + in[half_width+0]*delta2; | 
|---|
| 312 | out [1] += in[1]*delta + in[half_width+1]*delta2; | 
|---|
| 313 | out [2] += in[2]*delta + in[half_width+2]*delta2; | 
|---|
| 314 | out [3] += in[3]*delta + in[half_width+3]*delta2; | 
|---|
| 315 | out [4] += in[4]*delta + in[half_width+4]*delta2; | 
|---|
| 316 | out [5] += in[5]*delta + in[half_width+5]*delta2; | 
|---|
| 317 | out [6] += in[6]*delta + in[half_width+6]*delta2; | 
|---|
| 318 | out [7] += in[7]*delta + in[half_width+7]*delta2; | 
|---|
| 319 |  | 
|---|
| 320 | in = rev; | 
|---|
| 321 | out [ 8] += in[7]*delta + in[7-half_width]*delta2; | 
|---|
| 322 | out [ 9] += in[6]*delta + in[6-half_width]*delta2; | 
|---|
| 323 | out [10] += in[5]*delta + in[5-half_width]*delta2; | 
|---|
| 324 | out [11] += in[4]*delta + in[4-half_width]*delta2; | 
|---|
| 325 | out [12] += in[3]*delta + in[3-half_width]*delta2; | 
|---|
| 326 | out [13] += in[2]*delta + in[2-half_width]*delta2; | 
|---|
| 327 | out [14] += in[1]*delta + in[1-half_width]*delta2; | 
|---|
| 328 | out [15] += in[0]*delta + in[0-half_width]*delta2; | 
|---|
| 329 | } | 
|---|
| 330 |  | 
|---|
| 331 | void blip_add_delta_fast( blip_t* m, unsigned time, int delta ) | 
|---|
| 332 | { | 
|---|
| 333 | unsigned fixed = (unsigned) ((time * m->factor + m->offset) >> pre_shift); | 
|---|
| 334 | buf_t* out = SAMPLES( m ) + m->avail + (fixed >> frac_bits); | 
|---|
| 335 |  | 
|---|
| 336 | int interp = fixed >> (frac_bits - delta_bits) & (delta_unit - 1); | 
|---|
| 337 | int delta2 = delta * interp; | 
|---|
| 338 |  | 
|---|
| 339 | /* Fails if buffer size was exceeded */ | 
|---|
| 340 | assert( out <= &SAMPLES( m ) [m->size + end_frame_extra] ); | 
|---|
| 341 |  | 
|---|
| 342 | out [7] += delta * delta_unit - delta2; | 
|---|
| 343 | out [8] += delta2; | 
|---|
| 344 | } | 
|---|
| 345 |  | 
|---|