1/********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
9 * by the Xiph.Org Foundation https://xiph.org/ *
10 * *
11 ********************************************************************
12
13 function: normalized modified discrete cosine transform
14 power of two length transform only [64 <= n ]
15
16 Original algorithm adapted long ago from _The use of multirate filter
17 banks for coding of high quality digital audio_, by T. Sporer,
18 K. Brandenburg and B. Edler, collection of the European Signal
19 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
20 211-214
21
22 The below code implements an algorithm that no longer looks much like
23 that presented in the paper, but the basic structure remains if you
24 dig deep enough to see it.
25
26 This module DOES NOT INCLUDE code to generate/apply the window
27 function. Everybody has their own weird favorite including me... I
28 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
29 vehemently disagree.
30
31 ********************************************************************/
32
33/* this can also be run as an integer transform by uncommenting a
34 define in mdct.h; the integerization is a first pass and although
35 it's likely stable for Vorbis, the dynamic range is constrained and
36 roundoff isn't done (so it's noisy). Consider it functional, but
37 only a starting point. There's no point on a machine with an FPU */
38
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <math.h>
43#include "vorbis/codec.h"
44#include "mdct.h"
45#include "os.h"
46#include "misc.h"
47
48/* build lookups for trig functions; also pre-figure scaling and
49 some window function algebra. */
50
51void mdct_init(mdct_lookup *lookup,int n){
52 int *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
53 DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4));
54
55 int i;
56 int n2=n>>1;
57 int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
58 lookup->n=n;
59 lookup->trig=T;
60 lookup->bitrev=bitrev;
61
62/* trig lookups... */
63
64 for(i=0;i<n/4;i++){
65 T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
66 T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
67 T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
68 T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
69 }
70 for(i=0;i<n/8;i++){
71 T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
72 T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
73 }
74
75 /* bitreverse lookup... */
76
77 {
78 int mask=(1<<(log2n-1))-1,i,j;
79 int msb=1<<(log2n-2);
80 for(i=0;i<n/8;i++){
81 int acc=0;
82 for(j=0;msb>>j;j++)
83 if((msb>>j)&i)acc|=1<<j;
84 bitrev[i*2]=((~acc)&mask)-1;
85 bitrev[i*2+1]=acc;
86
87 }
88 }
89 lookup->scale=FLOAT_CONV(4.f/n);
90}
91
92/* 8 point butterfly (in place, 4 register) */
93STIN void mdct_butterfly_8(DATA_TYPE *x){
94 REG_TYPE r0 = x[6] + x[2];
95 REG_TYPE r1 = x[6] - x[2];
96 REG_TYPE r2 = x[4] + x[0];
97 REG_TYPE r3 = x[4] - x[0];
98
99 x[6] = r0 + r2;
100 x[4] = r0 - r2;
101
102 r0 = x[5] - x[1];
103 r2 = x[7] - x[3];
104 x[0] = r1 + r0;
105 x[2] = r1 - r0;
106
107 r0 = x[5] + x[1];
108 r1 = x[7] + x[3];
109 x[3] = r2 + r3;
110 x[1] = r2 - r3;
111 x[7] = r1 + r0;
112 x[5] = r1 - r0;
113
114}
115
116/* 16 point butterfly (in place, 4 register) */
117STIN void mdct_butterfly_16(DATA_TYPE *x){
118 REG_TYPE r0 = x[1] - x[9];
119 REG_TYPE r1 = x[0] - x[8];
120
121 x[8] += x[0];
122 x[9] += x[1];
123 x[0] = MULT_NORM((r0 + r1) * cPI2_8);
124 x[1] = MULT_NORM((r0 - r1) * cPI2_8);
125
126 r0 = x[3] - x[11];
127 r1 = x[10] - x[2];
128 x[10] += x[2];
129 x[11] += x[3];
130 x[2] = r0;
131 x[3] = r1;
132
133 r0 = x[12] - x[4];
134 r1 = x[13] - x[5];
135 x[12] += x[4];
136 x[13] += x[5];
137 x[4] = MULT_NORM((r0 - r1) * cPI2_8);
138 x[5] = MULT_NORM((r0 + r1) * cPI2_8);
139
140 r0 = x[14] - x[6];
141 r1 = x[15] - x[7];
142 x[14] += x[6];
143 x[15] += x[7];
144 x[6] = r0;
145 x[7] = r1;
146
147 mdct_butterfly_8(x);
148 mdct_butterfly_8(x+8);
149}
150
151/* 32 point butterfly (in place, 4 register) */
152STIN void mdct_butterfly_32(DATA_TYPE *x){
153 REG_TYPE r0 = x[30] - x[14];
154 REG_TYPE r1 = x[31] - x[15];
155
156 x[30] += x[14];
157 x[31] += x[15];
158 x[14] = r0;
159 x[15] = r1;
160
161 r0 = x[28] - x[12];
162 r1 = x[29] - x[13];
163 x[28] += x[12];
164 x[29] += x[13];
165 x[12] = MULT_NORM( r0 * cPI1_8 - r1 * cPI3_8 );
166 x[13] = MULT_NORM( r0 * cPI3_8 + r1 * cPI1_8 );
167
168 r0 = x[26] - x[10];
169 r1 = x[27] - x[11];
170 x[26] += x[10];
171 x[27] += x[11];
172 x[10] = MULT_NORM(( r0 - r1 ) * cPI2_8);
173 x[11] = MULT_NORM(( r0 + r1 ) * cPI2_8);
174
175 r0 = x[24] - x[8];
176 r1 = x[25] - x[9];
177 x[24] += x[8];
178 x[25] += x[9];
179 x[8] = MULT_NORM( r0 * cPI3_8 - r1 * cPI1_8 );
180 x[9] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 );
181
182 r0 = x[22] - x[6];
183 r1 = x[7] - x[23];
184 x[22] += x[6];
185 x[23] += x[7];
186 x[6] = r1;
187 x[7] = r0;
188
189 r0 = x[4] - x[20];
190 r1 = x[5] - x[21];
191 x[20] += x[4];
192 x[21] += x[5];
193 x[4] = MULT_NORM( r1 * cPI1_8 + r0 * cPI3_8 );
194 x[5] = MULT_NORM( r1 * cPI3_8 - r0 * cPI1_8 );
195
196 r0 = x[2] - x[18];
197 r1 = x[3] - x[19];
198 x[18] += x[2];
199 x[19] += x[3];
200 x[2] = MULT_NORM(( r1 + r0 ) * cPI2_8);
201 x[3] = MULT_NORM(( r1 - r0 ) * cPI2_8);
202
203 r0 = x[0] - x[16];
204 r1 = x[1] - x[17];
205 x[16] += x[0];
206 x[17] += x[1];
207 x[0] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 );
208 x[1] = MULT_NORM( r1 * cPI1_8 - r0 * cPI3_8 );
209
210 mdct_butterfly_16(x);
211 mdct_butterfly_16(x+16);
212
213}
214
215/* N point first stage butterfly (in place, 2 register) */
216STIN void mdct_butterfly_first(DATA_TYPE *T,
217 DATA_TYPE *x,
218 int points){
219
220 DATA_TYPE *x1 = x + points - 8;
221 DATA_TYPE *x2 = x + (points>>1) - 8;
222 REG_TYPE r0;
223 REG_TYPE r1;
224
225 do{
226
227 r0 = x1[6] - x2[6];
228 r1 = x1[7] - x2[7];
229 x1[6] += x2[6];
230 x1[7] += x2[7];
231 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]);
232 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]);
233
234 r0 = x1[4] - x2[4];
235 r1 = x1[5] - x2[5];
236 x1[4] += x2[4];
237 x1[5] += x2[5];
238 x2[4] = MULT_NORM(r1 * T[5] + r0 * T[4]);
239 x2[5] = MULT_NORM(r1 * T[4] - r0 * T[5]);
240
241 r0 = x1[2] - x2[2];
242 r1 = x1[3] - x2[3];
243 x1[2] += x2[2];
244 x1[3] += x2[3];
245 x2[2] = MULT_NORM(r1 * T[9] + r0 * T[8]);
246 x2[3] = MULT_NORM(r1 * T[8] - r0 * T[9]);
247
248 r0 = x1[0] - x2[0];
249 r1 = x1[1] - x2[1];
250 x1[0] += x2[0];
251 x1[1] += x2[1];
252 x2[0] = MULT_NORM(r1 * T[13] + r0 * T[12]);
253 x2[1] = MULT_NORM(r1 * T[12] - r0 * T[13]);
254
255 x1-=8;
256 x2-=8;
257 T+=16;
258
259 }while(x2>=x);
260}
261
262/* N/stage point generic N stage butterfly (in place, 2 register) */
263STIN void mdct_butterfly_generic(DATA_TYPE *T,
264 DATA_TYPE *x,
265 int points,
266 int trigint){
267
268 DATA_TYPE *x1 = x + points - 8;
269 DATA_TYPE *x2 = x + (points>>1) - 8;
270 REG_TYPE r0;
271 REG_TYPE r1;
272
273 do{
274
275 r0 = x1[6] - x2[6];
276 r1 = x1[7] - x2[7];
277 x1[6] += x2[6];
278 x1[7] += x2[7];
279 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]);
280 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]);
281
282 T+=trigint;
283
284 r0 = x1[4] - x2[4];
285 r1 = x1[5] - x2[5];
286 x1[4] += x2[4];
287 x1[5] += x2[5];
288 x2[4] = MULT_NORM(r1 * T[1] + r0 * T[0]);
289 x2[5] = MULT_NORM(r1 * T[0] - r0 * T[1]);
290
291 T+=trigint;
292
293 r0 = x1[2] - x2[2];
294 r1 = x1[3] - x2[3];
295 x1[2] += x2[2];
296 x1[3] += x2[3];
297 x2[2] = MULT_NORM(r1 * T[1] + r0 * T[0]);
298 x2[3] = MULT_NORM(r1 * T[0] - r0 * T[1]);
299
300 T+=trigint;
301
302 r0 = x1[0] - x2[0];
303 r1 = x1[1] - x2[1];
304 x1[0] += x2[0];
305 x1[1] += x2[1];
306 x2[0] = MULT_NORM(r1 * T[1] + r0 * T[0]);
307 x2[1] = MULT_NORM(r1 * T[0] - r0 * T[1]);
308
309 T+=trigint;
310 x1-=8;
311 x2-=8;
312
313 }while(x2>=x);
314}
315
316STIN void mdct_butterflies(mdct_lookup *init,
317 DATA_TYPE *x,
318 int points){
319
320 DATA_TYPE *T=init->trig;
321 int stages=init->log2n-5;
322 int i,j;
323
324 if(--stages>0){
325 mdct_butterfly_first(T,x,points);
326 }
327
328 for(i=1;--stages>0;i++){
329 for(j=0;j<(1<<i);j++)
330 mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
331 }
332
333 for(j=0;j<points;j+=32)
334 mdct_butterfly_32(x+j);
335
336}
337
338void mdct_clear(mdct_lookup *l){
339 if(l){
340 if(l->trig)_ogg_free(l->trig);
341 if(l->bitrev)_ogg_free(l->bitrev);
342 memset(l,0,sizeof(*l));
343 }
344}
345
346STIN void mdct_bitreverse(mdct_lookup *init,
347 DATA_TYPE *x){
348 int n = init->n;
349 int *bit = init->bitrev;
350 DATA_TYPE *w0 = x;
351 DATA_TYPE *w1 = x = w0+(n>>1);
352 DATA_TYPE *T = init->trig+n;
353
354 do{
355 DATA_TYPE *x0 = x+bit[0];
356 DATA_TYPE *x1 = x+bit[1];
357
358 REG_TYPE r0 = x0[1] - x1[1];
359 REG_TYPE r1 = x0[0] + x1[0];
360 REG_TYPE r2 = MULT_NORM(r1 * T[0] + r0 * T[1]);
361 REG_TYPE r3 = MULT_NORM(r1 * T[1] - r0 * T[0]);
362
363 w1 -= 4;
364
365 r0 = HALVE(x0[1] + x1[1]);
366 r1 = HALVE(x0[0] - x1[0]);
367
368 w0[0] = r0 + r2;
369 w1[2] = r0 - r2;
370 w0[1] = r1 + r3;
371 w1[3] = r3 - r1;
372
373 x0 = x+bit[2];
374 x1 = x+bit[3];
375
376 r0 = x0[1] - x1[1];
377 r1 = x0[0] + x1[0];
378 r2 = MULT_NORM(r1 * T[2] + r0 * T[3]);
379 r3 = MULT_NORM(r1 * T[3] - r0 * T[2]);
380
381 r0 = HALVE(x0[1] + x1[1]);
382 r1 = HALVE(x0[0] - x1[0]);
383
384 w0[2] = r0 + r2;
385 w1[0] = r0 - r2;
386 w0[3] = r1 + r3;
387 w1[1] = r3 - r1;
388
389 T += 4;
390 bit += 4;
391 w0 += 4;
392
393 }while(w0<w1);
394}
395
396void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
397 int n=init->n;
398 int n2=n>>1;
399 int n4=n>>2;
400
401 /* rotate */
402
403 DATA_TYPE *iX = in+n2-7;
404 DATA_TYPE *oX = out+n2+n4;
405 DATA_TYPE *T = init->trig+n4;
406
407 do{
408 oX -= 4;
409 oX[0] = MULT_NORM(-iX[2] * T[3] - iX[0] * T[2]);
410 oX[1] = MULT_NORM (iX[0] * T[3] - iX[2] * T[2]);
411 oX[2] = MULT_NORM(-iX[6] * T[1] - iX[4] * T[0]);
412 oX[3] = MULT_NORM (iX[4] * T[1] - iX[6] * T[0]);
413 iX -= 8;
414 T += 4;
415 }while(iX>=in);
416
417 iX = in+n2-8;
418 oX = out+n2+n4;
419 T = init->trig+n4;
420
421 do{
422 T -= 4;
423 oX[0] = MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
424 oX[1] = MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
425 oX[2] = MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
426 oX[3] = MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
427 iX -= 8;
428 oX += 4;
429 }while(iX>=in);
430
431 mdct_butterflies(init,out+n2,n2);
432 mdct_bitreverse(init,out);
433
434 /* roatate + window */
435
436 {
437 DATA_TYPE *oX1=out+n2+n4;
438 DATA_TYPE *oX2=out+n2+n4;
439 DATA_TYPE *iX =out;
440 T =init->trig+n2;
441
442 do{
443 oX1-=4;
444
445 oX1[3] = MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
446 oX2[0] = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
447
448 oX1[2] = MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
449 oX2[1] = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
450
451 oX1[1] = MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
452 oX2[2] = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
453
454 oX1[0] = MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
455 oX2[3] = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
456
457 oX2+=4;
458 iX += 8;
459 T += 8;
460 }while(iX<oX1);
461
462 iX=out+n2+n4;
463 oX1=out+n4;
464 oX2=oX1;
465
466 do{
467 oX1-=4;
468 iX-=4;
469
470 oX2[0] = -(oX1[3] = iX[3]);
471 oX2[1] = -(oX1[2] = iX[2]);
472 oX2[2] = -(oX1[1] = iX[1]);
473 oX2[3] = -(oX1[0] = iX[0]);
474
475 oX2+=4;
476 }while(oX2<iX);
477
478 iX=out+n2+n4;
479 oX1=out+n2+n4;
480 oX2=out+n2;
481 do{
482 oX1-=4;
483 oX1[0]= iX[3];
484 oX1[1]= iX[2];
485 oX1[2]= iX[1];
486 oX1[3]= iX[0];
487 iX+=4;
488 }while(oX1>oX2);
489 }
490}
491
492void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
493 int n=init->n;
494 int n2=n>>1;
495 int n4=n>>2;
496 int n8=n>>3;
497 DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */
498 DATA_TYPE *w2=w+n2;
499
500 /* rotate */
501
502 /* window + rotate + step 1 */
503
504 REG_TYPE r0;
505 REG_TYPE r1;
506 DATA_TYPE *x0=in+n2+n4;
507 DATA_TYPE *x1=x0+1;
508 DATA_TYPE *T=init->trig+n2;
509
510 int i=0;
511
512 for(i=0;i<n8;i+=2){
513 x0 -=4;
514 T-=2;
515 r0= x0[2] + x1[0];
516 r1= x0[0] + x1[2];
517 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
518 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
519 x1 +=4;
520 }
521
522 x1=in+1;
523
524 for(;i<n2-n8;i+=2){
525 T-=2;
526 x0 -=4;
527 r0= x0[2] - x1[0];
528 r1= x0[0] - x1[2];
529 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
530 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
531 x1 +=4;
532 }
533
534 x0=in+n;
535
536 for(;i<n2;i+=2){
537 T-=2;
538 x0 -=4;
539 r0= -x0[2] - x1[0];
540 r1= -x0[0] - x1[2];
541 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]);
542 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
543 x1 +=4;
544 }
545
546
547 mdct_butterflies(init,w+n2,n2);
548 mdct_bitreverse(init,w);
549
550 /* roatate + window */
551
552 T=init->trig+n2;
553 x0=out+n2;
554
555 for(i=0;i<n4;i++){
556 x0--;
557 out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
558 x0[0] =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
559 w+=2;
560 T+=2;
561 }
562}
563