| 1 | /******************************************************************** |
| 2 | * * |
| 3 | * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * |
| 4 | * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * |
| 5 | * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * |
| 6 | * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * |
| 7 | * * |
| 8 | * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * |
| 9 | * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * |
| 10 | * * |
| 11 | ******************************************************************** |
| 12 | |
| 13 | function: |
| 14 | last mod: $Id$ |
| 15 | |
| 16 | ********************************************************************/ |
| 17 | |
| 18 | #include <string.h> |
| 19 | #include "internal.h" |
| 20 | #include "dct.h" |
| 21 | |
| 22 | /*Performs an inverse 8 point Type-II DCT transform. |
| 23 | The output is scaled by a factor of 2 relative to the orthonormal version of |
| 24 | the transform. |
| 25 | _y: The buffer to store the result in. |
| 26 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
| 27 | block). |
| 28 | _x: The input coefficients. |
| 29 | The first 8 entries are used (e.g., from a row of an 8x8 block).*/ |
| 30 | static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
| 31 | ogg_int32_t t[8]; |
| 32 | ogg_int32_t r; |
| 33 | /*Stage 1:*/ |
| 34 | /*0-1 butterfly.*/ |
| 35 | t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16; |
| 36 | t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16; |
| 37 | /*2-3 rotation by 6pi/16.*/ |
| 38 | t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16); |
| 39 | t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16); |
| 40 | /*4-7 rotation by 7pi/16.*/ |
| 41 | t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16); |
| 42 | /*5-6 rotation by 3pi/16.*/ |
| 43 | t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16); |
| 44 | t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16); |
| 45 | t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16); |
| 46 | /*Stage 2:*/ |
| 47 | /*4-5 butterfly.*/ |
| 48 | r=t[4]+t[5]; |
| 49 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
| 50 | t[4]=r; |
| 51 | /*7-6 butterfly.*/ |
| 52 | r=t[7]+t[6]; |
| 53 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
| 54 | t[7]=r; |
| 55 | /*Stage 3:*/ |
| 56 | /*0-3 butterfly.*/ |
| 57 | r=t[0]+t[3]; |
| 58 | t[3]=t[0]-t[3]; |
| 59 | t[0]=r; |
| 60 | /*1-2 butterfly.*/ |
| 61 | r=t[1]+t[2]; |
| 62 | t[2]=t[1]-t[2]; |
| 63 | t[1]=r; |
| 64 | /*6-5 butterfly.*/ |
| 65 | r=t[6]+t[5]; |
| 66 | t[5]=t[6]-t[5]; |
| 67 | t[6]=r; |
| 68 | /*Stage 4:*/ |
| 69 | /*0-7 butterfly.*/ |
| 70 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
| 71 | /*1-6 butterfly.*/ |
| 72 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
| 73 | /*2-5 butterfly.*/ |
| 74 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
| 75 | /*3-4 butterfly.*/ |
| 76 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
| 77 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
| 78 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
| 79 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
| 80 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
| 81 | } |
| 82 | |
| 83 | /*Performs an inverse 8 point Type-II DCT transform. |
| 84 | The output is scaled by a factor of 2 relative to the orthonormal version of |
| 85 | the transform. |
| 86 | _y: The buffer to store the result in. |
| 87 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
| 88 | block). |
| 89 | _x: The input coefficients. |
| 90 | Only the first 4 entries are used. |
| 91 | The other 4 are assumed to be 0.*/ |
| 92 | static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
| 93 | ogg_int32_t t[8]; |
| 94 | ogg_int32_t r; |
| 95 | /*Stage 1:*/ |
| 96 | t[0]=OC_C4S4*_x[0]>>16; |
| 97 | t[2]=OC_C6S2*_x[2]>>16; |
| 98 | t[3]=OC_C2S6*_x[2]>>16; |
| 99 | t[4]=OC_C7S1*_x[1]>>16; |
| 100 | t[5]=-(OC_C5S3*_x[3]>>16); |
| 101 | t[6]=OC_C3S5*_x[3]>>16; |
| 102 | t[7]=OC_C1S7*_x[1]>>16; |
| 103 | /*Stage 2:*/ |
| 104 | r=t[4]+t[5]; |
| 105 | t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16; |
| 106 | t[4]=r; |
| 107 | r=t[7]+t[6]; |
| 108 | t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16; |
| 109 | t[7]=r; |
| 110 | /*Stage 3:*/ |
| 111 | t[1]=t[0]+t[2]; |
| 112 | t[2]=t[0]-t[2]; |
| 113 | r=t[0]+t[3]; |
| 114 | t[3]=t[0]-t[3]; |
| 115 | t[0]=r; |
| 116 | r=t[6]+t[5]; |
| 117 | t[5]=t[6]-t[5]; |
| 118 | t[6]=r; |
| 119 | /*Stage 4:*/ |
| 120 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
| 121 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
| 122 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
| 123 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
| 124 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
| 125 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
| 126 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
| 127 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
| 128 | } |
| 129 | |
| 130 | /*Performs an inverse 8 point Type-II DCT transform. |
| 131 | The output is scaled by a factor of 2 relative to the orthonormal version of |
| 132 | the transform. |
| 133 | _y: The buffer to store the result in. |
| 134 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
| 135 | block). |
| 136 | _x: The input coefficients. |
| 137 | Only the first 3 entries are used. |
| 138 | The other 5 are assumed to be 0.*/ |
| 139 | static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
| 140 | ogg_int32_t t[8]; |
| 141 | ogg_int32_t r; |
| 142 | /*Stage 1:*/ |
| 143 | t[0]=OC_C4S4*_x[0]>>16; |
| 144 | t[2]=OC_C6S2*_x[2]>>16; |
| 145 | t[3]=OC_C2S6*_x[2]>>16; |
| 146 | t[4]=OC_C7S1*_x[1]>>16; |
| 147 | t[7]=OC_C1S7*_x[1]>>16; |
| 148 | /*Stage 2:*/ |
| 149 | t[5]=OC_C4S4*t[4]>>16; |
| 150 | t[6]=OC_C4S4*t[7]>>16; |
| 151 | /*Stage 3:*/ |
| 152 | t[1]=t[0]+t[2]; |
| 153 | t[2]=t[0]-t[2]; |
| 154 | r=t[0]+t[3]; |
| 155 | t[3]=t[0]-t[3]; |
| 156 | t[0]=r; |
| 157 | r=t[6]+t[5]; |
| 158 | t[5]=t[6]-t[5]; |
| 159 | t[6]=r; |
| 160 | /*Stage 4:*/ |
| 161 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
| 162 | _y[1<<3]=(ogg_int16_t)(t[1]+t[6]); |
| 163 | _y[2<<3]=(ogg_int16_t)(t[2]+t[5]); |
| 164 | _y[3<<3]=(ogg_int16_t)(t[3]+t[4]); |
| 165 | _y[4<<3]=(ogg_int16_t)(t[3]-t[4]); |
| 166 | _y[5<<3]=(ogg_int16_t)(t[2]-t[5]); |
| 167 | _y[6<<3]=(ogg_int16_t)(t[1]-t[6]); |
| 168 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
| 169 | } |
| 170 | |
| 171 | /*Performs an inverse 8 point Type-II DCT transform. |
| 172 | The output is scaled by a factor of 2 relative to the orthonormal version of |
| 173 | the transform. |
| 174 | _y: The buffer to store the result in. |
| 175 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
| 176 | block). |
| 177 | _x: The input coefficients. |
| 178 | Only the first 2 entries are used. |
| 179 | The other 6 are assumed to be 0.*/ |
| 180 | static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ |
| 181 | ogg_int32_t t[8]; |
| 182 | ogg_int32_t r; |
| 183 | /*Stage 1:*/ |
| 184 | t[0]=OC_C4S4*_x[0]>>16; |
| 185 | t[4]=OC_C7S1*_x[1]>>16; |
| 186 | t[7]=OC_C1S7*_x[1]>>16; |
| 187 | /*Stage 2:*/ |
| 188 | t[5]=OC_C4S4*t[4]>>16; |
| 189 | t[6]=OC_C4S4*t[7]>>16; |
| 190 | /*Stage 3:*/ |
| 191 | r=t[6]+t[5]; |
| 192 | t[5]=t[6]-t[5]; |
| 193 | t[6]=r; |
| 194 | /*Stage 4:*/ |
| 195 | _y[0<<3]=(ogg_int16_t)(t[0]+t[7]); |
| 196 | _y[1<<3]=(ogg_int16_t)(t[0]+t[6]); |
| 197 | _y[2<<3]=(ogg_int16_t)(t[0]+t[5]); |
| 198 | _y[3<<3]=(ogg_int16_t)(t[0]+t[4]); |
| 199 | _y[4<<3]=(ogg_int16_t)(t[0]-t[4]); |
| 200 | _y[5<<3]=(ogg_int16_t)(t[0]-t[5]); |
| 201 | _y[6<<3]=(ogg_int16_t)(t[0]-t[6]); |
| 202 | _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); |
| 203 | } |
| 204 | |
| 205 | /*Performs an inverse 8 point Type-II DCT transform. |
| 206 | The output is scaled by a factor of 2 relative to the orthonormal version of |
| 207 | the transform. |
| 208 | _y: The buffer to store the result in. |
| 209 | Data will be placed in every 8th entry (e.g., in a column of an 8x8 |
| 210 | block). |
| 211 | _x: The input coefficients. |
| 212 | Only the first entry is used. |
| 213 | The other 7 are assumed to be 0.*/ |
| 214 | static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){ |
| 215 | _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]= |
| 216 | _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16); |
| 217 | } |
| 218 | |
| 219 | /*Performs an inverse 8x8 Type-II DCT transform. |
| 220 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
| 221 | version of the transform. |
| 222 | All coefficients but the first 3 in zig-zag scan order are assumed to be 0: |
| 223 | x x 0 0 0 0 0 0 |
| 224 | x 0 0 0 0 0 0 0 |
| 225 | 0 0 0 0 0 0 0 0 |
| 226 | 0 0 0 0 0 0 0 0 |
| 227 | 0 0 0 0 0 0 0 0 |
| 228 | 0 0 0 0 0 0 0 0 |
| 229 | 0 0 0 0 0 0 0 0 |
| 230 | 0 0 0 0 0 0 0 0 |
| 231 | _y: The buffer to store the result in. |
| 232 | This may be the same as _x. |
| 233 | _x: The input coefficients.*/ |
| 234 | static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
| 235 | ogg_int16_t w[64]; |
| 236 | int i; |
| 237 | /*Transform rows of x into columns of w.*/ |
| 238 | idct8_2(w,_x); |
| 239 | idct8_1(w+1,_x+8); |
| 240 | /*Transform rows of w into columns of y.*/ |
| 241 | for(i=0;i<8;i++)idct8_2(_y+i,w+i*8); |
| 242 | /*Adjust for the scale factor.*/ |
| 243 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
| 244 | /*Clear input data for next block.*/ |
| 245 | _x[0]=_x[1]=_x[8]=0; |
| 246 | } |
| 247 | |
| 248 | /*Performs an inverse 8x8 Type-II DCT transform. |
| 249 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
| 250 | version of the transform. |
| 251 | All coefficients but the first 10 in zig-zag scan order are assumed to be 0: |
| 252 | x x x x 0 0 0 0 |
| 253 | x x x 0 0 0 0 0 |
| 254 | x x 0 0 0 0 0 0 |
| 255 | x 0 0 0 0 0 0 0 |
| 256 | 0 0 0 0 0 0 0 0 |
| 257 | 0 0 0 0 0 0 0 0 |
| 258 | 0 0 0 0 0 0 0 0 |
| 259 | 0 0 0 0 0 0 0 0 |
| 260 | _y: The buffer to store the result in. |
| 261 | This may be the same as _x. |
| 262 | _x: The input coefficients.*/ |
| 263 | static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
| 264 | ogg_int16_t w[64]; |
| 265 | int i; |
| 266 | /*Transform rows of x into columns of w.*/ |
| 267 | idct8_4(w,_x); |
| 268 | idct8_3(w+1,_x+8); |
| 269 | idct8_2(w+2,_x+16); |
| 270 | idct8_1(w+3,_x+24); |
| 271 | /*Transform rows of w into columns of y.*/ |
| 272 | for(i=0;i<8;i++)idct8_4(_y+i,w+i*8); |
| 273 | /*Adjust for the scale factor.*/ |
| 274 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
| 275 | /*Clear input data for next block.*/ |
| 276 | _x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0; |
| 277 | } |
| 278 | |
| 279 | /*Performs an inverse 8x8 Type-II DCT transform. |
| 280 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
| 281 | version of the transform. |
| 282 | _y: The buffer to store the result in. |
| 283 | This may be the same as _x. |
| 284 | _x: The input coefficients.*/ |
| 285 | static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){ |
| 286 | ogg_int16_t w[64]; |
| 287 | int i; |
| 288 | /*Transform rows of x into columns of w.*/ |
| 289 | for(i=0;i<8;i++)idct8(w+i,_x+i*8); |
| 290 | /*Transform rows of w into columns of y.*/ |
| 291 | for(i=0;i<8;i++)idct8(_y+i,w+i*8); |
| 292 | /*Adjust for the scale factor.*/ |
| 293 | for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4); |
| 294 | /*Clear input data for next block.*/ |
| 295 | for(i=0;i<64;i++)_x[i]=0; |
| 296 | } |
| 297 | |
| 298 | /*Performs an inverse 8x8 Type-II DCT transform. |
| 299 | The input is assumed to be scaled by a factor of 4 relative to orthonormal |
| 300 | version of the transform.*/ |
| 301 | void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){ |
| 302 | /*_last_zzi is subtly different from an actual count of the number of |
| 303 | coefficients we decoded for this block. |
| 304 | It contains the value of zzi BEFORE the final token in the block was |
| 305 | decoded. |
| 306 | In most cases this is an EOB token (the continuation of an EOB run from a |
| 307 | previous block counts), and so this is the same as the coefficient count. |
| 308 | However, in the case that the last token was NOT an EOB token, but filled |
| 309 | the block up with exactly 64 coefficients, _last_zzi will be less than 64. |
| 310 | Provided the last token was not a pure zero run, the minimum value it can |
| 311 | be is 46, and so that doesn't affect any of the cases in this routine. |
| 312 | However, if the last token WAS a pure zero run of length 63, then _last_zzi |
| 313 | will be 1 while the number of coefficients decoded is 64. |
| 314 | Thus, we will trigger the following special case, where the real |
| 315 | coefficient count would not. |
| 316 | Note also that a zero run of length 64 will give _last_zzi a value of 0, |
| 317 | but we still process the DC coefficient, which might have a non-zero value |
| 318 | due to DC prediction. |
| 319 | Although convoluted, this is arguably the correct behavior: it allows us to |
| 320 | use a smaller transform when the block ends with a long zero run instead |
| 321 | of a normal EOB token. |
| 322 | It could be smarter... multiple separate zero runs at the end of a block |
| 323 | will fool it, but an encoder that generates these really deserves what it |
| 324 | gets. |
| 325 | Needless to say we inherited this approach from VP3.*/ |
| 326 | /*Then perform the iDCT.*/ |
| 327 | if(_last_zzi<=3)oc_idct8x8_3(_y,_x); |
| 328 | else if(_last_zzi<=10)oc_idct8x8_10(_y,_x); |
| 329 | else oc_idct8x8_slow(_y,_x); |
| 330 | } |
| 331 | |