1/*
2 * jidctint.c
3 *
4 * Copyright (C) 1991-1998, Thomas G. Lane.
5 * Modification developed 2002-2009 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains a slow-but-accurate integer implementation of the
10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11 * must also perform dequantization of the input coefficients.
12 *
13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14 * on each row (or vice versa, but it's more convenient to emit a row at
15 * a time). Direct algorithms are also available, but they are much more
16 * complex and seem not to be any faster when reduced to code.
17 *
18 * This implementation is based on an algorithm described in
19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22 * The primary algorithm described there uses 11 multiplies and 29 adds.
23 * We use their alternate method with 12 multiplies and 32 adds.
24 * The advantage of this method is that no data path contains more than one
25 * multiplication; this allows a very simple and accurate implementation in
26 * scaled fixed-point arithmetic, with a minimal number of shifts.
27 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for directly resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
32 *
33 * For N<8 we simply take the corresponding low-frequency coefficients of
34 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain
37 * point of view rather than the usual spatial domain point of view,
38 * yielding significant computational savings and results at least
39 * as good as common bilinear (averaging) spatial downsampling.
40 *
 * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients as
 * the lower frequencies and assuming the higher frequencies to be zero.
43 * It turns out that the computational effort is similar to the 8x8 IDCT
44 * regarding the output size.
45 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
46 *
47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48 * since there would be too many additional constants to pre-calculate.
49 */
50
51#define JPEG_INTERNALS
52#include "jinclude.h"
53#include "jpeglib.h"
54#include "jdct.h" /* Private declarations for DCT subsystem */
55
56#ifdef DCT_ISLOW_SUPPORTED
57
58
59/*
60 * This module is specialized to the case DCTSIZE = 8.
61 */
62
63#if DCTSIZE != 8
64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65#endif
66
67
68/*
69 * The poop on this scaling stuff is as follows:
70 *
71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72 * larger than the true IDCT outputs. The final outputs are therefore
73 * a factor of N larger than desired; since N=8 this can be cured by
74 * a simple right shift at the end of the algorithm. The advantage of
75 * this arrangement is that we save two multiplications per 1-D IDCT,
76 * because the y0 and y4 inputs need not be divided by sqrt(N).
77 *
78 * We have to do addition and subtraction of the integer inputs, which
79 * is no problem, and multiplication by fractional constants, which is
80 * a problem to do in integer arithmetic. We multiply all the constants
81 * by CONST_SCALE and convert them to integer constants (thus retaining
82 * CONST_BITS bits of precision in the constants). After doing a
83 * multiplication we have to divide the product by CONST_SCALE, with proper
84 * rounding, to produce the correct output. This division can be done
85 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
86 * as long as possible so that partial sums can be added together with
87 * full fractional precision.
88 *
89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
90 * they are represented to better-than-integral precision. These outputs
91 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
92 * with the recommended scaling. (To scale up 12-bit sample data further, an
93 * intermediate INT32 array would be needed.)
94 *
95 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
96 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
97 * shows that the values given below are the most effective.
98 */
99
/* Fixed-point precision of the multiplier constants; the same for every
 * sample depth.
 */
#define CONST_BITS  13

/* Extra fractional bits carried from pass 1 into pass 2. */
#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
#endif
107
108/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
109 * causing a lot of useless floating-point operations at run time.
110 * To get around this we use the following pre-calculated constants.
111 * If you change CONST_BITS you may want to add appropriate values.
112 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
113 */
114
#if CONST_BITS == 13
/* Hand-evaluated table for CONST_BITS == 13: each value is the constant
 * named in the macro multiplied by 2^13 and rounded to the nearest integer
 * (e.g. 0.541196100 * 8192 = 4433.48 -> 4433).
 */
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#else
/* Any other CONST_BITS: let the FIX() macro compute the scaled constants. */
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
142
143
/* Clamp DC value to acceptable range for bug 697186.
 *
 * dcval must be a modifiable lvalue; it is limited in place to the range
 * [-1024, 1023].  The body is wrapped in do { } while (0) so that the macro
 * followed by a semicolon forms exactly one statement and can be used safely
 * as the unbraced body of an if that has an else branch.  Note that dcval is
 * evaluated more than once, so it must not have side effects.
 */
#define CLAMP_DC(dcval) \
  do { \
    if ((dcval) < -1024) \
      (dcval) = -1024; \
    else if ((dcval) > 1023) \
      (dcval) = 1023; \
  } while (0)
152
/* MULTIPLY(var,cnst): product of an INT32 variable and an INT32 constant,
 * yielding an INT32.
 * With 8-bit samples and the recommended scaling neither operand exceeds
 * 16 bits, so the product is delegated to MULTIPLY16C16 (a 16x16->32 bit
 * multiply).  Any other sample depth (e.g. 12-bit) needs the full 32-bit
 * multiplication.
 */

#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(var,cnst)  ((var) * (cnst))
#else
#define MULTIPLY(var,cnst)  MULTIPLY16C16(var,cnst)
#endif
165
166
/* DEQUANTIZE(coef,quantval): scale one coefficient by its entry in the
 * multiplier table.  The coefficient is first converted to ISLOW_MULT_TYPE;
 * the product is an int.  Operands and result all fit in 16 bits in this
 * module, so either an int or a short multiply is adequate.
 */

#define DEQUANTIZE(coef,quantval)  ((ISLOW_MULT_TYPE) (coef) * (quantval))
173
174
175/*
176 * Perform dequantization and inverse DCT on one block of coefficients.
177 */
178
179GLOBAL(void)
180jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
181 JCOEFPTR coef_block,
182 JSAMPARRAY output_buf, JDIMENSION output_col)
183{
184 INT32 tmp0, tmp1, tmp2, tmp3;
185 INT32 tmp10, tmp11, tmp12, tmp13;
186 INT32 z1, z2, z3;
187 JCOEFPTR inptr;
188 ISLOW_MULT_TYPE * quantptr;
189 int * wsptr;
190 JSAMPROW outptr;
191 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
192 int ctr;
193 int workspace[DCTSIZE2]; /* buffers data between passes */
194 SHIFT_TEMPS
195
196 /* Pass 1: process columns from input, store into work array. */
197 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
198 /* furthermore, we scale the results by 2**PASS1_BITS. */
199
200 inptr = coef_block;
201 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
202 wsptr = workspace;
203 for (ctr = DCTSIZE; ctr > 0; ctr--) {
204 /* Due to quantization, we will usually find that many of the input
205 * coefficients are zero, especially the AC terms. We can exploit this
206 * by short-circuiting the IDCT calculation for any column in which all
207 * the AC terms are zero. In that case each output is equal to the
208 * DC coefficient (with scale factor as needed).
209 * With typical images and quantization tables, half or more of the
210 * column DCT calculations can be simplified this way.
211 */
212
213 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
214 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
215 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
216 inptr[DCTSIZE*7] == 0) {
217 /* AC terms all zero */
218 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
219 if (ctr == DCTSIZE)
220 CLAMP_DC(dcval);
221 dcval <<= PASS1_BITS;
222 wsptr[DCTSIZE*0] = dcval;
223 wsptr[DCTSIZE*1] = dcval;
224 wsptr[DCTSIZE*2] = dcval;
225 wsptr[DCTSIZE*3] = dcval;
226 wsptr[DCTSIZE*4] = dcval;
227 wsptr[DCTSIZE*5] = dcval;
228 wsptr[DCTSIZE*6] = dcval;
229 wsptr[DCTSIZE*7] = dcval;
230
231 inptr++; /* advance pointers to next column */
232 quantptr++;
233 wsptr++;
234 continue;
235 }
236
237 /* Even part: reverse the even part of the forward DCT. */
238 /* The rotator is sqrt(2)*c(-6). */
239
240 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
241 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
242
243 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
244 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
245 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
246
247 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
248 if (ctr == DCTSIZE)
249 CLAMP_DC(z2);
250 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
251 z2 <<= CONST_BITS;
252 z3 <<= CONST_BITS;
253 /* Add fudge factor here for final descale. */
254 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
255
256 tmp0 = z2 + z3;
257 tmp1 = z2 - z3;
258
259 tmp10 = tmp0 + tmp2;
260 tmp13 = tmp0 - tmp2;
261 tmp11 = tmp1 + tmp3;
262 tmp12 = tmp1 - tmp3;
263
264 /* Odd part per figure 8; the matrix is unitary and hence its
265 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
266 */
267
268 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
269 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
270 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
271 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
272
273 z2 = tmp0 + tmp2;
274 z3 = tmp1 + tmp3;
275
276 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
277 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
278 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
279 z2 += z1;
280 z3 += z1;
281
282 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
283 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
284 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
285 tmp0 += z1 + z2;
286 tmp3 += z1 + z3;
287
288 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
289 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
290 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
291 tmp1 += z1 + z3;
292 tmp2 += z1 + z2;
293
294 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
295
296 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
297 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
298 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
299 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
300 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
301 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
302 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
303 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
304
305 inptr++; /* advance pointers to next column */
306 quantptr++;
307 wsptr++;
308 }
309
310 /* Pass 2: process rows from work array, store into output array. */
311 /* Note that we must descale the results by a factor of 8 == 2**3, */
312 /* and also undo the PASS1_BITS scaling. */
313
314 wsptr = workspace;
315 for (ctr = 0; ctr < DCTSIZE; ctr++) {
316 outptr = output_buf[ctr] + output_col;
317 /* Rows of zeroes can be exploited in the same way as we did with columns.
318 * However, the column calculation has created many nonzero AC terms, so
319 * the simplification applies less often (typically 5% to 10% of the time).
320 * On machines with very fast multiplication, it's possible that the
321 * test takes more time than it's worth. In that case this section
322 * may be commented out.
323 */
324
325#ifndef NO_ZERO_ROW_TEST
326 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
327 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
328 /* AC terms all zero */
329 JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
330 & RANGE_MASK];
331
332 outptr[0] = dcval;
333 outptr[1] = dcval;
334 outptr[2] = dcval;
335 outptr[3] = dcval;
336 outptr[4] = dcval;
337 outptr[5] = dcval;
338 outptr[6] = dcval;
339 outptr[7] = dcval;
340
341 wsptr += DCTSIZE; /* advance pointer to next row */
342 continue;
343 }
344#endif
345
346 /* Even part: reverse the even part of the forward DCT. */
347 /* The rotator is sqrt(2)*c(-6). */
348
349 z2 = (INT32) wsptr[2];
350 z3 = (INT32) wsptr[6];
351
352 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
353 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
354 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
355
356 /* Add fudge factor here for final descale. */
357 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
358 z3 = (INT32) wsptr[4];
359
360 tmp0 = (z2 + z3) << CONST_BITS;
361 tmp1 = (z2 - z3) << CONST_BITS;
362
363 tmp10 = tmp0 + tmp2;
364 tmp13 = tmp0 - tmp2;
365 tmp11 = tmp1 + tmp3;
366 tmp12 = tmp1 - tmp3;
367
368 /* Odd part per figure 8; the matrix is unitary and hence its
369 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
370 */
371
372 tmp0 = (INT32) wsptr[7];
373 tmp1 = (INT32) wsptr[5];
374 tmp2 = (INT32) wsptr[3];
375 tmp3 = (INT32) wsptr[1];
376
377 z2 = tmp0 + tmp2;
378 z3 = tmp1 + tmp3;
379
380 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
381 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
382 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
383 z2 += z1;
384 z3 += z1;
385
386 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
387 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
388 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
389 tmp0 += z1 + z2;
390 tmp3 += z1 + z3;
391
392 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
393 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
394 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
395 tmp1 += z1 + z3;
396 tmp2 += z1 + z2;
397
398 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
399
400 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
401 CONST_BITS+PASS1_BITS+3)
402 & RANGE_MASK];
403 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
404 CONST_BITS+PASS1_BITS+3)
405 & RANGE_MASK];
406 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
407 CONST_BITS+PASS1_BITS+3)
408 & RANGE_MASK];
409 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
410 CONST_BITS+PASS1_BITS+3)
411 & RANGE_MASK];
412 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
413 CONST_BITS+PASS1_BITS+3)
414 & RANGE_MASK];
415 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
416 CONST_BITS+PASS1_BITS+3)
417 & RANGE_MASK];
418 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
419 CONST_BITS+PASS1_BITS+3)
420 & RANGE_MASK];
421 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
422 CONST_BITS+PASS1_BITS+3)
423 & RANGE_MASK];
424
425 wsptr += DCTSIZE; /* advance pointer to next row */
426 }
427}
428
429#ifdef IDCT_SCALING_SUPPORTED
430
431
432/*
433 * Perform dequantization and inverse DCT on one block of coefficients,
434 * producing a 7x7 output block.
435 *
436 * Optimized algorithm with 12 multiplications in the 1-D kernel.
437 * cK represents sqrt(2) * cos(K*pi/14).
438 */
439
440GLOBAL(void)
441jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
442 JCOEFPTR coef_block,
443 JSAMPARRAY output_buf, JDIMENSION output_col)
444{
445 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
446 INT32 z1, z2, z3;
447 JCOEFPTR inptr;
448 ISLOW_MULT_TYPE * quantptr;
449 int * wsptr;
450 JSAMPROW outptr;
451 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
452 int ctr;
453 int workspace[7*7]; /* buffers data between passes */
454 SHIFT_TEMPS
455
456 /* Pass 1: process columns from input, store into work array. */
457
458 inptr = coef_block;
459 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
460 wsptr = workspace;
461 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
462 /* Even part */
463
464 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
465 if (ctr == 0)
466 CLAMP_DC(tmp13);
467 tmp13 <<= CONST_BITS;
468 /* Add fudge factor here for final descale. */
469 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
470
471 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
472 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
473 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
474
475 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
476 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
477 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
478 tmp0 = z1 + z3;
479 z2 -= tmp0;
480 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
481 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
482 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
483 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
484
485 /* Odd part */
486
487 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
488 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
489 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
490
491 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
492 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
493 tmp0 = tmp1 - tmp2;
494 tmp1 += tmp2;
495 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
496 tmp1 += tmp2;
497 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
498 tmp0 += z2;
499 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
500
501 /* Final output stage */
502
503 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
504 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
505 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
506 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
507 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
508 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
509 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
510 }
511
512 /* Pass 2: process 7 rows from work array, store into output array. */
513
514 wsptr = workspace;
515 for (ctr = 0; ctr < 7; ctr++) {
516 outptr = output_buf[ctr] + output_col;
517
518 /* Even part */
519
520 /* Add fudge factor here for final descale. */
521 tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
522 tmp13 <<= CONST_BITS;
523
524 z1 = (INT32) wsptr[2];
525 z2 = (INT32) wsptr[4];
526 z3 = (INT32) wsptr[6];
527
528 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
529 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
530 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
531 tmp0 = z1 + z3;
532 z2 -= tmp0;
533 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
534 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
535 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
536 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
537
538 /* Odd part */
539
540 z1 = (INT32) wsptr[1];
541 z2 = (INT32) wsptr[3];
542 z3 = (INT32) wsptr[5];
543
544 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
545 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
546 tmp0 = tmp1 - tmp2;
547 tmp1 += tmp2;
548 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
549 tmp1 += tmp2;
550 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
551 tmp0 += z2;
552 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
553
554 /* Final output stage */
555
556 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
557 CONST_BITS+PASS1_BITS+3)
558 & RANGE_MASK];
559 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
560 CONST_BITS+PASS1_BITS+3)
561 & RANGE_MASK];
562 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
563 CONST_BITS+PASS1_BITS+3)
564 & RANGE_MASK];
565 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
566 CONST_BITS+PASS1_BITS+3)
567 & RANGE_MASK];
568 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
569 CONST_BITS+PASS1_BITS+3)
570 & RANGE_MASK];
571 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
572 CONST_BITS+PASS1_BITS+3)
573 & RANGE_MASK];
574 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
575 CONST_BITS+PASS1_BITS+3)
576 & RANGE_MASK];
577
578 wsptr += 7; /* advance pointer to next row */
579 }
580}
581
582
583/*
584 * Perform dequantization and inverse DCT on one block of coefficients,
585 * producing a reduced-size 6x6 output block.
586 *
587 * Optimized algorithm with 3 multiplications in the 1-D kernel.
588 * cK represents sqrt(2) * cos(K*pi/12).
589 */
590
591GLOBAL(void)
592jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
593 JCOEFPTR coef_block,
594 JSAMPARRAY output_buf, JDIMENSION output_col)
595{
596 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
597 INT32 z1, z2, z3;
598 JCOEFPTR inptr;
599 ISLOW_MULT_TYPE * quantptr;
600 int * wsptr;
601 JSAMPROW outptr;
602 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
603 int ctr;
604 int workspace[6*6]; /* buffers data between passes */
605 SHIFT_TEMPS
606
607 /* Pass 1: process columns from input, store into work array. */
608
609 inptr = coef_block;
610 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
611 wsptr = workspace;
612 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
613 /* Even part */
614
615 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
616 if (ctr == 0)
617 CLAMP_DC(tmp0);
618 tmp0 <<= CONST_BITS;
619 /* Add fudge factor here for final descale. */
620 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
621 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
622 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
623 tmp1 = tmp0 + tmp10;
624 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
625 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
626 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
627 tmp10 = tmp1 + tmp0;
628 tmp12 = tmp1 - tmp0;
629
630 /* Odd part */
631
632 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
633 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
634 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
635 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
636 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
637 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
638 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
639
640 /* Final output stage */
641
642 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
643 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
644 wsptr[6*1] = (int) (tmp11 + tmp1);
645 wsptr[6*4] = (int) (tmp11 - tmp1);
646 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
647 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
648 }
649
650 /* Pass 2: process 6 rows from work array, store into output array. */
651
652 wsptr = workspace;
653 for (ctr = 0; ctr < 6; ctr++) {
654 outptr = output_buf[ctr] + output_col;
655
656 /* Even part */
657
658 /* Add fudge factor here for final descale. */
659 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
660 tmp0 <<= CONST_BITS;
661 tmp2 = (INT32) wsptr[4];
662 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
663 tmp1 = tmp0 + tmp10;
664 tmp11 = tmp0 - tmp10 - tmp10;
665 tmp10 = (INT32) wsptr[2];
666 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
667 tmp10 = tmp1 + tmp0;
668 tmp12 = tmp1 - tmp0;
669
670 /* Odd part */
671
672 z1 = (INT32) wsptr[1];
673 z2 = (INT32) wsptr[3];
674 z3 = (INT32) wsptr[5];
675 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
676 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
677 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
678 tmp1 = (z1 - z2 - z3) << CONST_BITS;
679
680 /* Final output stage */
681
682 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
683 CONST_BITS+PASS1_BITS+3)
684 & RANGE_MASK];
685 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
686 CONST_BITS+PASS1_BITS+3)
687 & RANGE_MASK];
688 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
689 CONST_BITS+PASS1_BITS+3)
690 & RANGE_MASK];
691 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
692 CONST_BITS+PASS1_BITS+3)
693 & RANGE_MASK];
694 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
695 CONST_BITS+PASS1_BITS+3)
696 & RANGE_MASK];
697 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
698 CONST_BITS+PASS1_BITS+3)
699 & RANGE_MASK];
700
701 wsptr += 6; /* advance pointer to next row */
702 }
703}
704
705
706/*
707 * Perform dequantization and inverse DCT on one block of coefficients,
708 * producing a reduced-size 5x5 output block.
709 *
710 * Optimized algorithm with 5 multiplications in the 1-D kernel.
711 * cK represents sqrt(2) * cos(K*pi/10).
712 */
713
714GLOBAL(void)
715jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
716 JCOEFPTR coef_block,
717 JSAMPARRAY output_buf, JDIMENSION output_col)
718{
719 INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
720 INT32 z1, z2, z3;
721 JCOEFPTR inptr;
722 ISLOW_MULT_TYPE * quantptr;
723 int * wsptr;
724 JSAMPROW outptr;
725 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
726 int ctr;
727 int workspace[5*5]; /* buffers data between passes */
728 SHIFT_TEMPS
729
730 /* Pass 1: process columns from input, store into work array. */
731
732 inptr = coef_block;
733 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
734 wsptr = workspace;
735 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
736 /* Even part */
737
738 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
739 if (ctr == 0)
740 CLAMP_DC(tmp12);
741 tmp12 <<= CONST_BITS;
742 /* Add fudge factor here for final descale. */
743 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
744 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
745 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
746 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
747 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
748 z3 = tmp12 + z2;
749 tmp10 = z3 + z1;
750 tmp11 = z3 - z1;
751 tmp12 -= z2 << 2;
752
753 /* Odd part */
754
755 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
756 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
757
758 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
759 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
760 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
761
762 /* Final output stage */
763
764 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
765 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
766 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
767 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
768 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
769 }
770
771 /* Pass 2: process 5 rows from work array, store into output array. */
772
773 wsptr = workspace;
774 for (ctr = 0; ctr < 5; ctr++) {
775 outptr = output_buf[ctr] + output_col;
776
777 /* Even part */
778
779 /* Add fudge factor here for final descale. */
780 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
781 tmp12 <<= CONST_BITS;
782 tmp0 = (INT32) wsptr[2];
783 tmp1 = (INT32) wsptr[4];
784 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
785 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
786 z3 = tmp12 + z2;
787 tmp10 = z3 + z1;
788 tmp11 = z3 - z1;
789 tmp12 -= z2 << 2;
790
791 /* Odd part */
792
793 z2 = (INT32) wsptr[1];
794 z3 = (INT32) wsptr[3];
795
796 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
797 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
798 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
799
800 /* Final output stage */
801
802 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
803 CONST_BITS+PASS1_BITS+3)
804 & RANGE_MASK];
805 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
806 CONST_BITS+PASS1_BITS+3)
807 & RANGE_MASK];
808 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
809 CONST_BITS+PASS1_BITS+3)
810 & RANGE_MASK];
811 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
812 CONST_BITS+PASS1_BITS+3)
813 & RANGE_MASK];
814 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
815 CONST_BITS+PASS1_BITS+3)
816 & RANGE_MASK];
817
818 wsptr += 5; /* advance pointer to next row */
819 }
820}
821
822
823/*
824 * Perform dequantization and inverse DCT on one block of coefficients,
825 * producing a reduced-size 4x4 output block.
826 *
827 * Optimized algorithm with 3 multiplications in the 1-D kernel.
828 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
829 */
830
831GLOBAL(void)
832jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
833 JCOEFPTR coef_block,
834 JSAMPARRAY output_buf, JDIMENSION output_col)
835{
836 INT32 tmp0, tmp2, tmp10, tmp12;
837 INT32 z1, z2, z3;
838 JCOEFPTR inptr;
839 ISLOW_MULT_TYPE * quantptr;
840 int * wsptr;
841 JSAMPROW outptr;
842 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
843 int ctr;
844 int workspace[4*4]; /* buffers data between passes */
845 SHIFT_TEMPS
846
847 /* Pass 1: process columns from input, store into work array. */
848
849 inptr = coef_block;
850 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
851 wsptr = workspace;
852 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
853 /* Even part */
854
855 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
856 if (ctr == 0)
857 CLAMP_DC(tmp0);
858 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
859
860 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
861 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
862
863 /* Odd part */
864 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
865
866 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
867 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
868
869 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
870 /* Add fudge factor here for final descale. */
871 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
872 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
873 CONST_BITS-PASS1_BITS);
874 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
875 CONST_BITS-PASS1_BITS);
876
877 /* Final output stage */
878
879 wsptr[4*0] = (int) (tmp10 + tmp0);
880 wsptr[4*3] = (int) (tmp10 - tmp0);
881 wsptr[4*1] = (int) (tmp12 + tmp2);
882 wsptr[4*2] = (int) (tmp12 - tmp2);
883 }
884
885 /* Pass 2: process 4 rows from work array, store into output array. */
886
887 wsptr = workspace;
888 for (ctr = 0; ctr < 4; ctr++) {
889 outptr = output_buf[ctr] + output_col;
890
891 /* Even part */
892
893 /* Add fudge factor here for final descale. */
894 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
895 tmp2 = (INT32) wsptr[2];
896
897 tmp10 = (tmp0 + tmp2) << CONST_BITS;
898 tmp12 = (tmp0 - tmp2) << CONST_BITS;
899
900 /* Odd part */
901 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
902
903 z2 = (INT32) wsptr[1];
904 z3 = (INT32) wsptr[3];
905
906 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
907 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
908 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
909
910 /* Final output stage */
911
912 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
913 CONST_BITS+PASS1_BITS+3)
914 & RANGE_MASK];
915 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
916 CONST_BITS+PASS1_BITS+3)
917 & RANGE_MASK];
918 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
919 CONST_BITS+PASS1_BITS+3)
920 & RANGE_MASK];
921 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
922 CONST_BITS+PASS1_BITS+3)
923 & RANGE_MASK];
924
925 wsptr += 4; /* advance pointer to next row */
926 }
927}
928
929
930/*
931 * Perform dequantization and inverse DCT on one block of coefficients,
932 * producing a reduced-size 3x3 output block.
933 *
934 * Optimized algorithm with 2 multiplications in the 1-D kernel.
935 * cK represents sqrt(2) * cos(K*pi/6).
936 */
937
938GLOBAL(void)
939jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
940 JCOEFPTR coef_block,
941 JSAMPARRAY output_buf, JDIMENSION output_col)
942{
943 INT32 tmp0, tmp2, tmp10, tmp12;
944 JCOEFPTR inptr;
945 ISLOW_MULT_TYPE * quantptr;
946 int * wsptr;
947 JSAMPROW outptr;
948 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
949 int ctr;
950 int workspace[3*3]; /* buffers data between passes */
951 SHIFT_TEMPS
952
953 /* Pass 1: process columns from input, store into work array. */
954
955 inptr = coef_block;
956 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
957 wsptr = workspace;
958 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
959 /* Even part */
960
961 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
962 if (ctr == 0)
963 CLAMP_DC(tmp0);
964 tmp0 <<= CONST_BITS;
965 /* Add fudge factor here for final descale. */
966 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
967 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
968 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
969 tmp10 = tmp0 + tmp12;
970 tmp2 = tmp0 - tmp12 - tmp12;
971
972 /* Odd part */
973
974 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
975 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
976
977 /* Final output stage */
978
979 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
980 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
981 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
982 }
983
984 /* Pass 2: process 3 rows from work array, store into output array. */
985
986 wsptr = workspace;
987 for (ctr = 0; ctr < 3; ctr++) {
988 outptr = output_buf[ctr] + output_col;
989
990 /* Even part */
991
992 /* Add fudge factor here for final descale. */
993 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
994 tmp0 <<= CONST_BITS;
995 tmp2 = (INT32) wsptr[2];
996 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
997 tmp10 = tmp0 + tmp12;
998 tmp2 = tmp0 - tmp12 - tmp12;
999
1000 /* Odd part */
1001
1002 tmp12 = (INT32) wsptr[1];
1003 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1004
1005 /* Final output stage */
1006
1007 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1008 CONST_BITS+PASS1_BITS+3)
1009 & RANGE_MASK];
1010 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1011 CONST_BITS+PASS1_BITS+3)
1012 & RANGE_MASK];
1013 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1014 CONST_BITS+PASS1_BITS+3)
1015 & RANGE_MASK];
1016
1017 wsptr += 3; /* advance pointer to next row */
1018 }
1019}
1020
1021
1022/*
1023 * Perform dequantization and inverse DCT on one block of coefficients,
1024 * producing a reduced-size 2x2 output block.
1025 *
1026 * Multiplication-less algorithm.
1027 */
1028
1029GLOBAL(void)
1030jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1031 JCOEFPTR coef_block,
1032 JSAMPARRAY output_buf, JDIMENSION output_col)
1033{
1034 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1035 ISLOW_MULT_TYPE * quantptr;
1036 JSAMPROW outptr;
1037 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1038 SHIFT_TEMPS
1039
1040 /* Pass 1: process columns from input. */
1041
1042 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1043
1044 /* Column 0 */
1045 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1046 CLAMP_DC(tmp4);
1047 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1048 /* Add fudge factor here for final descale. */
1049 tmp4 += ONE << 2;
1050
1051 tmp0 = tmp4 + tmp5;
1052 tmp2 = tmp4 - tmp5;
1053
1054 /* Column 1 */
1055 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1056 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1057
1058 tmp1 = tmp4 + tmp5;
1059 tmp3 = tmp4 - tmp5;
1060
1061 /* Pass 2: process 2 rows, store into output array. */
1062
1063 /* Row 0 */
1064 outptr = output_buf[0] + output_col;
1065
1066 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1067 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1068
1069 /* Row 1 */
1070 outptr = output_buf[1] + output_col;
1071
1072 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1073 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1074}
1075
1076
1077/*
1078 * Perform dequantization and inverse DCT on one block of coefficients,
1079 * producing a reduced-size 1x1 output block.
1080 *
1081 * We hardly need an inverse DCT routine for this: just take the
1082 * average pixel value, which is one-eighth of the DC coefficient.
1083 */
1084
1085GLOBAL(void)
1086jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1087 JCOEFPTR coef_block,
1088 JSAMPARRAY output_buf, JDIMENSION output_col)
1089{
1090 int dcval;
1091 ISLOW_MULT_TYPE * quantptr;
1092 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1093 SHIFT_TEMPS
1094
1095 /* 1x1 is trivial: just take the DC coefficient divided by 8. */
1096 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1097 dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1098 CLAMP_DC(dcval);
1099 dcval = (int) DESCALE((INT32) dcval, 3);
1100
1101 output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
1102}
1103
1104
1105/*
1106 * Perform dequantization and inverse DCT on one block of coefficients,
1107 * producing a 9x9 output block.
1108 *
1109 * Optimized algorithm with 10 multiplications in the 1-D kernel.
1110 * cK represents sqrt(2) * cos(K*pi/18).
1111 */
1112
1113GLOBAL(void)
1114jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1115 JCOEFPTR coef_block,
1116 JSAMPARRAY output_buf, JDIMENSION output_col)
1117{
1118 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119 INT32 z1, z2, z3, z4;
1120 JCOEFPTR inptr;
1121 ISLOW_MULT_TYPE * quantptr;
1122 int * wsptr;
1123 JSAMPROW outptr;
1124 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125 int ctr;
1126 int workspace[8*9]; /* buffers data between passes */
1127 SHIFT_TEMPS
1128
1129 /* Pass 1: process columns from input, store into work array. */
1130
1131 inptr = coef_block;
1132 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133 wsptr = workspace;
1134 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135 /* Even part */
1136
1137 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138 if (ctr == 0)
1139 CLAMP_DC(tmp0);
1140 tmp0 <<= CONST_BITS;
1141 /* Add fudge factor here for final descale. */
1142 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1143
1144 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1145 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1146 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1147
1148 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1149 tmp1 = tmp0 + tmp3;
1150 tmp2 = tmp0 - tmp3 - tmp3;
1151
1152 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1153 tmp11 = tmp2 + tmp0;
1154 tmp14 = tmp2 - tmp0 - tmp0;
1155
1156 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1157 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1158 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1159
1160 tmp10 = tmp1 + tmp0 - tmp3;
1161 tmp12 = tmp1 - tmp0 + tmp2;
1162 tmp13 = tmp1 - tmp2 + tmp3;
1163
1164 /* Odd part */
1165
1166 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1167 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1168 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1169 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1170
1171 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1172
1173 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1174 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1175 tmp0 = tmp2 + tmp3 - z2;
1176 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1177 tmp2 += z2 - tmp1;
1178 tmp3 += z2 + tmp1;
1179 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1180
1181 /* Final output stage */
1182
1183 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1184 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1185 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1186 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1187 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1188 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1189 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1190 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1191 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1192 }
1193
1194 /* Pass 2: process 9 rows from work array, store into output array. */
1195
1196 wsptr = workspace;
1197 for (ctr = 0; ctr < 9; ctr++) {
1198 outptr = output_buf[ctr] + output_col;
1199
1200 /* Even part */
1201
1202 /* Add fudge factor here for final descale. */
1203 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1204 tmp0 <<= CONST_BITS;
1205
1206 z1 = (INT32) wsptr[2];
1207 z2 = (INT32) wsptr[4];
1208 z3 = (INT32) wsptr[6];
1209
1210 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211 tmp1 = tmp0 + tmp3;
1212 tmp2 = tmp0 - tmp3 - tmp3;
1213
1214 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215 tmp11 = tmp2 + tmp0;
1216 tmp14 = tmp2 - tmp0 - tmp0;
1217
1218 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1221
1222 tmp10 = tmp1 + tmp0 - tmp3;
1223 tmp12 = tmp1 - tmp0 + tmp2;
1224 tmp13 = tmp1 - tmp2 + tmp3;
1225
1226 /* Odd part */
1227
1228 z1 = (INT32) wsptr[1];
1229 z2 = (INT32) wsptr[3];
1230 z3 = (INT32) wsptr[5];
1231 z4 = (INT32) wsptr[7];
1232
1233 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1234
1235 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1236 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1237 tmp0 = tmp2 + tmp3 - z2;
1238 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1239 tmp2 += z2 - tmp1;
1240 tmp3 += z2 + tmp1;
1241 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1242
1243 /* Final output stage */
1244
1245 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246 CONST_BITS+PASS1_BITS+3)
1247 & RANGE_MASK];
1248 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249 CONST_BITS+PASS1_BITS+3)
1250 & RANGE_MASK];
1251 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252 CONST_BITS+PASS1_BITS+3)
1253 & RANGE_MASK];
1254 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255 CONST_BITS+PASS1_BITS+3)
1256 & RANGE_MASK];
1257 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258 CONST_BITS+PASS1_BITS+3)
1259 & RANGE_MASK];
1260 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261 CONST_BITS+PASS1_BITS+3)
1262 & RANGE_MASK];
1263 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264 CONST_BITS+PASS1_BITS+3)
1265 & RANGE_MASK];
1266 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267 CONST_BITS+PASS1_BITS+3)
1268 & RANGE_MASK];
1269 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270 CONST_BITS+PASS1_BITS+3)
1271 & RANGE_MASK];
1272
1273 wsptr += 8; /* advance pointer to next row */
1274 }
1275}
1276
1277
1278/*
1279 * Perform dequantization and inverse DCT on one block of coefficients,
1280 * producing a 10x10 output block.
1281 *
1282 * Optimized algorithm with 12 multiplications in the 1-D kernel.
1283 * cK represents sqrt(2) * cos(K*pi/20).
1284 */
1285
1286GLOBAL(void)
1287jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1288 JCOEFPTR coef_block,
1289 JSAMPARRAY output_buf, JDIMENSION output_col)
1290{
1291 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293 INT32 z1, z2, z3, z4, z5;
1294 JCOEFPTR inptr;
1295 ISLOW_MULT_TYPE * quantptr;
1296 int * wsptr;
1297 JSAMPROW outptr;
1298 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299 int ctr;
1300 int workspace[8*10]; /* buffers data between passes */
1301 SHIFT_TEMPS
1302
1303 /* Pass 1: process columns from input, store into work array. */
1304
1305 inptr = coef_block;
1306 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307 wsptr = workspace;
1308 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309 /* Even part */
1310
1311 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312 if (ctr == 0)
1313 CLAMP_DC(z3);
1314 z3 <<= CONST_BITS;
1315 /* Add fudge factor here for final descale. */
1316 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1317 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1318 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1319 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1320 tmp10 = z3 + z1;
1321 tmp11 = z3 - z2;
1322
1323 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1324 CONST_BITS-PASS1_BITS);
1325
1326 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1327 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1328
1329 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1330 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1331 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1332
1333 tmp20 = tmp10 + tmp12;
1334 tmp24 = tmp10 - tmp12;
1335 tmp21 = tmp11 + tmp13;
1336 tmp23 = tmp11 - tmp13;
1337
1338 /* Odd part */
1339
1340 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1341 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1342 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1343 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1344
1345 tmp11 = z2 + z4;
1346 tmp13 = z2 - z4;
1347
1348 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1349 z5 = z3 << CONST_BITS;
1350
1351 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1352 z4 = z5 + tmp12;
1353
1354 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1355 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1356
1357 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1358 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1359
1360 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1361
1362 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1363 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1364
1365 /* Final output stage */
1366
1367 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1368 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1369 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1370 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1371 wsptr[8*2] = (int) (tmp22 + tmp12);
1372 wsptr[8*7] = (int) (tmp22 - tmp12);
1373 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1374 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1375 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1376 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1377 }
1378
1379 /* Pass 2: process 10 rows from work array, store into output array. */
1380
1381 wsptr = workspace;
1382 for (ctr = 0; ctr < 10; ctr++) {
1383 outptr = output_buf[ctr] + output_col;
1384
1385 /* Even part */
1386
1387 /* Add fudge factor here for final descale. */
1388 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1389 z3 <<= CONST_BITS;
1390 z4 = (INT32) wsptr[4];
1391 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393 tmp10 = z3 + z1;
1394 tmp11 = z3 - z2;
1395
1396 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1397
1398 z2 = (INT32) wsptr[2];
1399 z3 = (INT32) wsptr[6];
1400
1401 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404
1405 tmp20 = tmp10 + tmp12;
1406 tmp24 = tmp10 - tmp12;
1407 tmp21 = tmp11 + tmp13;
1408 tmp23 = tmp11 - tmp13;
1409
1410 /* Odd part */
1411
1412 z1 = (INT32) wsptr[1];
1413 z2 = (INT32) wsptr[3];
1414 z3 = (INT32) wsptr[5];
1415 z3 <<= CONST_BITS;
1416 z4 = (INT32) wsptr[7];
1417
1418 tmp11 = z2 + z4;
1419 tmp13 = z2 - z4;
1420
1421 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1422
1423 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1424 z4 = z3 + tmp12;
1425
1426 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1428
1429 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1430 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1431
1432 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1433
1434 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1436
1437 /* Final output stage */
1438
1439 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440 CONST_BITS+PASS1_BITS+3)
1441 & RANGE_MASK];
1442 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443 CONST_BITS+PASS1_BITS+3)
1444 & RANGE_MASK];
1445 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446 CONST_BITS+PASS1_BITS+3)
1447 & RANGE_MASK];
1448 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449 CONST_BITS+PASS1_BITS+3)
1450 & RANGE_MASK];
1451 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452 CONST_BITS+PASS1_BITS+3)
1453 & RANGE_MASK];
1454 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455 CONST_BITS+PASS1_BITS+3)
1456 & RANGE_MASK];
1457 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458 CONST_BITS+PASS1_BITS+3)
1459 & RANGE_MASK];
1460 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461 CONST_BITS+PASS1_BITS+3)
1462 & RANGE_MASK];
1463 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464 CONST_BITS+PASS1_BITS+3)
1465 & RANGE_MASK];
1466 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467 CONST_BITS+PASS1_BITS+3)
1468 & RANGE_MASK];
1469
1470 wsptr += 8; /* advance pointer to next row */
1471 }
1472}
1473
1474
1475/*
1476 * Perform dequantization and inverse DCT on one block of coefficients,
1477 * producing a 11x11 output block.
1478 *
1479 * Optimized algorithm with 24 multiplications in the 1-D kernel.
1480 * cK represents sqrt(2) * cos(K*pi/22).
1481 */
1482
1483GLOBAL(void)
1484jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1485 JCOEFPTR coef_block,
1486 JSAMPARRAY output_buf, JDIMENSION output_col)
1487{
1488 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490 INT32 z1, z2, z3, z4;
1491 JCOEFPTR inptr;
1492 ISLOW_MULT_TYPE * quantptr;
1493 int * wsptr;
1494 JSAMPROW outptr;
1495 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496 int ctr;
1497 int workspace[8*11]; /* buffers data between passes */
1498 SHIFT_TEMPS
1499
1500 /* Pass 1: process columns from input, store into work array. */
1501
1502 inptr = coef_block;
1503 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504 wsptr = workspace;
1505 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506 /* Even part */
1507
1508 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509 if (ctr == 0)
1510 CLAMP_DC(tmp10);
1511 tmp10 <<= CONST_BITS;
1512 /* Add fudge factor here for final descale. */
1513 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1514
1515 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1516 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1517 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1518
1519 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1520 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1521 z4 = z1 + z3;
1522 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1523 z4 -= z2;
1524 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1525 tmp21 = tmp20 + tmp23 + tmp25 -
1526 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1527 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1528 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1529 tmp24 += tmp25;
1530 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1531 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1532 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1533 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1534
1535 /* Odd part */
1536
1537 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1538 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1539 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1540 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1541
1542 tmp11 = z1 + z2;
1543 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1544 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1545 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1546 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1547 tmp10 = tmp11 + tmp12 + tmp13 -
1548 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1549 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1550 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1551 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1552 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1553 tmp11 += z1;
1554 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1555 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1556 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1557 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1558
1559 /* Final output stage */
1560
1561 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1562 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1563 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1564 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1565 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1566 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1567 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1568 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1569 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1570 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1571 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1572 }
1573
1574 /* Pass 2: process 11 rows from work array, store into output array. */
1575
1576 wsptr = workspace;
1577 for (ctr = 0; ctr < 11; ctr++) {
1578 outptr = output_buf[ctr] + output_col;
1579
1580 /* Even part */
1581
1582 /* Add fudge factor here for final descale. */
1583 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1584 tmp10 <<= CONST_BITS;
1585
1586 z1 = (INT32) wsptr[2];
1587 z2 = (INT32) wsptr[4];
1588 z3 = (INT32) wsptr[6];
1589
1590 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592 z4 = z1 + z3;
1593 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594 z4 -= z2;
1595 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596 tmp21 = tmp20 + tmp23 + tmp25 -
1597 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600 tmp24 += tmp25;
1601 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1604 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1605
1606 /* Odd part */
1607
1608 z1 = (INT32) wsptr[1];
1609 z2 = (INT32) wsptr[3];
1610 z3 = (INT32) wsptr[5];
1611 z4 = (INT32) wsptr[7];
1612
1613 tmp11 = z1 + z2;
1614 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1616 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1617 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618 tmp10 = tmp11 + tmp12 + tmp13 -
1619 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1624 tmp11 += z1;
1625 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1627 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1628 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1629
1630 /* Final output stage */
1631
1632 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633 CONST_BITS+PASS1_BITS+3)
1634 & RANGE_MASK];
1635 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636 CONST_BITS+PASS1_BITS+3)
1637 & RANGE_MASK];
1638 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639 CONST_BITS+PASS1_BITS+3)
1640 & RANGE_MASK];
1641 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642 CONST_BITS+PASS1_BITS+3)
1643 & RANGE_MASK];
1644 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645 CONST_BITS+PASS1_BITS+3)
1646 & RANGE_MASK];
1647 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648 CONST_BITS+PASS1_BITS+3)
1649 & RANGE_MASK];
1650 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651 CONST_BITS+PASS1_BITS+3)
1652 & RANGE_MASK];
1653 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654 CONST_BITS+PASS1_BITS+3)
1655 & RANGE_MASK];
1656 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657 CONST_BITS+PASS1_BITS+3)
1658 & RANGE_MASK];
1659 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660 CONST_BITS+PASS1_BITS+3)
1661 & RANGE_MASK];
1662 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1663 CONST_BITS+PASS1_BITS+3)
1664 & RANGE_MASK];
1665
1666 wsptr += 8; /* advance pointer to next row */
1667 }
1668}
1669
1670
1671/*
1672 * Perform dequantization and inverse DCT on one block of coefficients,
1673 * producing a 12x12 output block.
1674 *
1675 * Optimized algorithm with 15 multiplications in the 1-D kernel.
1676 * cK represents sqrt(2) * cos(K*pi/24).
1677 */
1678
1679GLOBAL(void)
1680jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1681 JCOEFPTR coef_block,
1682 JSAMPARRAY output_buf, JDIMENSION output_col)
1683{
1684 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686 INT32 z1, z2, z3, z4;
1687 JCOEFPTR inptr;
1688 ISLOW_MULT_TYPE * quantptr;
1689 int * wsptr;
1690 JSAMPROW outptr;
1691 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692 int ctr;
1693 int workspace[8*12]; /* buffers data between passes */
1694 SHIFT_TEMPS
1695
1696 /* Pass 1: process columns from input, store into work array. */
1697
1698 inptr = coef_block;
1699 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700 wsptr = workspace;
1701 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702 /* Even part */
1703
1704 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705 if (ctr == 0)
1706 CLAMP_DC(z3);
1707 z3 <<= CONST_BITS;
1708 /* Add fudge factor here for final descale. */
1709 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1710
1711 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1712 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1713
1714 tmp10 = z3 + z4;
1715 tmp11 = z3 - z4;
1716
1717 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1718 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1719 z1 <<= CONST_BITS;
1720 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1721 z2 <<= CONST_BITS;
1722
1723 tmp12 = z1 - z2;
1724
1725 tmp21 = z3 + tmp12;
1726 tmp24 = z3 - tmp12;
1727
1728 tmp12 = z4 + z2;
1729
1730 tmp20 = tmp10 + tmp12;
1731 tmp25 = tmp10 - tmp12;
1732
1733 tmp12 = z4 - z1 - z2;
1734
1735 tmp22 = tmp11 + tmp12;
1736 tmp23 = tmp11 - tmp12;
1737
1738 /* Odd part */
1739
1740 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1741 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1742 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1743 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1744
1745 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1746 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1747
1748 tmp10 = z1 + z3;
1749 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1750 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1751 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1752 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1753 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1754 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1755 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1756 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1757
1758 z1 -= z4;
1759 z2 -= z3;
1760 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1761 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1762 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1763
1764 /* Final output stage */
1765
1766 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1767 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1768 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1769 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1770 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1771 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1772 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1773 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1774 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1775 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1776 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1777 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1778 }
1779
1780 /* Pass 2: process 12 rows from work array, store into output array. */
1781
1782 wsptr = workspace;
1783 for (ctr = 0; ctr < 12; ctr++) {
1784 outptr = output_buf[ctr] + output_col;
1785
1786 /* Even part */
1787
1788 /* Add fudge factor here for final descale. */
1789 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
1790 z3 <<= CONST_BITS;
1791
1792 z4 = (INT32) wsptr[4];
1793 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794
1795 tmp10 = z3 + z4;
1796 tmp11 = z3 - z4;
1797
1798 z1 = (INT32) wsptr[2];
1799 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800 z1 <<= CONST_BITS;
1801 z2 = (INT32) wsptr[6];
1802 z2 <<= CONST_BITS;
1803
1804 tmp12 = z1 - z2;
1805
1806 tmp21 = z3 + tmp12;
1807 tmp24 = z3 - tmp12;
1808
1809 tmp12 = z4 + z2;
1810
1811 tmp20 = tmp10 + tmp12;
1812 tmp25 = tmp10 - tmp12;
1813
1814 tmp12 = z4 - z1 - z2;
1815
1816 tmp22 = tmp11 + tmp12;
1817 tmp23 = tmp11 - tmp12;
1818
1819 /* Odd part */
1820
1821 z1 = (INT32) wsptr[1];
1822 z2 = (INT32) wsptr[3];
1823 z3 = (INT32) wsptr[5];
1824 z4 = (INT32) wsptr[7];
1825
1826 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1827 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1828
1829 tmp10 = z1 + z3;
1830 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1831 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1832 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1833 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1834 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1837 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1838
1839 z1 -= z4;
1840 z2 -= z3;
1841 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1842 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1843 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1844
1845 /* Final output stage */
1846
1847 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848 CONST_BITS+PASS1_BITS+3)
1849 & RANGE_MASK];
1850 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851 CONST_BITS+PASS1_BITS+3)
1852 & RANGE_MASK];
1853 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854 CONST_BITS+PASS1_BITS+3)
1855 & RANGE_MASK];
1856 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857 CONST_BITS+PASS1_BITS+3)
1858 & RANGE_MASK];
1859 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860 CONST_BITS+PASS1_BITS+3)
1861 & RANGE_MASK];
1862 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863 CONST_BITS+PASS1_BITS+3)
1864 & RANGE_MASK];
1865 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866 CONST_BITS+PASS1_BITS+3)
1867 & RANGE_MASK];
1868 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869 CONST_BITS+PASS1_BITS+3)
1870 & RANGE_MASK];
1871 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872 CONST_BITS+PASS1_BITS+3)
1873 & RANGE_MASK];
1874 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875 CONST_BITS+PASS1_BITS+3)
1876 & RANGE_MASK];
1877 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878 CONST_BITS+PASS1_BITS+3)
1879 & RANGE_MASK];
1880 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881 CONST_BITS+PASS1_BITS+3)
1882 & RANGE_MASK];
1883
1884 wsptr += 8; /* advance pointer to next row */
1885 }
1886}
1887
1888
1889/*
1890 * Perform dequantization and inverse DCT on one block of coefficients,
1891 * producing a 13x13 output block.
1892 *
1893 * Optimized algorithm with 29 multiplications in the 1-D kernel.
1894 * cK represents sqrt(2) * cos(K*pi/26).
1895 */
1896
1897GLOBAL(void)
1898jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1899 JCOEFPTR coef_block,
1900 JSAMPARRAY output_buf, JDIMENSION output_col)
1901{
1902 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904 INT32 z1, z2, z3, z4;
1905 JCOEFPTR inptr;
1906 ISLOW_MULT_TYPE * quantptr;
1907 int * wsptr;
1908 JSAMPROW outptr;
1909 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910 int ctr;
1911 int workspace[8*13]; /* buffers data between passes */
1912 SHIFT_TEMPS
1913
1914 /* Pass 1: process columns from input, store into work array. */
1915
1916 inptr = coef_block;
1917 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918 wsptr = workspace;
1919 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920 /* Even part */
1921
1922 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923 if (ctr == 0)
1924 CLAMP_DC(z1);
1925 z1 <<= CONST_BITS;
1926 /* Add fudge factor here for final descale. */
1927 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1928
1929 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1930 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1931 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1932
1933 tmp10 = z3 + z4;
1934 tmp11 = z3 - z4;
1935
1936 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1937 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1938
1939 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1940 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1941
1942 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1943 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1944
1945 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1946 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1947
1948 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1949 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1950
1951 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1952 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1953
1954 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1955
1956 /* Odd part */
1957
1958 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1959 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1960 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1961 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1962
1963 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1964 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1965 tmp15 = z1 + z4;
1966 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1967 tmp10 = tmp11 + tmp12 + tmp13 -
1968 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1969 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1970 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1971 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1972 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1973 tmp11 += tmp14;
1974 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1975 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1976 tmp12 += tmp14;
1977 tmp13 += tmp14;
1978 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1979 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1980 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1981 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1982 tmp14 += z1;
1983 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1984 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1985
1986 /* Final output stage */
1987
1988 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1989 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1990 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1991 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1992 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1993 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1994 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1995 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1996 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1997 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1998 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1999 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2000 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2001 }
2002
2003 /* Pass 2: process 13 rows from work array, store into output array. */
2004
2005 wsptr = workspace;
2006 for (ctr = 0; ctr < 13; ctr++) {
2007 outptr = output_buf[ctr] + output_col;
2008
2009 /* Even part */
2010
2011 /* Add fudge factor here for final descale. */
2012 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2013 z1 <<= CONST_BITS;
2014
2015 z2 = (INT32) wsptr[2];
2016 z3 = (INT32) wsptr[4];
2017 z4 = (INT32) wsptr[6];
2018
2019 tmp10 = z3 + z4;
2020 tmp11 = z3 - z4;
2021
2022 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2024
2025 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2027
2028 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2030
2031 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2033
2034 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2035 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2036
2037 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2039
2040 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2041
2042 /* Odd part */
2043
2044 z1 = (INT32) wsptr[1];
2045 z2 = (INT32) wsptr[3];
2046 z3 = (INT32) wsptr[5];
2047 z4 = (INT32) wsptr[7];
2048
2049 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2050 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2051 tmp15 = z1 + z4;
2052 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2053 tmp10 = tmp11 + tmp12 + tmp13 -
2054 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2056 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2059 tmp11 += tmp14;
2060 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2062 tmp12 += tmp14;
2063 tmp13 += tmp14;
2064 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2065 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2067 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2068 tmp14 += z1;
2069 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2070 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2071
2072 /* Final output stage */
2073
2074 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075 CONST_BITS+PASS1_BITS+3)
2076 & RANGE_MASK];
2077 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078 CONST_BITS+PASS1_BITS+3)
2079 & RANGE_MASK];
2080 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081 CONST_BITS+PASS1_BITS+3)
2082 & RANGE_MASK];
2083 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084 CONST_BITS+PASS1_BITS+3)
2085 & RANGE_MASK];
2086 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087 CONST_BITS+PASS1_BITS+3)
2088 & RANGE_MASK];
2089 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090 CONST_BITS+PASS1_BITS+3)
2091 & RANGE_MASK];
2092 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093 CONST_BITS+PASS1_BITS+3)
2094 & RANGE_MASK];
2095 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096 CONST_BITS+PASS1_BITS+3)
2097 & RANGE_MASK];
2098 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099 CONST_BITS+PASS1_BITS+3)
2100 & RANGE_MASK];
2101 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102 CONST_BITS+PASS1_BITS+3)
2103 & RANGE_MASK];
2104 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105 CONST_BITS+PASS1_BITS+3)
2106 & RANGE_MASK];
2107 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108 CONST_BITS+PASS1_BITS+3)
2109 & RANGE_MASK];
2110 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2111 CONST_BITS+PASS1_BITS+3)
2112 & RANGE_MASK];
2113
2114 wsptr += 8; /* advance pointer to next row */
2115 }
2116}
2117
2118
2119/*
2120 * Perform dequantization and inverse DCT on one block of coefficients,
2121 * producing a 14x14 output block.
2122 *
2123 * Optimized algorithm with 20 multiplications in the 1-D kernel.
2124 * cK represents sqrt(2) * cos(K*pi/28).
2125 */
2126
2127GLOBAL(void)
2128jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129 JCOEFPTR coef_block,
2130 JSAMPARRAY output_buf, JDIMENSION output_col)
2131{
2132 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134 INT32 z1, z2, z3, z4;
2135 JCOEFPTR inptr;
2136 ISLOW_MULT_TYPE * quantptr;
2137 int * wsptr;
2138 JSAMPROW outptr;
2139 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140 int ctr;
2141 int workspace[8*14]; /* buffers data between passes */
2142 SHIFT_TEMPS
2143
2144 /* Pass 1: process columns from input, store into work array. */
2145
2146 inptr = coef_block;
2147 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148 wsptr = workspace;
2149 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150 /* Even part */
2151
2152 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153 if (ctr == 0)
2154 CLAMP_DC(z1);
2155 z1 <<= CONST_BITS;
2156 /* Add fudge factor here for final descale. */
2157 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2158 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2159 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2160 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2161 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2162
2163 tmp10 = z1 + z2;
2164 tmp11 = z1 + z3;
2165 tmp12 = z1 - z4;
2166
2167 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2168 CONST_BITS-PASS1_BITS);
2169
2170 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2171 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2172
2173 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2174
2175 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2176 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2177 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2178 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2179
2180 tmp20 = tmp10 + tmp13;
2181 tmp26 = tmp10 - tmp13;
2182 tmp21 = tmp11 + tmp14;
2183 tmp25 = tmp11 - tmp14;
2184 tmp22 = tmp12 + tmp15;
2185 tmp24 = tmp12 - tmp15;
2186
2187 /* Odd part */
2188
2189 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2190 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2191 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2192 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2193 tmp13 = z4 << CONST_BITS;
2194
2195 tmp14 = z1 + z3;
2196 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2197 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2198 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2199 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2200 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2201 z1 -= z2;
2202 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2203 tmp16 += tmp15;
2204 z1 += z4;
2205 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2206 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2207 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2208 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2209 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2210 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2211
2212 tmp13 = (z1 - z3) << PASS1_BITS;
2213
2214 /* Final output stage */
2215
2216 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2217 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2218 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2219 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2220 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2221 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2222 wsptr[8*3] = (int) (tmp23 + tmp13);
2223 wsptr[8*10] = (int) (tmp23 - tmp13);
2224 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2225 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2226 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2227 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2228 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2229 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2230 }
2231
2232 /* Pass 2: process 14 rows from work array, store into output array. */
2233
2234 wsptr = workspace;
2235 for (ctr = 0; ctr < 14; ctr++) {
2236 outptr = output_buf[ctr] + output_col;
2237
2238 /* Even part */
2239
2240 /* Add fudge factor here for final descale. */
2241 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2242 z1 <<= CONST_BITS;
2243 z4 = (INT32) wsptr[4];
2244 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2247
2248 tmp10 = z1 + z2;
2249 tmp11 = z1 + z3;
2250 tmp12 = z1 - z4;
2251
2252 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2253
2254 z1 = (INT32) wsptr[2];
2255 z2 = (INT32) wsptr[6];
2256
2257 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2258
2259 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2262 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2263
2264 tmp20 = tmp10 + tmp13;
2265 tmp26 = tmp10 - tmp13;
2266 tmp21 = tmp11 + tmp14;
2267 tmp25 = tmp11 - tmp14;
2268 tmp22 = tmp12 + tmp15;
2269 tmp24 = tmp12 - tmp15;
2270
2271 /* Odd part */
2272
2273 z1 = (INT32) wsptr[1];
2274 z2 = (INT32) wsptr[3];
2275 z3 = (INT32) wsptr[5];
2276 z4 = (INT32) wsptr[7];
2277 z4 <<= CONST_BITS;
2278
2279 tmp14 = z1 + z3;
2280 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2281 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2282 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2284 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2285 z1 -= z2;
2286 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2287 tmp16 += tmp15;
2288 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2289 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2290 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2291 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2292 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2294
2295 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2296
2297 /* Final output stage */
2298
2299 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300 CONST_BITS+PASS1_BITS+3)
2301 & RANGE_MASK];
2302 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303 CONST_BITS+PASS1_BITS+3)
2304 & RANGE_MASK];
2305 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306 CONST_BITS+PASS1_BITS+3)
2307 & RANGE_MASK];
2308 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309 CONST_BITS+PASS1_BITS+3)
2310 & RANGE_MASK];
2311 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312 CONST_BITS+PASS1_BITS+3)
2313 & RANGE_MASK];
2314 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315 CONST_BITS+PASS1_BITS+3)
2316 & RANGE_MASK];
2317 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318 CONST_BITS+PASS1_BITS+3)
2319 & RANGE_MASK];
2320 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321 CONST_BITS+PASS1_BITS+3)
2322 & RANGE_MASK];
2323 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324 CONST_BITS+PASS1_BITS+3)
2325 & RANGE_MASK];
2326 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327 CONST_BITS+PASS1_BITS+3)
2328 & RANGE_MASK];
2329 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330 CONST_BITS+PASS1_BITS+3)
2331 & RANGE_MASK];
2332 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333 CONST_BITS+PASS1_BITS+3)
2334 & RANGE_MASK];
2335 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336 CONST_BITS+PASS1_BITS+3)
2337 & RANGE_MASK];
2338 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339 CONST_BITS+PASS1_BITS+3)
2340 & RANGE_MASK];
2341
2342 wsptr += 8; /* advance pointer to next row */
2343 }
2344}
2345
2346
2347/*
2348 * Perform dequantization and inverse DCT on one block of coefficients,
2349 * producing a 15x15 output block.
2350 *
2351 * Optimized algorithm with 22 multiplications in the 1-D kernel.
2352 * cK represents sqrt(2) * cos(K*pi/30).
2353 */
2354
2355GLOBAL(void)
2356jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2357 JCOEFPTR coef_block,
2358 JSAMPARRAY output_buf, JDIMENSION output_col)
2359{
2360 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362 INT32 z1, z2, z3, z4;
2363 JCOEFPTR inptr;
2364 ISLOW_MULT_TYPE * quantptr;
2365 int * wsptr;
2366 JSAMPROW outptr;
2367 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368 int ctr;
2369 int workspace[8*15]; /* buffers data between passes */
2370 SHIFT_TEMPS
2371
2372 /* Pass 1: process columns from input, store into work array. */
2373
2374 inptr = coef_block;
2375 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376 wsptr = workspace;
2377 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378 /* Even part */
2379
2380 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381 if (ctr == 0)
2382 CLAMP_DC(z1);
2383 z1 <<= CONST_BITS;
2384 /* Add fudge factor here for final descale. */
2385 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2386
2387 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2388 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2389 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2390
2391 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2392 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2393
2394 tmp12 = z1 - tmp10;
2395 tmp13 = z1 + tmp11;
2396 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2397
2398 z4 = z2 - z3;
2399 z3 += z2;
2400 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2401 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2402 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2403
2404 tmp20 = tmp13 + tmp10 + tmp11;
2405 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2406
2407 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2408 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2409
2410 tmp25 = tmp13 - tmp10 - tmp11;
2411 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2412
2413 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2414 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2415
2416 tmp21 = tmp12 + tmp10 + tmp11;
2417 tmp24 = tmp13 - tmp10 + tmp11;
2418 tmp11 += tmp11;
2419 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2420 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2421
2422 /* Odd part */
2423
2424 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2425 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2426 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2427 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2428 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2429
2430 tmp13 = z2 - z4;
2431 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2432 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2433 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2434
2435 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2436 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2437 z2 = z1 - z4;
2438 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2439
2440 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2441 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2442 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2443 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2444 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2445 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2446
2447 /* Final output stage */
2448
2449 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2450 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2451 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2452 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2453 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2454 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2455 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2456 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2457 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2458 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2459 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2460 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2461 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2462 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2463 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2464 }
2465
2466 /* Pass 2: process 15 rows from work array, store into output array. */
2467
2468 wsptr = workspace;
2469 for (ctr = 0; ctr < 15; ctr++) {
2470 outptr = output_buf[ctr] + output_col;
2471
2472 /* Even part */
2473
2474 /* Add fudge factor here for final descale. */
2475 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2476 z1 <<= CONST_BITS;
2477
2478 z2 = (INT32) wsptr[2];
2479 z3 = (INT32) wsptr[4];
2480 z4 = (INT32) wsptr[6];
2481
2482 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484
2485 tmp12 = z1 - tmp10;
2486 tmp13 = z1 + tmp11;
2487 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2488
2489 z4 = z2 - z3;
2490 z3 += z2;
2491 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2494
2495 tmp20 = tmp13 + tmp10 + tmp11;
2496 tmp23 = tmp12 - tmp10 + tmp11 + z2;
2497
2498 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2500
2501 tmp25 = tmp13 - tmp10 - tmp11;
2502 tmp26 = tmp12 + tmp10 - tmp11 - z2;
2503
2504 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2506
2507 tmp21 = tmp12 + tmp10 + tmp11;
2508 tmp24 = tmp13 - tmp10 + tmp11;
2509 tmp11 += tmp11;
2510 tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2511 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2512
2513 /* Odd part */
2514
2515 z1 = (INT32) wsptr[1];
2516 z2 = (INT32) wsptr[3];
2517 z4 = (INT32) wsptr[5];
2518 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2519 z4 = (INT32) wsptr[7];
2520
2521 tmp13 = z2 - z4;
2522 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2523 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2524 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2525
2526 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2527 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2528 z2 = z1 - z4;
2529 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2530
2531 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2534 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2535 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2536 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2537
2538 /* Final output stage */
2539
2540 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541 CONST_BITS+PASS1_BITS+3)
2542 & RANGE_MASK];
2543 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544 CONST_BITS+PASS1_BITS+3)
2545 & RANGE_MASK];
2546 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547 CONST_BITS+PASS1_BITS+3)
2548 & RANGE_MASK];
2549 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550 CONST_BITS+PASS1_BITS+3)
2551 & RANGE_MASK];
2552 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553 CONST_BITS+PASS1_BITS+3)
2554 & RANGE_MASK];
2555 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556 CONST_BITS+PASS1_BITS+3)
2557 & RANGE_MASK];
2558 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559 CONST_BITS+PASS1_BITS+3)
2560 & RANGE_MASK];
2561 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562 CONST_BITS+PASS1_BITS+3)
2563 & RANGE_MASK];
2564 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565 CONST_BITS+PASS1_BITS+3)
2566 & RANGE_MASK];
2567 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568 CONST_BITS+PASS1_BITS+3)
2569 & RANGE_MASK];
2570 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571 CONST_BITS+PASS1_BITS+3)
2572 & RANGE_MASK];
2573 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574 CONST_BITS+PASS1_BITS+3)
2575 & RANGE_MASK];
2576 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577 CONST_BITS+PASS1_BITS+3)
2578 & RANGE_MASK];
2579 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580 CONST_BITS+PASS1_BITS+3)
2581 & RANGE_MASK];
2582 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2583 CONST_BITS+PASS1_BITS+3)
2584 & RANGE_MASK];
2585
2586 wsptr += 8; /* advance pointer to next row */
2587 }
2588}
2589
2590
2591/*
2592 * Perform dequantization and inverse DCT on one block of coefficients,
2593 * producing a 16x16 output block.
2594 *
2595 * Optimized algorithm with 28 multiplications in the 1-D kernel.
2596 * cK represents sqrt(2) * cos(K*pi/32).
2597 */
2598
2599GLOBAL(void)
2600jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2601 JCOEFPTR coef_block,
2602 JSAMPARRAY output_buf, JDIMENSION output_col)
2603{
2604 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606 INT32 z1, z2, z3, z4;
2607 JCOEFPTR inptr;
2608 ISLOW_MULT_TYPE * quantptr;
2609 int * wsptr;
2610 JSAMPROW outptr;
2611 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612 int ctr;
2613 int workspace[8*16]; /* buffers data between passes */
2614 SHIFT_TEMPS
2615
2616 /* Pass 1: process columns from input, store into work array. */
2617
2618 inptr = coef_block;
2619 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620 wsptr = workspace;
2621 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622 /* Even part */
2623
2624 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625 if (ctr == 0)
2626 CLAMP_DC(tmp0);
2627 tmp0 <<= CONST_BITS;
2628 /* Add fudge factor here for final descale. */
2629 tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
2630
2631 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2632 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2633 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2634
2635 tmp10 = tmp0 + tmp1;
2636 tmp11 = tmp0 - tmp1;
2637 tmp12 = tmp0 + tmp2;
2638 tmp13 = tmp0 - tmp2;
2639
2640 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2641 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2642 z3 = z1 - z2;
2643 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2644 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2645
2646 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2647 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2648 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2649 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2650
2651 tmp20 = tmp10 + tmp0;
2652 tmp27 = tmp10 - tmp0;
2653 tmp21 = tmp12 + tmp1;
2654 tmp26 = tmp12 - tmp1;
2655 tmp22 = tmp13 + tmp2;
2656 tmp25 = tmp13 - tmp2;
2657 tmp23 = tmp11 + tmp3;
2658 tmp24 = tmp11 - tmp3;
2659
2660 /* Odd part */
2661
2662 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2663 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2664 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2665 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2666
2667 tmp11 = z1 + z3;
2668
2669 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2670 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2671 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2672 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2673 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2674 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2675 tmp0 = tmp1 + tmp2 + tmp3 -
2676 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2677 tmp13 = tmp10 + tmp11 + tmp12 -
2678 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2679 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2680 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2681 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2682 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2683 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2684 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2685 z2 += z4;
2686 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2687 tmp1 += z1;
2688 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2689 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2690 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2691 tmp12 += z2;
2692 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2693 tmp2 += z2;
2694 tmp3 += z2;
2695 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2696 tmp10 += z2;
2697 tmp11 += z2;
2698
2699 /* Final output stage */
2700
2701 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2702 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2703 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2704 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2705 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2706 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2707 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2708 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2709 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2710 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2711 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2712 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2713 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2714 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2715 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2716 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2717 }
2718
2719 /* Pass 2: process 16 rows from work array, store into output array. */
2720
2721 wsptr = workspace;
2722 for (ctr = 0; ctr < 16; ctr++) {
2723 outptr = output_buf[ctr] + output_col;
2724
2725 /* Even part */
2726
2727 /* Add fudge factor here for final descale. */
2728 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
2729 tmp0 <<= CONST_BITS;
2730
2731 z1 = (INT32) wsptr[4];
2732 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2734
2735 tmp10 = tmp0 + tmp1;
2736 tmp11 = tmp0 - tmp1;
2737 tmp12 = tmp0 + tmp2;
2738 tmp13 = tmp0 - tmp2;
2739
2740 z1 = (INT32) wsptr[2];
2741 z2 = (INT32) wsptr[6];
2742 z3 = z1 - z2;
2743 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2745
2746 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2750
2751 tmp20 = tmp10 + tmp0;
2752 tmp27 = tmp10 - tmp0;
2753 tmp21 = tmp12 + tmp1;
2754 tmp26 = tmp12 - tmp1;
2755 tmp22 = tmp13 + tmp2;
2756 tmp25 = tmp13 - tmp2;
2757 tmp23 = tmp11 + tmp3;
2758 tmp24 = tmp11 - tmp3;
2759
2760 /* Odd part */
2761
2762 z1 = (INT32) wsptr[1];
2763 z2 = (INT32) wsptr[3];
2764 z3 = (INT32) wsptr[5];
2765 z4 = (INT32) wsptr[7];
2766
2767 tmp11 = z1 + z3;
2768
2769 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2770 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2771 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2772 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2773 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2774 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2775 tmp0 = tmp1 + tmp2 + tmp3 -
2776 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777 tmp13 = tmp10 + tmp11 + tmp12 -
2778 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2780 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2783 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2785 z2 += z4;
2786 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2787 tmp1 += z1;
2788 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2790 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2791 tmp12 += z2;
2792 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793 tmp2 += z2;
2794 tmp3 += z2;
2795 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2796 tmp10 += z2;
2797 tmp11 += z2;
2798
2799 /* Final output stage */
2800
2801 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802 CONST_BITS+PASS1_BITS+3)
2803 & RANGE_MASK];
2804 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805 CONST_BITS+PASS1_BITS+3)
2806 & RANGE_MASK];
2807 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808 CONST_BITS+PASS1_BITS+3)
2809 & RANGE_MASK];
2810 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811 CONST_BITS+PASS1_BITS+3)
2812 & RANGE_MASK];
2813 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814 CONST_BITS+PASS1_BITS+3)
2815 & RANGE_MASK];
2816 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817 CONST_BITS+PASS1_BITS+3)
2818 & RANGE_MASK];
2819 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820 CONST_BITS+PASS1_BITS+3)
2821 & RANGE_MASK];
2822 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823 CONST_BITS+PASS1_BITS+3)
2824 & RANGE_MASK];
2825 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826 CONST_BITS+PASS1_BITS+3)
2827 & RANGE_MASK];
2828 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829 CONST_BITS+PASS1_BITS+3)
2830 & RANGE_MASK];
2831 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832 CONST_BITS+PASS1_BITS+3)
2833 & RANGE_MASK];
2834 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835 CONST_BITS+PASS1_BITS+3)
2836 & RANGE_MASK];
2837 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838 CONST_BITS+PASS1_BITS+3)
2839 & RANGE_MASK];
2840 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841 CONST_BITS+PASS1_BITS+3)
2842 & RANGE_MASK];
2843 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844 CONST_BITS+PASS1_BITS+3)
2845 & RANGE_MASK];
2846 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847 CONST_BITS+PASS1_BITS+3)
2848 & RANGE_MASK];
2849
2850 wsptr += 8; /* advance pointer to next row */
2851 }
2852}
2853
2854
2855/*
2856 * Perform dequantization and inverse DCT on one block of coefficients,
2857 * producing a 16x8 output block.
2858 *
2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860 */
2861
2862GLOBAL(void)
2863jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2864 JCOEFPTR coef_block,
2865 JSAMPARRAY output_buf, JDIMENSION output_col)
2866{
2867 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869 INT32 z1, z2, z3, z4;
2870 JCOEFPTR inptr;
2871 ISLOW_MULT_TYPE * quantptr;
2872 int * wsptr;
2873 JSAMPROW outptr;
2874 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875 int ctr;
2876 int workspace[8*8]; /* buffers data between passes */
2877 SHIFT_TEMPS
2878
2879 /* Pass 1: process columns from input, store into work array. */
2880 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
2881 /* furthermore, we scale the results by 2**PASS1_BITS. */
2882
2883 inptr = coef_block;
2884 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2885 wsptr = workspace;
2886 for (ctr = DCTSIZE; ctr > 0; ctr--) {
2887 /* Due to quantization, we will usually find that many of the input
2888 * coefficients are zero, especially the AC terms. We can exploit this
2889 * by short-circuiting the IDCT calculation for any column in which all
2890 * the AC terms are zero. In that case each output is equal to the
2891 * DC coefficient (with scale factor as needed).
2892 * With typical images and quantization tables, half or more of the
2893 * column DCT calculations can be simplified this way.
2894 */
2895
2896 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2897 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2898 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2899 inptr[DCTSIZE*7] == 0) {
2900 /* AC terms all zero */
2901 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2902 if (ctr == DCTSIZE)
2903 CLAMP_DC(dcval);
2904 dcval <<= PASS1_BITS;
2905 wsptr[DCTSIZE*0] = dcval;
2906 wsptr[DCTSIZE*1] = dcval;
2907 wsptr[DCTSIZE*2] = dcval;
2908 wsptr[DCTSIZE*3] = dcval;
2909 wsptr[DCTSIZE*4] = dcval;
2910 wsptr[DCTSIZE*5] = dcval;
2911 wsptr[DCTSIZE*6] = dcval;
2912 wsptr[DCTSIZE*7] = dcval;
2913
2914 inptr++; /* advance pointers to next column */
2915 quantptr++;
2916 wsptr++;
2917 continue;
2918 }
2919
2920 /* Even part: reverse the even part of the forward DCT. */
2921 /* The rotator is sqrt(2)*c(-6). */
2922
2923 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2924 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2925
2926 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
2927 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
2928 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
2929
2930 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2931 if (ctr == DCTSIZE)
2932 CLAMP_DC(z2);
2933 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2934 z2 <<= CONST_BITS;
2935 z3 <<= CONST_BITS;
2936 /* Add fudge factor here for final descale. */
2937 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2938
2939 tmp0 = z2 + z3;
2940 tmp1 = z2 - z3;
2941
2942 tmp10 = tmp0 + tmp2;
2943 tmp13 = tmp0 - tmp2;
2944 tmp11 = tmp1 + tmp3;
2945 tmp12 = tmp1 - tmp3;
2946
2947 /* Odd part per figure 8; the matrix is unitary and hence its
2948 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2949 */
2950
2951 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2952 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2953 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2954 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2955
2956 z2 = tmp0 + tmp2;
2957 z3 = tmp1 + tmp3;
2958
2959 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
2960 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
2961 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
2962 z2 += z1;
2963 z3 += z1;
2964
2965 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
2966 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
2967 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
2968 tmp0 += z1 + z2;
2969 tmp3 += z1 + z3;
2970
2971 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
2972 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
2973 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
2974 tmp1 += z1 + z3;
2975 tmp2 += z1 + z2;
2976
2977 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2978
2979 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2980 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2981 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2982 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2983 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2984 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2985 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2986 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2987
2988 inptr++; /* advance pointers to next column */
2989 quantptr++;
2990 wsptr++;
2991 }
2992
2993 /* Pass 2: process 8 rows from work array, store into output array.
2994 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2995 */
2996 wsptr = workspace;
2997 for (ctr = 0; ctr < 8; ctr++) {
2998 outptr = output_buf[ctr] + output_col;
2999
3000 /* Even part */
3001
3002 /* Add fudge factor here for final descale. */
3003 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3004 tmp0 <<= CONST_BITS;
3005
3006 z1 = (INT32) wsptr[4];
3007 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3008 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3009
3010 tmp10 = tmp0 + tmp1;
3011 tmp11 = tmp0 - tmp1;
3012 tmp12 = tmp0 + tmp2;
3013 tmp13 = tmp0 - tmp2;
3014
3015 z1 = (INT32) wsptr[2];
3016 z2 = (INT32) wsptr[6];
3017 z3 = z1 - z2;
3018 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3019 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3020
3021 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3022 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3023 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3024 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3025
3026 tmp20 = tmp10 + tmp0;
3027 tmp27 = tmp10 - tmp0;
3028 tmp21 = tmp12 + tmp1;
3029 tmp26 = tmp12 - tmp1;
3030 tmp22 = tmp13 + tmp2;
3031 tmp25 = tmp13 - tmp2;
3032 tmp23 = tmp11 + tmp3;
3033 tmp24 = tmp11 - tmp3;
3034
3035 /* Odd part */
3036
3037 z1 = (INT32) wsptr[1];
3038 z2 = (INT32) wsptr[3];
3039 z3 = (INT32) wsptr[5];
3040 z4 = (INT32) wsptr[7];
3041
3042 tmp11 = z1 + z3;
3043
3044 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3045 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3046 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3047 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3048 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3049 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3050 tmp0 = tmp1 + tmp2 + tmp3 -
3051 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3052 tmp13 = tmp10 + tmp11 + tmp12 -
3053 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3054 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3055 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3056 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3057 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3058 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3059 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3060 z2 += z4;
3061 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3062 tmp1 += z1;
3063 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3064 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3065 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3066 tmp12 += z2;
3067 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3068 tmp2 += z2;
3069 tmp3 += z2;
3070 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3071 tmp10 += z2;
3072 tmp11 += z2;
3073
3074 /* Final output stage */
3075
3076 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3077 CONST_BITS+PASS1_BITS+3)
3078 & RANGE_MASK];
3079 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3080 CONST_BITS+PASS1_BITS+3)
3081 & RANGE_MASK];
3082 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3083 CONST_BITS+PASS1_BITS+3)
3084 & RANGE_MASK];
3085 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3086 CONST_BITS+PASS1_BITS+3)
3087 & RANGE_MASK];
3088 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3089 CONST_BITS+PASS1_BITS+3)
3090 & RANGE_MASK];
3091 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3092 CONST_BITS+PASS1_BITS+3)
3093 & RANGE_MASK];
3094 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3095 CONST_BITS+PASS1_BITS+3)
3096 & RANGE_MASK];
3097 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3098 CONST_BITS+PASS1_BITS+3)
3099 & RANGE_MASK];
3100 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3101 CONST_BITS+PASS1_BITS+3)
3102 & RANGE_MASK];
3103 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3104 CONST_BITS+PASS1_BITS+3)
3105 & RANGE_MASK];
3106 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3107 CONST_BITS+PASS1_BITS+3)
3108 & RANGE_MASK];
3109 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3110 CONST_BITS+PASS1_BITS+3)
3111 & RANGE_MASK];
3112 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3113 CONST_BITS+PASS1_BITS+3)
3114 & RANGE_MASK];
3115 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3116 CONST_BITS+PASS1_BITS+3)
3117 & RANGE_MASK];
3118 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3119 CONST_BITS+PASS1_BITS+3)
3120 & RANGE_MASK];
3121 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3122 CONST_BITS+PASS1_BITS+3)
3123 & RANGE_MASK];
3124
3125 wsptr += 8; /* advance pointer to next row */
3126 }
3127}
3128
3129
3130/*
3131 * Perform dequantization and inverse DCT on one block of coefficients,
3132 * producing a 14x7 output block.
3133 *
3134 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3135 */
3136
3137GLOBAL(void)
3138jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3139 JCOEFPTR coef_block,
3140 JSAMPARRAY output_buf, JDIMENSION output_col)
3141{
3142 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3143 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3144 INT32 z1, z2, z3, z4;
3145 JCOEFPTR inptr;
3146 ISLOW_MULT_TYPE * quantptr;
3147 int * wsptr;
3148 JSAMPROW outptr;
3149 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3150 int ctr;
3151 int workspace[8*7]; /* buffers data between passes */
3152 SHIFT_TEMPS
3153
3154 /* Pass 1: process columns from input, store into work array.
3155 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3156 */
3157 inptr = coef_block;
3158 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3159 wsptr = workspace;
3160 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3161 /* Even part */
3162
3163 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3164 if (ctr == 0)
3165 CLAMP_DC(tmp23);
3166 tmp23 <<= CONST_BITS;
3167 /* Add fudge factor here for final descale. */
3168 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3169
3170 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3171 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3172 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3173
3174 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3175 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3176 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3177 tmp10 = z1 + z3;
3178 z2 -= tmp10;
3179 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3180 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3181 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3182 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3183
3184 /* Odd part */
3185
3186 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3187 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3188 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3189
3190 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3191 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3192 tmp10 = tmp11 - tmp12;
3193 tmp11 += tmp12;
3194 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3195 tmp11 += tmp12;
3196 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3197 tmp10 += z2;
3198 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3199
3200 /* Final output stage */
3201
3202 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3203 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3204 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3205 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3206 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3207 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3208 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3209 }
3210
3211 /* Pass 2: process 7 rows from work array, store into output array.
3212 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3213 */
3214 wsptr = workspace;
3215 for (ctr = 0; ctr < 7; ctr++) {
3216 outptr = output_buf[ctr] + output_col;
3217
3218 /* Even part */
3219
3220 /* Add fudge factor here for final descale. */
3221 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3222 z1 <<= CONST_BITS;
3223 z4 = (INT32) wsptr[4];
3224 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3225 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3226 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3227
3228 tmp10 = z1 + z2;
3229 tmp11 = z1 + z3;
3230 tmp12 = z1 - z4;
3231
3232 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3233
3234 z1 = (INT32) wsptr[2];
3235 z2 = (INT32) wsptr[6];
3236
3237 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3238
3239 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3240 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3241 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3242 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3243
3244 tmp20 = tmp10 + tmp13;
3245 tmp26 = tmp10 - tmp13;
3246 tmp21 = tmp11 + tmp14;
3247 tmp25 = tmp11 - tmp14;
3248 tmp22 = tmp12 + tmp15;
3249 tmp24 = tmp12 - tmp15;
3250
3251 /* Odd part */
3252
3253 z1 = (INT32) wsptr[1];
3254 z2 = (INT32) wsptr[3];
3255 z3 = (INT32) wsptr[5];
3256 z4 = (INT32) wsptr[7];
3257 z4 <<= CONST_BITS;
3258
3259 tmp14 = z1 + z3;
3260 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3261 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3262 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3263 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3264 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3265 z1 -= z2;
3266 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3267 tmp16 += tmp15;
3268 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3269 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3270 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3271 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3272 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3273 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3274
3275 tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3276
3277 /* Final output stage */
3278
3279 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3280 CONST_BITS+PASS1_BITS+3)
3281 & RANGE_MASK];
3282 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3283 CONST_BITS+PASS1_BITS+3)
3284 & RANGE_MASK];
3285 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3286 CONST_BITS+PASS1_BITS+3)
3287 & RANGE_MASK];
3288 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3289 CONST_BITS+PASS1_BITS+3)
3290 & RANGE_MASK];
3291 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3292 CONST_BITS+PASS1_BITS+3)
3293 & RANGE_MASK];
3294 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3295 CONST_BITS+PASS1_BITS+3)
3296 & RANGE_MASK];
3297 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3298 CONST_BITS+PASS1_BITS+3)
3299 & RANGE_MASK];
3300 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3301 CONST_BITS+PASS1_BITS+3)
3302 & RANGE_MASK];
3303 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3304 CONST_BITS+PASS1_BITS+3)
3305 & RANGE_MASK];
3306 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3307 CONST_BITS+PASS1_BITS+3)
3308 & RANGE_MASK];
3309 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3310 CONST_BITS+PASS1_BITS+3)
3311 & RANGE_MASK];
3312 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3313 CONST_BITS+PASS1_BITS+3)
3314 & RANGE_MASK];
3315 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3316 CONST_BITS+PASS1_BITS+3)
3317 & RANGE_MASK];
3318 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3319 CONST_BITS+PASS1_BITS+3)
3320 & RANGE_MASK];
3321
3322 wsptr += 8; /* advance pointer to next row */
3323 }
3324}
3325
3326
3327/*
3328 * Perform dequantization and inverse DCT on one block of coefficients,
3329 * producing a 12x6 output block.
3330 *
3331 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3332 */
3333
3334GLOBAL(void)
3335jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3336 JCOEFPTR coef_block,
3337 JSAMPARRAY output_buf, JDIMENSION output_col)
3338{
3339 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3340 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3341 INT32 z1, z2, z3, z4;
3342 JCOEFPTR inptr;
3343 ISLOW_MULT_TYPE * quantptr;
3344 int * wsptr;
3345 JSAMPROW outptr;
3346 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3347 int ctr;
3348 int workspace[8*6]; /* buffers data between passes */
3349 SHIFT_TEMPS
3350
3351 /* Pass 1: process columns from input, store into work array.
3352 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3353 */
3354 inptr = coef_block;
3355 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3356 wsptr = workspace;
3357 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3358 /* Even part */
3359
3360 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3361 if (ctr == 0)
3362 CLAMP_DC(tmp10);
3363 tmp10 <<= CONST_BITS;
3364 /* Add fudge factor here for final descale. */
3365 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3366 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3367 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3368 tmp11 = tmp10 + tmp20;
3369 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3370 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3371 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3372 tmp20 = tmp11 + tmp10;
3373 tmp22 = tmp11 - tmp10;
3374
3375 /* Odd part */
3376
3377 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3378 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3379 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3380 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3381 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3382 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3383 tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3384
3385 /* Final output stage */
3386
3387 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3388 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3389 wsptr[8*1] = (int) (tmp21 + tmp11);
3390 wsptr[8*4] = (int) (tmp21 - tmp11);
3391 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3392 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3393 }
3394
3395 /* Pass 2: process 6 rows from work array, store into output array.
3396 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3397 */
3398 wsptr = workspace;
3399 for (ctr = 0; ctr < 6; ctr++) {
3400 outptr = output_buf[ctr] + output_col;
3401
3402 /* Even part */
3403
3404 /* Add fudge factor here for final descale. */
3405 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3406 z3 <<= CONST_BITS;
3407
3408 z4 = (INT32) wsptr[4];
3409 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3410
3411 tmp10 = z3 + z4;
3412 tmp11 = z3 - z4;
3413
3414 z1 = (INT32) wsptr[2];
3415 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3416 z1 <<= CONST_BITS;
3417 z2 = (INT32) wsptr[6];
3418 z2 <<= CONST_BITS;
3419
3420 tmp12 = z1 - z2;
3421
3422 tmp21 = z3 + tmp12;
3423 tmp24 = z3 - tmp12;
3424
3425 tmp12 = z4 + z2;
3426
3427 tmp20 = tmp10 + tmp12;
3428 tmp25 = tmp10 - tmp12;
3429
3430 tmp12 = z4 - z1 - z2;
3431
3432 tmp22 = tmp11 + tmp12;
3433 tmp23 = tmp11 - tmp12;
3434
3435 /* Odd part */
3436
3437 z1 = (INT32) wsptr[1];
3438 z2 = (INT32) wsptr[3];
3439 z3 = (INT32) wsptr[5];
3440 z4 = (INT32) wsptr[7];
3441
3442 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3443 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3444
3445 tmp10 = z1 + z3;
3446 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3447 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3448 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3449 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3450 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3451 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3452 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3453 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3454
3455 z1 -= z4;
3456 z2 -= z3;
3457 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3458 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3459 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3460
3461 /* Final output stage */
3462
3463 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3464 CONST_BITS+PASS1_BITS+3)
3465 & RANGE_MASK];
3466 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3467 CONST_BITS+PASS1_BITS+3)
3468 & RANGE_MASK];
3469 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3470 CONST_BITS+PASS1_BITS+3)
3471 & RANGE_MASK];
3472 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3473 CONST_BITS+PASS1_BITS+3)
3474 & RANGE_MASK];
3475 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3476 CONST_BITS+PASS1_BITS+3)
3477 & RANGE_MASK];
3478 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3479 CONST_BITS+PASS1_BITS+3)
3480 & RANGE_MASK];
3481 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3482 CONST_BITS+PASS1_BITS+3)
3483 & RANGE_MASK];
3484 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3485 CONST_BITS+PASS1_BITS+3)
3486 & RANGE_MASK];
3487 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3488 CONST_BITS+PASS1_BITS+3)
3489 & RANGE_MASK];
3490 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3491 CONST_BITS+PASS1_BITS+3)
3492 & RANGE_MASK];
3493 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3494 CONST_BITS+PASS1_BITS+3)
3495 & RANGE_MASK];
3496 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3497 CONST_BITS+PASS1_BITS+3)
3498 & RANGE_MASK];
3499
3500 wsptr += 8; /* advance pointer to next row */
3501 }
3502}
3503
3504
3505/*
3506 * Perform dequantization and inverse DCT on one block of coefficients,
3507 * producing a 10x5 output block.
3508 *
3509 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3510 */
3511
3512GLOBAL(void)
3513jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3514 JCOEFPTR coef_block,
3515 JSAMPARRAY output_buf, JDIMENSION output_col)
3516{
3517 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3518 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3519 INT32 z1, z2, z3, z4;
3520 JCOEFPTR inptr;
3521 ISLOW_MULT_TYPE * quantptr;
3522 int * wsptr;
3523 JSAMPROW outptr;
3524 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3525 int ctr;
3526 int workspace[8*5]; /* buffers data between passes */
3527 SHIFT_TEMPS
3528
3529 /* Pass 1: process columns from input, store into work array.
3530 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3531 */
3532 inptr = coef_block;
3533 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3534 wsptr = workspace;
3535 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3536 /* Even part */
3537
3538 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3539 if (ctr == 0)
3540 CLAMP_DC(tmp12);
3541 tmp12 <<= CONST_BITS;
3542 /* Add fudge factor here for final descale. */
3543 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3544 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3545 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3546 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3547 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3548 z3 = tmp12 + z2;
3549 tmp10 = z3 + z1;
3550 tmp11 = z3 - z1;
3551 tmp12 -= z2 << 2;
3552
3553 /* Odd part */
3554
3555 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3556 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3557
3558 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3559 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3560 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3561
3562 /* Final output stage */
3563
3564 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3565 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3566 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3567 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3568 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3569 }
3570
3571 /* Pass 2: process 5 rows from work array, store into output array.
3572 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3573 */
3574 wsptr = workspace;
3575 for (ctr = 0; ctr < 5; ctr++) {
3576 outptr = output_buf[ctr] + output_col;
3577
3578 /* Even part */
3579
3580 /* Add fudge factor here for final descale. */
3581 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3582 z3 <<= CONST_BITS;
3583 z4 = (INT32) wsptr[4];
3584 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3585 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3586 tmp10 = z3 + z1;
3587 tmp11 = z3 - z2;
3588
3589 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3590
3591 z2 = (INT32) wsptr[2];
3592 z3 = (INT32) wsptr[6];
3593
3594 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3595 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3596 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3597
3598 tmp20 = tmp10 + tmp12;
3599 tmp24 = tmp10 - tmp12;
3600 tmp21 = tmp11 + tmp13;
3601 tmp23 = tmp11 - tmp13;
3602
3603 /* Odd part */
3604
3605 z1 = (INT32) wsptr[1];
3606 z2 = (INT32) wsptr[3];
3607 z3 = (INT32) wsptr[5];
3608 z3 <<= CONST_BITS;
3609 z4 = (INT32) wsptr[7];
3610
3611 tmp11 = z2 + z4;
3612 tmp13 = z2 - z4;
3613
3614 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3615
3616 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3617 z4 = z3 + tmp12;
3618
3619 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3620 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3621
3622 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3623 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3624
3625 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3626
3627 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3628 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3629
3630 /* Final output stage */
3631
3632 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3633 CONST_BITS+PASS1_BITS+3)
3634 & RANGE_MASK];
3635 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3636 CONST_BITS+PASS1_BITS+3)
3637 & RANGE_MASK];
3638 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3639 CONST_BITS+PASS1_BITS+3)
3640 & RANGE_MASK];
3641 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3642 CONST_BITS+PASS1_BITS+3)
3643 & RANGE_MASK];
3644 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3645 CONST_BITS+PASS1_BITS+3)
3646 & RANGE_MASK];
3647 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3648 CONST_BITS+PASS1_BITS+3)
3649 & RANGE_MASK];
3650 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3651 CONST_BITS+PASS1_BITS+3)
3652 & RANGE_MASK];
3653 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3654 CONST_BITS+PASS1_BITS+3)
3655 & RANGE_MASK];
3656 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3657 CONST_BITS+PASS1_BITS+3)
3658 & RANGE_MASK];
3659 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3660 CONST_BITS+PASS1_BITS+3)
3661 & RANGE_MASK];
3662
3663 wsptr += 8; /* advance pointer to next row */
3664 }
3665}
3666
3667
3668/*
3669 * Perform dequantization and inverse DCT on one block of coefficients,
3670 * producing a 8x4 output block.
3671 *
3672 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3673 */
3674
3675GLOBAL(void)
3676jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3677 JCOEFPTR coef_block,
3678 JSAMPARRAY output_buf, JDIMENSION output_col)
3679{
3680 INT32 tmp0, tmp1, tmp2, tmp3;
3681 INT32 tmp10, tmp11, tmp12, tmp13;
3682 INT32 z1, z2, z3;
3683 JCOEFPTR inptr;
3684 ISLOW_MULT_TYPE * quantptr;
3685 int * wsptr;
3686 JSAMPROW outptr;
3687 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3688 int ctr;
3689 int workspace[8*4]; /* buffers data between passes */
3690 SHIFT_TEMPS
3691
3692 /* Pass 1: process columns from input, store into work array.
3693 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3694 */
3695 inptr = coef_block;
3696 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3697 wsptr = workspace;
3698 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3699 /* Even part */
3700
3701 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3702 if (ctr == 0)
3703 CLAMP_DC(tmp0);
3704 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3705
3706 tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3707 tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3708
3709 /* Odd part */
3710 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3711
3712 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3713 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3714
3715 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3716 /* Add fudge factor here for final descale. */
3717 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3718 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3719 CONST_BITS-PASS1_BITS);
3720 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3721 CONST_BITS-PASS1_BITS);
3722
3723 /* Final output stage */
3724
3725 wsptr[8*0] = (int) (tmp10 + tmp0);
3726 wsptr[8*3] = (int) (tmp10 - tmp0);
3727 wsptr[8*1] = (int) (tmp12 + tmp2);
3728 wsptr[8*2] = (int) (tmp12 - tmp2);
3729 }
3730
3731 /* Pass 2: process rows from work array, store into output array. */
3732 /* Note that we must descale the results by a factor of 8 == 2**3, */
3733 /* and also undo the PASS1_BITS scaling. */
3734
3735 wsptr = workspace;
3736 for (ctr = 0; ctr < 4; ctr++) {
3737 outptr = output_buf[ctr] + output_col;
3738
3739 /* Even part: reverse the even part of the forward DCT. */
3740 /* The rotator is sqrt(2)*c(-6). */
3741
3742 z2 = (INT32) wsptr[2];
3743 z3 = (INT32) wsptr[6];
3744
3745 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
3746 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
3747 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
3748
3749 /* Add fudge factor here for final descale. */
3750 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3751 z3 = (INT32) wsptr[4];
3752
3753 tmp0 = (z2 + z3) << CONST_BITS;
3754 tmp1 = (z2 - z3) << CONST_BITS;
3755
3756 tmp10 = tmp0 + tmp2;
3757 tmp13 = tmp0 - tmp2;
3758 tmp11 = tmp1 + tmp3;
3759 tmp12 = tmp1 - tmp3;
3760
3761 /* Odd part per figure 8; the matrix is unitary and hence its
3762 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3763 */
3764
3765 tmp0 = (INT32) wsptr[7];
3766 tmp1 = (INT32) wsptr[5];
3767 tmp2 = (INT32) wsptr[3];
3768 tmp3 = (INT32) wsptr[1];
3769
3770 z2 = tmp0 + tmp2;
3771 z3 = tmp1 + tmp3;
3772
3773 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
3774 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
3775 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
3776 z2 += z1;
3777 z3 += z1;
3778
3779 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
3780 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
3781 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
3782 tmp0 += z1 + z2;
3783 tmp3 += z1 + z3;
3784
3785 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
3786 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
3787 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
3788 tmp1 += z1 + z3;
3789 tmp2 += z1 + z2;
3790
3791 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3792
3793 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3794 CONST_BITS+PASS1_BITS+3)
3795 & RANGE_MASK];
3796 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3797 CONST_BITS+PASS1_BITS+3)
3798 & RANGE_MASK];
3799 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3800 CONST_BITS+PASS1_BITS+3)
3801 & RANGE_MASK];
3802 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3803 CONST_BITS+PASS1_BITS+3)
3804 & RANGE_MASK];
3805 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3806 CONST_BITS+PASS1_BITS+3)
3807 & RANGE_MASK];
3808 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3809 CONST_BITS+PASS1_BITS+3)
3810 & RANGE_MASK];
3811 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3812 CONST_BITS+PASS1_BITS+3)
3813 & RANGE_MASK];
3814 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3815 CONST_BITS+PASS1_BITS+3)
3816 & RANGE_MASK];
3817
3818 wsptr += DCTSIZE; /* advance pointer to next row */
3819 }
3820}
3821
3822
3823/*
3824 * Perform dequantization and inverse DCT on one block of coefficients,
3825 * producing a reduced-size 6x3 output block.
3826 *
3827 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3828 */
3829
3830GLOBAL(void)
3831jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3832 JCOEFPTR coef_block,
3833 JSAMPARRAY output_buf, JDIMENSION output_col)
3834{
3835 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3836 INT32 z1, z2, z3;
3837 JCOEFPTR inptr;
3838 ISLOW_MULT_TYPE * quantptr;
3839 int * wsptr;
3840 JSAMPROW outptr;
3841 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3842 int ctr;
3843 int workspace[6*3]; /* buffers data between passes */
3844 SHIFT_TEMPS
3845
3846 /* Pass 1: process columns from input, store into work array.
3847 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3848 */
3849 inptr = coef_block;
3850 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3851 wsptr = workspace;
3852 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3853 /* Even part */
3854
3855 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3856 if (ctr == 0)
3857 CLAMP_DC(tmp0);
3858 tmp0 <<= CONST_BITS;
3859 /* Add fudge factor here for final descale. */
3860 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3861 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3862 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3863 tmp10 = tmp0 + tmp12;
3864 tmp2 = tmp0 - tmp12 - tmp12;
3865
3866 /* Odd part */
3867
3868 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3869 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3870
3871 /* Final output stage */
3872
3873 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3874 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3875 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3876 }
3877
3878 /* Pass 2: process 3 rows from work array, store into output array.
3879 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3880 */
3881 wsptr = workspace;
3882 for (ctr = 0; ctr < 3; ctr++) {
3883 outptr = output_buf[ctr] + output_col;
3884
3885 /* Even part */
3886
3887 /* Add fudge factor here for final descale. */
3888 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
3889 tmp0 <<= CONST_BITS;
3890 tmp2 = (INT32) wsptr[4];
3891 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3892 tmp1 = tmp0 + tmp10;
3893 tmp11 = tmp0 - tmp10 - tmp10;
3894 tmp10 = (INT32) wsptr[2];
3895 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3896 tmp10 = tmp1 + tmp0;
3897 tmp12 = tmp1 - tmp0;
3898
3899 /* Odd part */
3900
3901 z1 = (INT32) wsptr[1];
3902 z2 = (INT32) wsptr[3];
3903 z3 = (INT32) wsptr[5];
3904 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3905 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3906 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3907 tmp1 = (z1 - z2 - z3) << CONST_BITS;
3908
3909 /* Final output stage */
3910
3911 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3912 CONST_BITS+PASS1_BITS+3)
3913 & RANGE_MASK];
3914 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3915 CONST_BITS+PASS1_BITS+3)
3916 & RANGE_MASK];
3917 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3918 CONST_BITS+PASS1_BITS+3)
3919 & RANGE_MASK];
3920 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3921 CONST_BITS+PASS1_BITS+3)
3922 & RANGE_MASK];
3923 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3924 CONST_BITS+PASS1_BITS+3)
3925 & RANGE_MASK];
3926 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3927 CONST_BITS+PASS1_BITS+3)
3928 & RANGE_MASK];
3929
3930 wsptr += 6; /* advance pointer to next row */
3931 }
3932}
3933
3934
3935/*
3936 * Perform dequantization and inverse DCT on one block of coefficients,
3937 * producing a 4x2 output block.
3938 *
3939 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3940 */
3941
3942GLOBAL(void)
3943jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3944 JCOEFPTR coef_block,
3945 JSAMPARRAY output_buf, JDIMENSION output_col)
3946{
3947 INT32 tmp0, tmp2, tmp10, tmp12;
3948 INT32 z1, z2, z3;
3949 JCOEFPTR inptr;
3950 ISLOW_MULT_TYPE * quantptr;
3951 INT32 * wsptr;
3952 JSAMPROW outptr;
3953 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3954 int ctr;
3955 INT32 workspace[4*2]; /* buffers data between passes */
3956 SHIFT_TEMPS
3957
3958 /* Pass 1: process columns from input, store into work array. */
3959
3960 inptr = coef_block;
3961 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3962 wsptr = workspace;
3963 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3964 /* Even part */
3965
3966 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3967 if (ctr == 0)
3968 CLAMP_DC(tmp10);
3969
3970 /* Odd part */
3971
3972 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3973
3974 /* Final output stage */
3975
3976 wsptr[4*0] = tmp10 + tmp0;
3977 wsptr[4*1] = tmp10 - tmp0;
3978 }
3979
3980 /* Pass 2: process 2 rows from work array, store into output array.
3981 * 4-point IDCT kernel,
3982 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3983 */
3984 wsptr = workspace;
3985 for (ctr = 0; ctr < 2; ctr++) {
3986 outptr = output_buf[ctr] + output_col;
3987
3988 /* Even part */
3989
3990 /* Add fudge factor here for final descale. */
3991 tmp0 = wsptr[0] + (ONE << 2);
3992 tmp2 = wsptr[2];
3993
3994 tmp10 = (tmp0 + tmp2) << CONST_BITS;
3995 tmp12 = (tmp0 - tmp2) << CONST_BITS;
3996
3997 /* Odd part */
3998 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
3999
4000 z2 = wsptr[1];
4001 z3 = wsptr[3];
4002
4003 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4004 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4005 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4006
4007 /* Final output stage */
4008
4009 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4010 CONST_BITS+3)
4011 & RANGE_MASK];
4012 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4013 CONST_BITS+3)
4014 & RANGE_MASK];
4015 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4016 CONST_BITS+3)
4017 & RANGE_MASK];
4018 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4019 CONST_BITS+3)
4020 & RANGE_MASK];
4021
4022 wsptr += 4; /* advance pointer to next row */
4023 }
4024}
4025
4026
4027/*
4028 * Perform dequantization and inverse DCT on one block of coefficients,
4029 * producing a 2x1 output block.
4030 *
4031 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4032 */
4033
4034GLOBAL(void)
4035jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4036 JCOEFPTR coef_block,
4037 JSAMPARRAY output_buf, JDIMENSION output_col)
4038{
4039 INT32 tmp0, tmp10;
4040 ISLOW_MULT_TYPE * quantptr;
4041 JSAMPROW outptr;
4042 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4043 SHIFT_TEMPS
4044
4045 /* Pass 1: empty. */
4046
4047 /* Pass 2: process 1 row from input, store into output array. */
4048
4049 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4050 outptr = output_buf[0] + output_col;
4051
4052 /* Even part */
4053
4054 tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
4055 CLAMP_DC(tmp10);
4056 /* Add fudge factor here for final descale. */
4057 tmp10 += ONE << 2;
4058
4059 /* Odd part */
4060
4061 tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
4062
4063 /* Final output stage */
4064
4065 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
4066 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
4067}
4068
4069
4070/*
4071 * Perform dequantization and inverse DCT on one block of coefficients,
4072 * producing a 8x16 output block.
4073 *
4074 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4075 */
4076
4077GLOBAL(void)
4078jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4079 JCOEFPTR coef_block,
4080 JSAMPARRAY output_buf, JDIMENSION output_col)
4081{
4082 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4083 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4084 INT32 z1, z2, z3, z4;
4085 JCOEFPTR inptr;
4086 ISLOW_MULT_TYPE * quantptr;
4087 int * wsptr;
4088 JSAMPROW outptr;
4089 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4090 int ctr;
4091 int workspace[8*16]; /* buffers data between passes */
4092 SHIFT_TEMPS
4093
4094 /* Pass 1: process columns from input, store into work array.
4095 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4096 */
4097 inptr = coef_block;
4098 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4099 wsptr = workspace;
4100 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4101 /* Even part */
4102
4103 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4104 if (ctr == 0)
4105 CLAMP_DC(tmp0);
4106 tmp0 <<= CONST_BITS;
4107 /* Add fudge factor here for final descale. */
4108 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4109
4110 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4111 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4112 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4113
4114 tmp10 = tmp0 + tmp1;
4115 tmp11 = tmp0 - tmp1;
4116 tmp12 = tmp0 + tmp2;
4117 tmp13 = tmp0 - tmp2;
4118
4119 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4120 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4121 z3 = z1 - z2;
4122 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4123 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4124
4125 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4126 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4127 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4128 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4129
4130 tmp20 = tmp10 + tmp0;
4131 tmp27 = tmp10 - tmp0;
4132 tmp21 = tmp12 + tmp1;
4133 tmp26 = tmp12 - tmp1;
4134 tmp22 = tmp13 + tmp2;
4135 tmp25 = tmp13 - tmp2;
4136 tmp23 = tmp11 + tmp3;
4137 tmp24 = tmp11 - tmp3;
4138
4139 /* Odd part */
4140
4141 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4142 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4143 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4144 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4145
4146 tmp11 = z1 + z3;
4147
4148 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4149 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4150 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4151 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4152 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4153 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4154 tmp0 = tmp1 + tmp2 + tmp3 -
4155 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4156 tmp13 = tmp10 + tmp11 + tmp12 -
4157 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4158 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4159 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4160 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4161 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4162 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4163 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4164 z2 += z4;
4165 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4166 tmp1 += z1;
4167 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4168 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4169 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4170 tmp12 += z2;
4171 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4172 tmp2 += z2;
4173 tmp3 += z2;
4174 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4175 tmp10 += z2;
4176 tmp11 += z2;
4177
4178 /* Final output stage */
4179
4180 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4181 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4182 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4183 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4184 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4185 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4186 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4187 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4188 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4189 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4190 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4191 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4192 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4193 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4194 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4195 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4196 }
4197
4198 /* Pass 2: process rows from work array, store into output array. */
4199 /* Note that we must descale the results by a factor of 8 == 2**3, */
4200 /* and also undo the PASS1_BITS scaling. */
4201
4202 wsptr = workspace;
4203 for (ctr = 0; ctr < 16; ctr++) {
4204 outptr = output_buf[ctr] + output_col;
4205
4206 /* Even part: reverse the even part of the forward DCT. */
4207 /* The rotator is sqrt(2)*c(-6). */
4208
4209 z2 = (INT32) wsptr[2];
4210 z3 = (INT32) wsptr[6];
4211
4212 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
4213 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
4214 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
4215
4216 /* Add fudge factor here for final descale. */
4217 z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4218 z3 = (INT32) wsptr[4];
4219
4220 tmp0 = (z2 + z3) << CONST_BITS;
4221 tmp1 = (z2 - z3) << CONST_BITS;
4222
4223 tmp10 = tmp0 + tmp2;
4224 tmp13 = tmp0 - tmp2;
4225 tmp11 = tmp1 + tmp3;
4226 tmp12 = tmp1 - tmp3;
4227
4228 /* Odd part per figure 8; the matrix is unitary and hence its
4229 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4230 */
4231
4232 tmp0 = (INT32) wsptr[7];
4233 tmp1 = (INT32) wsptr[5];
4234 tmp2 = (INT32) wsptr[3];
4235 tmp3 = (INT32) wsptr[1];
4236
4237 z2 = tmp0 + tmp2;
4238 z3 = tmp1 + tmp3;
4239
4240 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
4241 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
4242 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
4243 z2 += z1;
4244 z3 += z1;
4245
4246 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
4247 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
4248 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
4249 tmp0 += z1 + z2;
4250 tmp3 += z1 + z3;
4251
4252 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
4253 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
4254 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
4255 tmp1 += z1 + z3;
4256 tmp2 += z1 + z2;
4257
4258 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4259
4260 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4261 CONST_BITS+PASS1_BITS+3)
4262 & RANGE_MASK];
4263 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4264 CONST_BITS+PASS1_BITS+3)
4265 & RANGE_MASK];
4266 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4267 CONST_BITS+PASS1_BITS+3)
4268 & RANGE_MASK];
4269 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4270 CONST_BITS+PASS1_BITS+3)
4271 & RANGE_MASK];
4272 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4273 CONST_BITS+PASS1_BITS+3)
4274 & RANGE_MASK];
4275 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4276 CONST_BITS+PASS1_BITS+3)
4277 & RANGE_MASK];
4278 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4279 CONST_BITS+PASS1_BITS+3)
4280 & RANGE_MASK];
4281 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4282 CONST_BITS+PASS1_BITS+3)
4283 & RANGE_MASK];
4284
4285 wsptr += DCTSIZE; /* advance pointer to next row */
4286 }
4287}
4288
4289
4290/*
4291 * Perform dequantization and inverse DCT on one block of coefficients,
4292 * producing a 7x14 output block.
4293 *
4294 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4295 */
4296
4297GLOBAL(void)
4298jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4299 JCOEFPTR coef_block,
4300 JSAMPARRAY output_buf, JDIMENSION output_col)
4301{
4302 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4303 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4304 INT32 z1, z2, z3, z4;
4305 JCOEFPTR inptr;
4306 ISLOW_MULT_TYPE * quantptr;
4307 int * wsptr;
4308 JSAMPROW outptr;
4309 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4310 int ctr;
4311 int workspace[7*14]; /* buffers data between passes */
4312 SHIFT_TEMPS
4313
4314 /* Pass 1: process columns from input, store into work array.
4315 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4316 */
4317 inptr = coef_block;
4318 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4319 wsptr = workspace;
4320 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4321 /* Even part */
4322
4323 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4324 if (ctr == 0)
4325 CLAMP_DC(z1);
4326 z1 <<= CONST_BITS;
4327 /* Add fudge factor here for final descale. */
4328 z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4329 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4330 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4331 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4332 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4333
4334 tmp10 = z1 + z2;
4335 tmp11 = z1 + z3;
4336 tmp12 = z1 - z4;
4337
4338 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4339 CONST_BITS-PASS1_BITS);
4340
4341 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4342 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4343
4344 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4345
4346 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4347 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4348 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4349 MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4350
4351 tmp20 = tmp10 + tmp13;
4352 tmp26 = tmp10 - tmp13;
4353 tmp21 = tmp11 + tmp14;
4354 tmp25 = tmp11 - tmp14;
4355 tmp22 = tmp12 + tmp15;
4356 tmp24 = tmp12 - tmp15;
4357
4358 /* Odd part */
4359
4360 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4361 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4362 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4363 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4364 tmp13 = z4 << CONST_BITS;
4365
4366 tmp14 = z1 + z3;
4367 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4368 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4369 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4370 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4371 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4372 z1 -= z2;
4373 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4374 tmp16 += tmp15;
4375 z1 += z4;
4376 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4377 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4378 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4379 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4380 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4381 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4382
4383 tmp13 = (z1 - z3) << PASS1_BITS;
4384
4385 /* Final output stage */
4386
4387 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4388 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4389 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4390 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4391 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4392 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4393 wsptr[7*3] = (int) (tmp23 + tmp13);
4394 wsptr[7*10] = (int) (tmp23 - tmp13);
4395 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4396 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4397 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4398 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4399 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4400 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4401 }
4402
4403 /* Pass 2: process 14 rows from work array, store into output array.
4404 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4405 */
4406 wsptr = workspace;
4407 for (ctr = 0; ctr < 14; ctr++) {
4408 outptr = output_buf[ctr] + output_col;
4409
4410 /* Even part */
4411
4412 /* Add fudge factor here for final descale. */
4413 tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4414 tmp23 <<= CONST_BITS;
4415
4416 z1 = (INT32) wsptr[2];
4417 z2 = (INT32) wsptr[4];
4418 z3 = (INT32) wsptr[6];
4419
4420 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4421 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4422 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4423 tmp10 = z1 + z3;
4424 z2 -= tmp10;
4425 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4426 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4427 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4428 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4429
4430 /* Odd part */
4431
4432 z1 = (INT32) wsptr[1];
4433 z2 = (INT32) wsptr[3];
4434 z3 = (INT32) wsptr[5];
4435
4436 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4437 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4438 tmp10 = tmp11 - tmp12;
4439 tmp11 += tmp12;
4440 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4441 tmp11 += tmp12;
4442 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4443 tmp10 += z2;
4444 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4445
4446 /* Final output stage */
4447
4448 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4449 CONST_BITS+PASS1_BITS+3)
4450 & RANGE_MASK];
4451 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4452 CONST_BITS+PASS1_BITS+3)
4453 & RANGE_MASK];
4454 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4455 CONST_BITS+PASS1_BITS+3)
4456 & RANGE_MASK];
4457 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4458 CONST_BITS+PASS1_BITS+3)
4459 & RANGE_MASK];
4460 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4461 CONST_BITS+PASS1_BITS+3)
4462 & RANGE_MASK];
4463 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4464 CONST_BITS+PASS1_BITS+3)
4465 & RANGE_MASK];
4466 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4467 CONST_BITS+PASS1_BITS+3)
4468 & RANGE_MASK];
4469
4470 wsptr += 7; /* advance pointer to next row */
4471 }
4472}
4473
4474
4475/*
4476 * Perform dequantization and inverse DCT on one block of coefficients,
4477 * producing a 6x12 output block.
4478 *
4479 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4480 */
4481
4482GLOBAL(void)
4483jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4484 JCOEFPTR coef_block,
4485 JSAMPARRAY output_buf, JDIMENSION output_col)
4486{
4487 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4488 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4489 INT32 z1, z2, z3, z4;
4490 JCOEFPTR inptr;
4491 ISLOW_MULT_TYPE * quantptr;
4492 int * wsptr;
4493 JSAMPROW outptr;
4494 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4495 int ctr;
4496 int workspace[6*12]; /* buffers data between passes */
4497 SHIFT_TEMPS
4498
4499 /* Pass 1: process columns from input, store into work array.
4500 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4501 */
4502 inptr = coef_block;
4503 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4504 wsptr = workspace;
4505 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4506 /* Even part */
4507
4508 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4509 if (ctr == 0)
4510 CLAMP_DC(z3);
4511 z3 <<= CONST_BITS;
4512 /* Add fudge factor here for final descale. */
4513 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4514
4515 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4516 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4517
4518 tmp10 = z3 + z4;
4519 tmp11 = z3 - z4;
4520
4521 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4522 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4523 z1 <<= CONST_BITS;
4524 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4525 z2 <<= CONST_BITS;
4526
4527 tmp12 = z1 - z2;
4528
4529 tmp21 = z3 + tmp12;
4530 tmp24 = z3 - tmp12;
4531
4532 tmp12 = z4 + z2;
4533
4534 tmp20 = tmp10 + tmp12;
4535 tmp25 = tmp10 - tmp12;
4536
4537 tmp12 = z4 - z1 - z2;
4538
4539 tmp22 = tmp11 + tmp12;
4540 tmp23 = tmp11 - tmp12;
4541
4542 /* Odd part */
4543
4544 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4545 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4546 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4547 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4548
4549 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4550 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4551
4552 tmp10 = z1 + z3;
4553 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4554 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4555 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4556 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4557 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4558 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4559 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4560 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4561
4562 z1 -= z4;
4563 z2 -= z3;
4564 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4565 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4566 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4567
4568 /* Final output stage */
4569
4570 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4571 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4572 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4573 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4574 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4575 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4576 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4577 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4578 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4579 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4580 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4581 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4582 }
4583
4584 /* Pass 2: process 12 rows from work array, store into output array.
4585 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4586 */
4587 wsptr = workspace;
4588 for (ctr = 0; ctr < 12; ctr++) {
4589 outptr = output_buf[ctr] + output_col;
4590
4591 /* Even part */
4592
4593 /* Add fudge factor here for final descale. */
4594 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4595 tmp10 <<= CONST_BITS;
4596 tmp12 = (INT32) wsptr[4];
4597 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4598 tmp11 = tmp10 + tmp20;
4599 tmp21 = tmp10 - tmp20 - tmp20;
4600 tmp20 = (INT32) wsptr[2];
4601 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4602 tmp20 = tmp11 + tmp10;
4603 tmp22 = tmp11 - tmp10;
4604
4605 /* Odd part */
4606
4607 z1 = (INT32) wsptr[1];
4608 z2 = (INT32) wsptr[3];
4609 z3 = (INT32) wsptr[5];
4610 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4611 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4612 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4613 tmp11 = (z1 - z2 - z3) << CONST_BITS;
4614
4615 /* Final output stage */
4616
4617 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4618 CONST_BITS+PASS1_BITS+3)
4619 & RANGE_MASK];
4620 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4621 CONST_BITS+PASS1_BITS+3)
4622 & RANGE_MASK];
4623 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4624 CONST_BITS+PASS1_BITS+3)
4625 & RANGE_MASK];
4626 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4627 CONST_BITS+PASS1_BITS+3)
4628 & RANGE_MASK];
4629 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4630 CONST_BITS+PASS1_BITS+3)
4631 & RANGE_MASK];
4632 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4633 CONST_BITS+PASS1_BITS+3)
4634 & RANGE_MASK];
4635
4636 wsptr += 6; /* advance pointer to next row */
4637 }
4638}
4639
4640
4641/*
4642 * Perform dequantization and inverse DCT on one block of coefficients,
4643 * producing a 5x10 output block.
4644 *
4645 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4646 */
4647
4648GLOBAL(void)
4649jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4650 JCOEFPTR coef_block,
4651 JSAMPARRAY output_buf, JDIMENSION output_col)
4652{
4653 INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4654 INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4655 INT32 z1, z2, z3, z4, z5;
4656 JCOEFPTR inptr;
4657 ISLOW_MULT_TYPE * quantptr;
4658 int * wsptr;
4659 JSAMPROW outptr;
4660 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4661 int ctr;
4662 int workspace[5*10]; /* buffers data between passes */
4663 SHIFT_TEMPS
4664
4665 /* Pass 1: process columns from input, store into work array.
4666 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4667 */
4668 inptr = coef_block;
4669 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4670 wsptr = workspace;
4671 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4672 /* Even part */
4673
4674 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4675 if (ctr == 0)
4676 CLAMP_DC(z3);
4677 z3 <<= CONST_BITS;
4678 /* Add fudge factor here for final descale. */
4679 z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4680 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4681 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4682 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4683 tmp10 = z3 + z1;
4684 tmp11 = z3 - z2;
4685
4686 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4687 CONST_BITS-PASS1_BITS);
4688
4689 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4690 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4691
4692 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4693 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4694 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4695
4696 tmp20 = tmp10 + tmp12;
4697 tmp24 = tmp10 - tmp12;
4698 tmp21 = tmp11 + tmp13;
4699 tmp23 = tmp11 - tmp13;
4700
4701 /* Odd part */
4702
4703 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4704 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4705 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4706 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4707
4708 tmp11 = z2 + z4;
4709 tmp13 = z2 - z4;
4710
4711 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4712 z5 = z3 << CONST_BITS;
4713
4714 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4715 z4 = z5 + tmp12;
4716
4717 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4718 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4719
4720 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4721 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4722
4723 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4724
4725 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4726 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4727
4728 /* Final output stage */
4729
4730 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4731 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4732 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4733 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4734 wsptr[5*2] = (int) (tmp22 + tmp12);
4735 wsptr[5*7] = (int) (tmp22 - tmp12);
4736 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4737 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4738 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4739 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4740 }
4741
4742 /* Pass 2: process 10 rows from work array, store into output array.
4743 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4744 */
4745 wsptr = workspace;
4746 for (ctr = 0; ctr < 10; ctr++) {
4747 outptr = output_buf[ctr] + output_col;
4748
4749 /* Even part */
4750
4751 /* Add fudge factor here for final descale. */
4752 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4753 tmp12 <<= CONST_BITS;
4754 tmp13 = (INT32) wsptr[2];
4755 tmp14 = (INT32) wsptr[4];
4756 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4757 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4758 z3 = tmp12 + z2;
4759 tmp10 = z3 + z1;
4760 tmp11 = z3 - z1;
4761 tmp12 -= z2 << 2;
4762
4763 /* Odd part */
4764
4765 z2 = (INT32) wsptr[1];
4766 z3 = (INT32) wsptr[3];
4767
4768 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4769 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4770 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4771
4772 /* Final output stage */
4773
4774 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4775 CONST_BITS+PASS1_BITS+3)
4776 & RANGE_MASK];
4777 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4778 CONST_BITS+PASS1_BITS+3)
4779 & RANGE_MASK];
4780 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4781 CONST_BITS+PASS1_BITS+3)
4782 & RANGE_MASK];
4783 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4784 CONST_BITS+PASS1_BITS+3)
4785 & RANGE_MASK];
4786 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4787 CONST_BITS+PASS1_BITS+3)
4788 & RANGE_MASK];
4789
4790 wsptr += 5; /* advance pointer to next row */
4791 }
4792}
4793
4794
4795/*
4796 * Perform dequantization and inverse DCT on one block of coefficients,
4797 * producing a 4x8 output block.
4798 *
4799 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4800 */
4801
4802GLOBAL(void)
4803jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4804 JCOEFPTR coef_block,
4805 JSAMPARRAY output_buf, JDIMENSION output_col)
4806{
4807 INT32 tmp0, tmp1, tmp2, tmp3;
4808 INT32 tmp10, tmp11, tmp12, tmp13;
4809 INT32 z1, z2, z3;
4810 JCOEFPTR inptr;
4811 ISLOW_MULT_TYPE * quantptr;
4812 int * wsptr;
4813 JSAMPROW outptr;
4814 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4815 int ctr;
4816 int workspace[4*8]; /* buffers data between passes */
4817 SHIFT_TEMPS
4818
4819 /* Pass 1: process columns from input, store into work array. */
4820 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
4821 /* furthermore, we scale the results by 2**PASS1_BITS. */
4822
4823 inptr = coef_block;
4824 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4825 wsptr = workspace;
4826 for (ctr = 4; ctr > 0; ctr--) {
4827 /* Due to quantization, we will usually find that many of the input
4828 * coefficients are zero, especially the AC terms. We can exploit this
4829 * by short-circuiting the IDCT calculation for any column in which all
4830 * the AC terms are zero. In that case each output is equal to the
4831 * DC coefficient (with scale factor as needed).
4832 * With typical images and quantization tables, half or more of the
4833 * column DCT calculations can be simplified this way.
4834 */
4835
4836 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4837 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4838 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4839 inptr[DCTSIZE*7] == 0) {
4840 /* AC terms all zero */
4841 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4842 if (ctr == 4)
4843 CLAMP_DC(dcval);
4844 dcval <<= PASS1_BITS;
4845 wsptr[4*0] = dcval;
4846 wsptr[4*1] = dcval;
4847 wsptr[4*2] = dcval;
4848 wsptr[4*3] = dcval;
4849 wsptr[4*4] = dcval;
4850 wsptr[4*5] = dcval;
4851 wsptr[4*6] = dcval;
4852 wsptr[4*7] = dcval;
4853
4854 inptr++; /* advance pointers to next column */
4855 quantptr++;
4856 wsptr++;
4857 continue;
4858 }
4859
4860 /* Even part: reverse the even part of the forward DCT. */
4861 /* The rotator is sqrt(2)*c(-6). */
4862
4863 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4864 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4865
4866 z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
4867 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
4868 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
4869
4870 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4871 if (ctr == 4)
4872 CLAMP_DC(z2);
4873 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4874 z2 <<= CONST_BITS;
4875 z3 <<= CONST_BITS;
4876 /* Add fudge factor here for final descale. */
4877 z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4878
4879 tmp0 = z2 + z3;
4880 tmp1 = z2 - z3;
4881
4882 tmp10 = tmp0 + tmp2;
4883 tmp13 = tmp0 - tmp2;
4884 tmp11 = tmp1 + tmp3;
4885 tmp12 = tmp1 - tmp3;
4886
4887 /* Odd part per figure 8; the matrix is unitary and hence its
4888 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4889 */
4890
4891 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4892 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4893 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4894 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4895
4896 z2 = tmp0 + tmp2;
4897 z3 = tmp1 + tmp3;
4898
4899 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
4900 z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
4901 z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
4902 z2 += z1;
4903 z3 += z1;
4904
4905 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
4906 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
4907 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
4908 tmp0 += z1 + z2;
4909 tmp3 += z1 + z3;
4910
4911 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
4912 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
4913 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
4914 tmp1 += z1 + z3;
4915 tmp2 += z1 + z2;
4916
4917 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4918
4919 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4920 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4921 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4922 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4923 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4924 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4925 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4926 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4927
4928 inptr++; /* advance pointers to next column */
4929 quantptr++;
4930 wsptr++;
4931 }
4932
4933 /* Pass 2: process 8 rows from work array, store into output array.
4934 * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4935 */
4936 wsptr = workspace;
4937 for (ctr = 0; ctr < 8; ctr++) {
4938 outptr = output_buf[ctr] + output_col;
4939
4940 /* Even part */
4941
4942 /* Add fudge factor here for final descale. */
4943 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
4944 tmp2 = (INT32) wsptr[2];
4945
4946 tmp10 = (tmp0 + tmp2) << CONST_BITS;
4947 tmp12 = (tmp0 - tmp2) << CONST_BITS;
4948
4949 /* Odd part */
4950 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
4951
4952 z2 = (INT32) wsptr[1];
4953 z3 = (INT32) wsptr[3];
4954
4955 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4956 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4957 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4958
4959 /* Final output stage */
4960
4961 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4962 CONST_BITS+PASS1_BITS+3)
4963 & RANGE_MASK];
4964 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4965 CONST_BITS+PASS1_BITS+3)
4966 & RANGE_MASK];
4967 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4968 CONST_BITS+PASS1_BITS+3)
4969 & RANGE_MASK];
4970 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4971 CONST_BITS+PASS1_BITS+3)
4972 & RANGE_MASK];
4973
4974 wsptr += 4; /* advance pointer to next row */
4975 }
4976}
4977
4978
4979/*
4980 * Perform dequantization and inverse DCT on one block of coefficients,
4981 * producing a reduced-size 3x6 output block.
4982 *
4983 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
4984 */
4985
4986GLOBAL(void)
4987jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4988 JCOEFPTR coef_block,
4989 JSAMPARRAY output_buf, JDIMENSION output_col)
4990{
4991 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
4992 INT32 z1, z2, z3;
4993 JCOEFPTR inptr;
4994 ISLOW_MULT_TYPE * quantptr;
4995 int * wsptr;
4996 JSAMPROW outptr;
4997 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4998 int ctr;
4999 int workspace[3*6]; /* buffers data between passes */
5000 SHIFT_TEMPS
5001
5002 /* Pass 1: process columns from input, store into work array.
5003 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5004 */
5005 inptr = coef_block;
5006 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5007 wsptr = workspace;
5008 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5009 /* Even part */
5010
5011 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5012 if (ctr == 0)
5013 CLAMP_DC(tmp0);
5014 tmp0 <<= CONST_BITS;
5015 /* Add fudge factor here for final descale. */
5016 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5017 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5018 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5019 tmp1 = tmp0 + tmp10;
5020 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5021 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5022 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5023 tmp10 = tmp1 + tmp0;
5024 tmp12 = tmp1 - tmp0;
5025
5026 /* Odd part */
5027
5028 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5029 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5030 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5031 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5032 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5033 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5034 tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5035
5036 /* Final output stage */
5037
5038 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5039 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5040 wsptr[3*1] = (int) (tmp11 + tmp1);
5041 wsptr[3*4] = (int) (tmp11 - tmp1);
5042 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5043 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5044 }
5045
5046 /* Pass 2: process 6 rows from work array, store into output array.
5047 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5048 */
5049 wsptr = workspace;
5050 for (ctr = 0; ctr < 6; ctr++) {
5051 outptr = output_buf[ctr] + output_col;
5052
5053 /* Even part */
5054
5055 /* Add fudge factor here for final descale. */
5056 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
5057 tmp0 <<= CONST_BITS;
5058 tmp2 = (INT32) wsptr[2];
5059 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5060 tmp10 = tmp0 + tmp12;
5061 tmp2 = tmp0 - tmp12 - tmp12;
5062
5063 /* Odd part */
5064
5065 tmp12 = (INT32) wsptr[1];
5066 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5067
5068 /* Final output stage */
5069
5070 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5071 CONST_BITS+PASS1_BITS+3)
5072 & RANGE_MASK];
5073 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5074 CONST_BITS+PASS1_BITS+3)
5075 & RANGE_MASK];
5076 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5077 CONST_BITS+PASS1_BITS+3)
5078 & RANGE_MASK];
5079
5080 wsptr += 3; /* advance pointer to next row */
5081 }
5082}
5083
5084
5085/*
5086 * Perform dequantization and inverse DCT on one block of coefficients,
5087 * producing a 2x4 output block.
5088 *
5089 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5090 */
5091
5092GLOBAL(void)
5093jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5094 JCOEFPTR coef_block,
5095 JSAMPARRAY output_buf, JDIMENSION output_col)
5096{
5097 INT32 tmp0, tmp2, tmp10, tmp12;
5098 INT32 z1, z2, z3;
5099 JCOEFPTR inptr;
5100 ISLOW_MULT_TYPE * quantptr;
5101 INT32 * wsptr;
5102 JSAMPROW outptr;
5103 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5104 int ctr;
5105 INT32 workspace[2*4]; /* buffers data between passes */
5106 SHIFT_TEMPS
5107
5108 /* Pass 1: process columns from input, store into work array.
5109 * 4-point IDCT kernel,
5110 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5111 */
5112 inptr = coef_block;
5113 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5114 wsptr = workspace;
5115 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5116 /* Even part */
5117
5118 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5119 if (ctr == 0)
5120 CLAMP_DC(tmp0);
5121 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5122
5123 tmp10 = (tmp0 + tmp2) << CONST_BITS;
5124 tmp12 = (tmp0 - tmp2) << CONST_BITS;
5125
5126 /* Odd part */
5127 /* Same rotation as in the even part of the 8x8 LL&M IDCT */
5128
5129 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5130 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5131
5132 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5133 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5134 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5135
5136 /* Final output stage */
5137
5138 wsptr[2*0] = tmp10 + tmp0;
5139 wsptr[2*3] = tmp10 - tmp0;
5140 wsptr[2*1] = tmp12 + tmp2;
5141 wsptr[2*2] = tmp12 - tmp2;
5142 }
5143
5144 /* Pass 2: process 4 rows from work array, store into output array. */
5145
5146 wsptr = workspace;
5147 for (ctr = 0; ctr < 4; ctr++) {
5148 outptr = output_buf[ctr] + output_col;
5149
5150 /* Even part */
5151
5152 /* Add fudge factor here for final descale. */
5153 tmp10 = wsptr[0] + (ONE << (CONST_BITS+2));
5154
5155 /* Odd part */
5156
5157 tmp0 = wsptr[1];
5158
5159 /* Final output stage */
5160
5161 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5162 & RANGE_MASK];
5163 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5164 & RANGE_MASK];
5165
5166 wsptr += 2; /* advance pointer to next row */
5167 }
5168}
5169
5170
5171/*
5172 * Perform dequantization and inverse DCT on one block of coefficients,
5173 * producing a 1x2 output block.
5174 *
5175 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5176 */
5177
5178GLOBAL(void)
5179jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5180 JCOEFPTR coef_block,
5181 JSAMPARRAY output_buf, JDIMENSION output_col)
5182{
5183 INT32 tmp0, tmp10;
5184 ISLOW_MULT_TYPE * quantptr;
5185 JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5186 SHIFT_TEMPS
5187
5188 /* Process 1 column from input, store into output array. */
5189
5190 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5191
5192 /* Even part */
5193
5194 tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5195 CLAMP_DC(tmp10);
5196 /* Add fudge factor here for final descale. */
5197 tmp10 += ONE << 2;
5198
5199 /* Odd part */
5200
5201 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5202
5203 /* Final output stage */
5204
5205 output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
5206 & RANGE_MASK];
5207 output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
5208 & RANGE_MASK];
5209}
5210
5211#endif /* IDCT_SCALING_SUPPORTED */
5212#endif /* DCT_ISLOW_SUPPORTED */
5213