1/*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5/*
6 * jcdctmgr.c
7 *
8 * Copyright (C) 1994-1996, Thomas G. Lane.
9 * This file is part of the Independent JPEG Group's software.
10 * For conditions of distribution and use, see the accompanying README file.
11 *
12 * This file contains the forward-DCT management logic.
13 * This code selects a particular DCT implementation to be used,
14 * and it performs related housekeeping chores including coefficient
15 * quantization.
16 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
21#include "jdct.h" /* Private declarations for DCT subsystem */
22
23
24/* Private subobject for this module */
25
26typedef struct {
27 struct jpeg_forward_dct pub; /* public fields */
28
29 /* Pointer to the DCT routine actually in use */
30 forward_DCT_method_ptr do_dct;
31
32 /* The actual post-DCT divisors --- not identical to the quant table
33 * entries, because of scaling (especially for an unnormalized DCT).
34 * Each table is given in normal array order.
35 */
36 DCTELEM * divisors[NUM_QUANT_TBLS];
37
38#ifdef DCT_FLOAT_SUPPORTED
39 /* Same as above for the floating-point case. */
40 float_DCT_method_ptr do_float_dct;
41 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
42#endif
43} my_fdct_controller;
44
45typedef my_fdct_controller * my_fdct_ptr;
46
47
48/*
49 * Initialize for a processing pass.
50 * Verify that all referenced Q-tables are present, and set up
51 * the divisor table for each one.
52 * In the current implementation, DCT of all components is done during
53 * the first pass, even if only some components will be output in the
54 * first scan. Hence all components should be examined here.
55 */
56
57METHODDEF(void)
58start_pass_fdctmgr (j_compress_ptr cinfo)
59{
60 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
61 int ci, qtblno, i;
62 jpeg_component_info *compptr;
63 JQUANT_TBL * qtbl;
64 DCTELEM * dtbl;
65
66 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
67 ci++, compptr++) {
68 qtblno = compptr->quant_tbl_no;
69 /* Make sure specified quantization table is present */
70 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
71 cinfo->quant_tbl_ptrs[qtblno] == NULL)
72 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
73 qtbl = cinfo->quant_tbl_ptrs[qtblno];
74 /* Compute divisors for this quant table */
75 /* We may do this more than once for same table, but it's not a big deal */
76 switch (cinfo->dct_method) {
77#ifdef DCT_ISLOW_SUPPORTED
78 case JDCT_ISLOW:
79 /* For LL&M IDCT method, divisors are equal to raw quantization
80 * coefficients multiplied by 8 (to counteract scaling).
81 */
82 if (fdct->divisors[qtblno] == NULL) {
83 fdct->divisors[qtblno] = (DCTELEM *)
84 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
85 DCTSIZE2 * SIZEOF(DCTELEM));
86 }
87 dtbl = fdct->divisors[qtblno];
88 for (i = 0; i < DCTSIZE2; i++) {
89 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
90 }
91 break;
92#endif
93#ifdef DCT_IFAST_SUPPORTED
94 case JDCT_IFAST:
95 {
96 /* For AA&N IDCT method, divisors are equal to quantization
97 * coefficients scaled by scalefactor[row]*scalefactor[col], where
98 * scalefactor[0] = 1
99 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
100 * We apply a further scale factor of 8.
101 */
102#define CONST_BITS 14
103 static const INT16 aanscales[DCTSIZE2] = {
104 /* precomputed values scaled up by 14 bits */
105 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
106 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
107 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
108 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
109 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
110 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
111 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
112 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
113 };
114 SHIFT_TEMPS
115
116 if (fdct->divisors[qtblno] == NULL) {
117 fdct->divisors[qtblno] = (DCTELEM *)
118 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
119 DCTSIZE2 * SIZEOF(DCTELEM));
120 }
121 dtbl = fdct->divisors[qtblno];
122 for (i = 0; i < DCTSIZE2; i++) {
123 dtbl[i] = (DCTELEM)
124 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
125 (INT32) aanscales[i]),
126 CONST_BITS-3);
127 }
128 }
129 break;
130#endif
131#ifdef DCT_FLOAT_SUPPORTED
132 case JDCT_FLOAT:
133 {
134 /* For float AA&N IDCT method, divisors are equal to quantization
135 * coefficients scaled by scalefactor[row]*scalefactor[col], where
136 * scalefactor[0] = 1
137 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
138 * We apply a further scale factor of 8.
139 * What's actually stored is 1/divisor so that the inner loop can
140 * use a multiplication rather than a division.
141 */
142 FAST_FLOAT * fdtbl;
143 int row, col;
144 static const double aanscalefactor[DCTSIZE] = {
145 1.0, 1.387039845, 1.306562965, 1.175875602,
146 1.0, 0.785694958, 0.541196100, 0.275899379
147 };
148
149 if (fdct->float_divisors[qtblno] == NULL) {
150 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
151 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
152 DCTSIZE2 * SIZEOF(FAST_FLOAT));
153 }
154 fdtbl = fdct->float_divisors[qtblno];
155 i = 0;
156 for (row = 0; row < DCTSIZE; row++) {
157 for (col = 0; col < DCTSIZE; col++) {
158 fdtbl[i] = (FAST_FLOAT)
159 (1.0 / (((double) qtbl->quantval[i] *
160 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
161 i++;
162 }
163 }
164 }
165 break;
166#endif
167 default:
168 ERREXIT(cinfo, JERR_NOT_COMPILED);
169 break;
170 }
171 }
172}
173
174
175/*
176 * Perform forward DCT on one or more blocks of a component.
177 *
178 * The input samples are taken from the sample_data[] array starting at
179 * position start_row/start_col, and moving to the right for any additional
180 * blocks. The quantized coefficients are returned in coef_blocks[].
181 */
182
183METHODDEF(void)
184forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
185 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
186 JDIMENSION start_row, JDIMENSION start_col,
187 JDIMENSION num_blocks)
188/* This version is used for integer DCT implementations. */
189{
190 /* This routine is heavily used, so it's worth coding it tightly. */
191 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
192 forward_DCT_method_ptr do_dct = fdct->do_dct;
193 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
194 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
195 JDIMENSION bi;
196
197 sample_data += start_row; /* fold in the vertical offset once */
198
199 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
200 /* Load data into workspace, applying unsigned->signed conversion */
201 { register DCTELEM *workspaceptr;
202 register JSAMPROW elemptr;
203 register int elemr;
204
205 workspaceptr = workspace;
206 for (elemr = 0; elemr < DCTSIZE; elemr++) {
207 elemptr = sample_data[elemr] + start_col;
208#if DCTSIZE == 8 /* unroll the inner loop */
209 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
210 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
211 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
212 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
213 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
214 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
215 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
216 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
217#else
218 { register int elemc;
219 for (elemc = DCTSIZE; elemc > 0; elemc--) {
220 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
221 }
222 }
223#endif
224 }
225 }
226
227 /* Perform the DCT */
228 (*do_dct) (workspace);
229
230 /* Quantize/descale the coefficients, and store into coef_blocks[] */
231 { register DCTELEM temp, qval;
232 register int i;
233 register JCOEFPTR output_ptr = coef_blocks[bi];
234
235 for (i = 0; i < DCTSIZE2; i++) {
236 qval = divisors[i];
237 temp = workspace[i];
238 /* Divide the coefficient value by qval, ensuring proper rounding.
239 * Since C does not specify the direction of rounding for negative
240 * quotients, we have to force the dividend positive for portability.
241 *
242 * In most files, at least half of the output values will be zero
243 * (at default quantization settings, more like three-quarters...)
244 * so we should ensure that this case is fast. On many machines,
245 * a comparison is enough cheaper than a divide to make a special test
246 * a win. Since both inputs will be nonnegative, we need only test
247 * for a < b to discover whether a/b is 0.
248 * If your machine's division is fast enough, define FAST_DIVIDE.
249 */
250#ifdef FAST_DIVIDE
251#define DIVIDE_BY(a,b) a /= b
252#else
253#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
254#endif
255 if (temp < 0) {
256 temp = -temp;
257 temp += qval>>1; /* for rounding */
258 DIVIDE_BY(temp, qval);
259 temp = -temp;
260 } else {
261 temp += qval>>1; /* for rounding */
262 DIVIDE_BY(temp, qval);
263 }
264 output_ptr[i] = (JCOEF) temp;
265 }
266 }
267 }
268}
269
270
271#ifdef DCT_FLOAT_SUPPORTED
272
273METHODDEF(void)
274forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
275 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
276 JDIMENSION start_row, JDIMENSION start_col,
277 JDIMENSION num_blocks)
278/* This version is used for floating-point DCT implementations. */
279{
280 /* This routine is heavily used, so it's worth coding it tightly. */
281 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
282 float_DCT_method_ptr do_dct = fdct->do_float_dct;
283 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
284 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
285 JDIMENSION bi;
286
287 sample_data += start_row; /* fold in the vertical offset once */
288
289 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
290 /* Load data into workspace, applying unsigned->signed conversion */
291 { register FAST_FLOAT *workspaceptr;
292 register JSAMPROW elemptr;
293 register int elemr;
294
295 workspaceptr = workspace;
296 for (elemr = 0; elemr < DCTSIZE; elemr++) {
297 elemptr = sample_data[elemr] + start_col;
298#if DCTSIZE == 8 /* unroll the inner loop */
299 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
300 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
301 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
302 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
303 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
304 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
305 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
306 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
307#else
308 { register int elemc;
309 for (elemc = DCTSIZE; elemc > 0; elemc--) {
310 *workspaceptr++ = (FAST_FLOAT)
311 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
312 }
313 }
314#endif
315 }
316 }
317
318 /* Perform the DCT */
319 (*do_dct) (workspace);
320
321 /* Quantize/descale the coefficients, and store into coef_blocks[] */
322 { register FAST_FLOAT temp;
323 register int i;
324 register JCOEFPTR output_ptr = coef_blocks[bi];
325
326 for (i = 0; i < DCTSIZE2; i++) {
327 /* Apply the quantization and scaling factor */
328 temp = workspace[i] * divisors[i];
329 /* Round to nearest integer.
330 * Since C does not specify the direction of rounding for negative
331 * quotients, we have to force the dividend positive for portability.
332 * The maximum coefficient size is +-16K (for 12-bit data), so this
333 * code should work for either 16-bit or 32-bit ints.
334 */
335 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
336 }
337 }
338 }
339}
340
341#endif /* DCT_FLOAT_SUPPORTED */
342
343
344/*
345 * Initialize FDCT manager.
346 */
347
348GLOBAL(void)
349jinit_forward_dct (j_compress_ptr cinfo)
350{
351 my_fdct_ptr fdct;
352 int i;
353
354 fdct = (my_fdct_ptr)
355 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
356 SIZEOF(my_fdct_controller));
357 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
358 fdct->pub.start_pass = start_pass_fdctmgr;
359
360 switch (cinfo->dct_method) {
361#ifdef DCT_ISLOW_SUPPORTED
362 case JDCT_ISLOW:
363 fdct->pub.forward_DCT = forward_DCT;
364 fdct->do_dct = jpeg_fdct_islow;
365 break;
366#endif
367#ifdef DCT_IFAST_SUPPORTED
368 case JDCT_IFAST:
369 fdct->pub.forward_DCT = forward_DCT;
370 fdct->do_dct = jpeg_fdct_ifast;
371 break;
372#endif
373#ifdef DCT_FLOAT_SUPPORTED
374 case JDCT_FLOAT:
375 fdct->pub.forward_DCT = forward_DCT_float;
376 fdct->do_float_dct = jpeg_fdct_float;
377 break;
378#endif
379 default:
380 ERREXIT(cinfo, JERR_NOT_COMPILED);
381 break;
382 }
383
384 /* Mark divisor tables unallocated */
385 for (i = 0; i < NUM_QUANT_TBLS; i++) {
386 fdct->divisors[i] = NULL;
387#ifdef DCT_FLOAT_SUPPORTED
388 fdct->float_divisors[i] = NULL;
389#endif
390 }
391}
392