jcdctmgr.c source code [Skia/third_party/externals/libjpeg-turbo/jcdctmgr.c]

1	/*
2	* jcdctmgr.c
3	*
4	* This file was part of the Independent JPEG Group's software:
5	* Copyright (C) 1994-1996, Thomas G. Lane.
6	* libjpeg-turbo Modifications:
7	* Copyright (C) 1999-2006, MIYASAKA Masaru.
8	* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9	* Copyright (C) 2011, 2014-2015, D. R. Commander.
10	* For conditions of distribution and use, see the accompanying README.ijg
11	* file.
12	*
13	* This file contains the forward-DCT management logic.
14	* This code selects a particular DCT implementation to be used,
15	* and it performs related housekeeping chores including coefficient
16	* quantization.
17	*/
18
19	#define JPEG_INTERNALS
20	#include "jinclude.h"
21	#include "jpeglib.h"
22	#include "jdct.h" /* Private declarations for DCT subsystem */
23	#include "jsimddct.h"
24
25
26	/* Private subobject for this module */
27
28	typedef void (forward_DCT_method_ptr) (DCTELEM data);
29	typedef void (float_DCT_method_ptr) (FAST_FLOAT data);
30
31	typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
32	JDIMENSION start_col,
33	DCTELEM *workspace);
34	typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
35	JDIMENSION start_col,
36	FAST_FLOAT *workspace);
37
38	typedef void (quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM divisors,
39	DCTELEM *workspace);
40	typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
41	FAST_FLOAT *divisors,
42	FAST_FLOAT *workspace);
43
44	METHODDEF(void) quantize(JCOEFPTR, DCTELEM , DCTELEM );
45
46	typedef struct {
47	struct jpeg_forward_dct pub; / public fields /
48
49	/ Pointer to the DCT routine actually in use /
50	forward_DCT_method_ptr dct;
51	convsamp_method_ptr convsamp;
52	quantize_method_ptr quantize;
53
54	/ The actual post-DCT divisors --- not identical to the quant table*
55	* entries, because of scaling (especially for an unnormalized DCT).
56	* Each table is given in normal array order.
57	*/
58	DCTELEM *divisors[NUM_QUANT_TBLS];
59
60	/ work area for FDCT subroutine /
61	DCTELEM *workspace;
62
63	#ifdef DCT_FLOAT_SUPPORTED
64	/ Same as above for the floating-point case. /
65	float_DCT_method_ptr float_dct;
66	float_convsamp_method_ptr float_convsamp;
67	float_quantize_method_ptr float_quantize;
68	FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
69	FAST_FLOAT *float_workspace;
70	#endif
71	} my_fdct_controller;
72
73	typedef my_fdct_controller *my_fdct_ptr;
74
75
76	#if BITS_IN_JSAMPLE == 8
77
78	/*
79	* Find the highest bit in an integer through binary search.
80	*/
81
82	LOCAL(int)
83	flss(UINT16 val)
84	{
85	int bit;
86
87	bit = `16`;
88
89	if (!val)
90	return `0`;
91
92	if (!(val & `0xff00`)) {
93	bit -= `8`;
94	val <<= `8`;
95	}
96	if (!(val & `0xf000`)) {
97	bit -= `4`;
98	val <<= `4`;
99	}
100	if (!(val & `0xc000`)) {
101	bit -= `2`;
102	val <<= `2`;
103	}
104	if (!(val & `0x8000`)) {
105	bit -= `1`;
106	val <<= `1`;
107	}
108
109	return bit;
110	}
111
112
113	/*
114	* Compute values to do a division using reciprocal.
115	*
116	* This implementation is based on an algorithm described in
117	* "How to optimize for the Pentium family of microprocessors"
118	* (http://www.agner.org/assem/).
119	* More information about the basic algorithm can be found in
120	* the paper "Integer Division Using Reciprocals" by Robert Alverson.
121	*
122	* The basic idea is to replace x/d by x * d^-1. In order to store
123	* d^-1 with enough precision we shift it left a few places. It turns
124	* out that this algorithm gives just enough precision, and also fits
125	* into DCTELEM:
126	*
127	* b = (the number of significant bits in divisor) - 1
128	* r = (word size) + b
129	* f = 2^r / divisor
130	*
131	* f will not be an integer for most cases, so we need to compensate
132	* for the rounding error introduced:
133	*
134	* no fractional part:
135	*
136	* result = input >> r
137	*
138	* fractional part of f < 0.5:
139	*
140	* round f down to nearest integer
141	* result = ((input + 1) * f) >> r
142	*
143	* fractional part of f > 0.5:
144	*
145	* round f up to nearest integer
146	* result = (input * f) >> r
147	*
148	* This is the original algorithm that gives truncated results. But we
149	* want properly rounded results, so we replace "input" with
150	* "input + divisor/2".
151	*
152	* In order to allow SIMD implementations we also tweak the values to
153	* allow the same calculation to be made at all times:
154	*
155	* dctbl[0] = f rounded to nearest integer
156	* dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
157	* dctbl[2] = 1 << ((word size) * 2 - r)
158	* dctbl[3] = r - (word size)
159	*
160	* dctbl[2] is for stupid instruction sets where the shift operation
161	* isn't member wise (e.g. MMX).
162	*
163	* The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
164	* is that most SIMD implementations have a "multiply and store top
165	* half" operation.
166	*
167	* Lastly, we store each of the values in their own table instead
168	* of in a consecutive manner, yet again in order to allow SIMD
169	* routines.
170	*/
171
172	LOCAL(int)
173	compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
174	{
175	UDCTELEM2 fq, fr;
176	UDCTELEM c;
177	int b, r;
178
179	if (divisor == `1`) {
180	/ divisor == 1 means unquantized, so these reciprocal/correction/shift*
181	* values will cause the C quantization algorithm to act like the
182	* identity function. Since only the C quantization algorithm is used in
183	* these cases, the scale value is irrelevant.
184	*/
185	dtbl[DCTSIZE2 * `0`] = (DCTELEM)`1`; / reciprocal /
186	dtbl[DCTSIZE2 * `1`] = (DCTELEM)`0`; / correction /
187	dtbl[DCTSIZE2 * `2`] = (DCTELEM)`1`; / scale /
188	dtbl[DCTSIZE2 * `3`] = -(DCTELEM)(sizeof(DCTELEM) * `8`); / shift /
189	return `0`;
190	}
191
192	b = flss(divisor) - `1`;
193	r = sizeof(DCTELEM) * `8` + b;
194
195	fq = ((UDCTELEM2)`1` << r) / divisor;
196	fr = ((UDCTELEM2)`1` << r) % divisor;
197
198	c = divisor / `2`; / for rounding /
199
200	if (fr == `0`) { / divisor is power of two /
201	/ fq will be one bit too large to fit in DCTELEM, so adjust /
202	fq >>= `1`;
203	r--;
204	} else if (fr <= (divisor / `2U`)) { / fractional part is < 0.5 /
205	c++;
206	} else { / fractional part is > 0.5 /
207	fq++;
208	}
209
210	dtbl[DCTSIZE2 * `0`] = (DCTELEM)fq; / reciprocal /
211	dtbl[DCTSIZE2 * `1`] = (DCTELEM)c; / correction + roundfactor /
212	#ifdef WITH_SIMD
213	dtbl[DCTSIZE2 * `2`] = (DCTELEM)(`1` << (sizeof(DCTELEM) * `8` * `2` - r)); / scale /
214	#else
215	dtbl[DCTSIZE2 * `2`] = `1`;
216	#endif
217	dtbl[DCTSIZE2 * `3`] = (DCTELEM)r - sizeof(DCTELEM) * `8`; / shift /
218
219	if (r <= `16`) return `0`;
220	else return `1`;
221	}
222
223	#endif
224
225
226	/*
227	* Initialize for a processing pass.
228	* Verify that all referenced Q-tables are present, and set up
229	* the divisor table for each one.
230	* In the current implementation, DCT of all components is done during
231	* the first pass, even if only some components will be output in the
232	* first scan. Hence all components should be examined here.
233	*/
234
235	METHODDEF(void)
236	start_pass_fdctmgr(j_compress_ptr cinfo)
237	{
238	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
239	int ci, qtblno, i;
240	jpeg_component_info *compptr;
241	JQUANT_TBL *qtbl;
242	DCTELEM *dtbl;
243
244	for (ci = `0`, compptr = cinfo->comp_info; ci < cinfo->num_components;
245	ci++, compptr++) {
246	qtblno = compptr->quant_tbl_no;
247	/ Make sure specified quantization table is present /
248	if (qtblno < `0` \|\| qtblno >= NUM_QUANT_TBLS \|\|
249	cinfo->quant_tbl_ptrs[qtblno] == NULL)
250	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
251	qtbl = cinfo->quant_tbl_ptrs[qtblno];
252	/ Compute divisors for this quant table /
253	/ We may do this more than once for same table, but it's not a big deal /
254	switch (cinfo->dct_method) {
255	#ifdef DCT_ISLOW_SUPPORTED
256	case JDCT_ISLOW:
257	/ For LL&M IDCT method, divisors are equal to raw quantization*
258	* coefficients multiplied by 8 (to counteract scaling).
259	*/
260	if (fdct->divisors[qtblno] == NULL) {
261	fdct->divisors[qtblno] = (DCTELEM *)
262	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
263	(DCTSIZE2 * `4`) * sizeof(DCTELEM));
264	}
265	dtbl = fdct->divisors[qtblno];
266	for (i = `0`; i < DCTSIZE2; i++) {
267	#if BITS_IN_JSAMPLE == 8
268	if (!compute_reciprocal(qtbl->quantval[i] << `3`, &dtbl[i]) &&
269	fdct->quantize == jsimd_quantize)
270	fdct->quantize = quantize;
271	#else
272	dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << `3`;
273	#endif
274	}
275	break;
276	#endif
277	#ifdef DCT_IFAST_SUPPORTED
278	case JDCT_IFAST:
279	{
280	/ For AA&N IDCT method, divisors are equal to quantization*
281	* coefficients scaled by scalefactor[row]*scalefactor[col], where
282	* scalefactor[0] = 1
283	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
284	* We apply a further scale factor of 8.
285	*/
286	#define CONST_BITS 14
287	static const INT16 aanscales[DCTSIZE2] = {
288	/ precomputed values scaled up by 14 bits /
289	`16384`, `22725`, `21407`, `19266`, `16384`, `12873`, `8867`, `4520`,
290	`22725`, `31521`, `29692`, `26722`, `22725`, `17855`, `12299`, `6270`,
291	`21407`, `29692`, `27969`, `25172`, `21407`, `16819`, `11585`, `5906`,
292	`19266`, `26722`, `25172`, `22654`, `19266`, `15137`, `10426`, `5315`,
293	`16384`, `22725`, `21407`, `19266`, `16384`, `12873`, `8867`, `4520`,
294	`12873`, `17855`, `16819`, `15137`, `12873`, `10114`, `6967`, `3552`,
295	`8867`, `12299`, `11585`, `10426`, `8867`, `6967`, `4799`, `2446`,
296	`4520`, `6270`, `5906`, `5315`, `4520`, `3552`, `2446`, `1247`
297	};
298	SHIFT_TEMPS
299
300	if (fdct->divisors[qtblno] == NULL) {
301	fdct->divisors[qtblno] = (DCTELEM *)
302	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
303	(DCTSIZE2 * `4`) * sizeof(DCTELEM));
304	}
305	dtbl = fdct->divisors[qtblno];
306	for (i = `0`; i < DCTSIZE2; i++) {
307	#if BITS_IN_JSAMPLE == 8
308	if (!compute_reciprocal(
309	DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
310	(JLONG)aanscales[i]),
311	CONST_BITS - `3`), &dtbl[i]) &&
312	fdct->quantize == jsimd_quantize)
313	fdct->quantize = quantize;
314	#else
315	dtbl[i] = (DCTELEM)
316	DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
317	(JLONG)aanscales[i]),
318	CONST_BITS - `3`);
319	#endif
320	}
321	}
322	break;
323	#endif
324	#ifdef DCT_FLOAT_SUPPORTED
325	case JDCT_FLOAT:
326	{
327	/ For float AA&N IDCT method, divisors are equal to quantization*
328	* coefficients scaled by scalefactor[row]*scalefactor[col], where
329	* scalefactor[0] = 1
330	* scalefactor[k] = cos(kPI/16) sqrt(2) for k=1..7
331	* We apply a further scale factor of 8.
332	* What's actually stored is 1/divisor so that the inner loop can
333	* use a multiplication rather than a division.
334	*/
335	FAST_FLOAT *fdtbl;
336	int row, col;
337	static const double aanscalefactor[DCTSIZE] = {
338	`1.0`, `1.387039845`, `1.306562965`, `1.175875602`,
339	`1.0`, `0.785694958`, `0.541196100`, `0.275899379`
340	};
341
342	if (fdct->float_divisors[qtblno] == NULL) {
343	fdct->float_divisors[qtblno] = (FAST_FLOAT *)
344	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
345	DCTSIZE2 * sizeof(FAST_FLOAT));
346	}
347	fdtbl = fdct->float_divisors[qtblno];
348	i = `0`;
349	for (row = `0`; row < DCTSIZE; row++) {
350	for (col = `0`; col < DCTSIZE; col++) {
351	fdtbl[i] = (FAST_FLOAT)
352	(`1.0` / (((double)qtbl->quantval[i] *
353	aanscalefactor[row] * aanscalefactor[col] * `8.0`)));
354	i++;
355	}
356	}
357	}
358	break;
359	#endif
360	default:
361	ERREXIT(cinfo, JERR_NOT_COMPILED);
362	break;
363	}
364	}
365	}
366
367
368	/*
369	* Load data into workspace, applying unsigned->signed conversion.
370	*/
371
372	METHODDEF(void)
373	convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
374	{
375	register DCTELEM *workspaceptr;
376	register JSAMPROW elemptr;
377	register int elemr;
378
379	workspaceptr = workspace;
380	for (elemr = `0`; elemr < DCTSIZE; elemr++) {
381	elemptr = sample_data[elemr] + start_col;
382
383	#if DCTSIZE == 8 /* unroll the inner loop */
384	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
385	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
386	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
387	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
388	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
389	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
390	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
391	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
392	#else
393	{
394	register int elemc;
395	for (elemc = DCTSIZE; elemc > `0`; elemc--)
396	workspaceptr++ = GETJSAMPLE(elemptr++) - CENTERJSAMPLE;
397	}
398	#endif
399	}
400	}
401
402
403	/*
404	* Quantize/descale the coefficients, and store into coef_blocks[].
405	*/
406
407	METHODDEF(void)
408	quantize(JCOEFPTR coef_block, DCTELEM divisors, DCTELEM workspace)
409	{
410	int i;
411	DCTELEM temp;
412	JCOEFPTR output_ptr = coef_block;
413
414	#if BITS_IN_JSAMPLE == 8
415
416	UDCTELEM recip, corr;
417	int shift;
418	UDCTELEM2 product;
419
420	for (i = `0`; i < DCTSIZE2; i++) {
421	temp = workspace[i];
422	recip = divisors[i + DCTSIZE2 * `0`];
423	corr = divisors[i + DCTSIZE2 * `1`];
424	shift = divisors[i + DCTSIZE2 * `3`];
425
426	if (temp < `0`) {
427	temp = -temp;
428	product = (UDCTELEM2)(temp + corr) * recip;
429	product >>= shift + sizeof(DCTELEM) * `8`;
430	temp = (DCTELEM)product;
431	temp = -temp;
432	} else {
433	product = (UDCTELEM2)(temp + corr) * recip;
434	product >>= shift + sizeof(DCTELEM) * `8`;
435	temp = (DCTELEM)product;
436	}
437	output_ptr[i] = (JCOEF)temp;
438	}
439
440	#else
441
442	register DCTELEM qval;
443
444	for (i = `0`; i < DCTSIZE2; i++) {
445	qval = divisors[i];
446	temp = workspace[i];
447	/ Divide the coefficient value by qval, ensuring proper rounding.*
448	* Since C does not specify the direction of rounding for negative
449	* quotients, we have to force the dividend positive for portability.
450	*
451	* In most files, at least half of the output values will be zero
452	* (at default quantization settings, more like three-quarters...)
453	* so we should ensure that this case is fast. On many machines,
454	* a comparison is enough cheaper than a divide to make a special test
455	* a win. Since both inputs will be nonnegative, we need only test
456	* for a < b to discover whether a/b is 0.
457	* If your machine's division is fast enough, define FAST_DIVIDE.
458	*/
459	#ifdef FAST_DIVIDE
460	#define DIVIDE_BY(a, b) a /= b
461	#else
462	#define DIVIDE_BY(a, b) if (a >= b) a /= b; else a = 0
463	#endif
464	if (temp < `0`) {
465	temp = -temp;
466	temp += qval >> `1`; / for rounding /
467	DIVIDE_BY(temp, qval);
468	temp = -temp;
469	} else {
470	temp += qval >> `1`; / for rounding /
471	DIVIDE_BY(temp, qval);
472	}
473	output_ptr[i] = (JCOEF)temp;
474	}
475
476	#endif
477
478	}
479
480
481	/*
482	* Perform forward DCT on one or more blocks of a component.
483	*
484	* The input samples are taken from the sample_data[] array starting at
485	* position start_row/start_col, and moving to the right for any additional
486	* blocks. The quantized coefficients are returned in coef_blocks[].
487	*/
488
489	METHODDEF(void)
490	forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
491	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
492	JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
493	/ This version is used for integer DCT implementations. /
494	{
495	/ This routine is heavily used, so it's worth coding it tightly. /
496	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
497	DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
498	DCTELEM *workspace;
499	JDIMENSION bi;
500
501	/ Make sure the compiler doesn't look up these every pass /
502	forward_DCT_method_ptr do_dct = fdct->dct;
503	convsamp_method_ptr do_convsamp = fdct->convsamp;
504	quantize_method_ptr do_quantize = fdct->quantize;
505	workspace = fdct->workspace;
506
507	sample_data += start_row; / fold in the vertical offset once /
508
509	for (bi = `0`; bi < num_blocks; bi++, start_col += DCTSIZE) {
510	/ Load data into workspace, applying unsigned->signed conversion /
511	(*do_convsamp) (sample_data, start_col, workspace);
512
513	/ Perform the DCT /
514	(*do_dct) (workspace);
515
516	/ Quantize/descale the coefficients, and store into coef_blocks[] /
517	(*do_quantize) (coef_blocks[bi], divisors, workspace);
518	}
519	}
520
521
522	#ifdef DCT_FLOAT_SUPPORTED
523
524	METHODDEF(void)
525	convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
526	FAST_FLOAT *workspace)
527	{
528	register FAST_FLOAT *workspaceptr;
529	register JSAMPROW elemptr;
530	register int elemr;
531
532	workspaceptr = workspace;
533	for (elemr = `0`; elemr < DCTSIZE; elemr++) {
534	elemptr = sample_data[elemr] + start_col;
535	#if DCTSIZE == 8 /* unroll the inner loop */
536	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
537	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
538	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
539	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
540	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
541	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
542	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
543	workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(elemptr++) - CENTERJSAMPLE);
544	#else
545	{
546	register int elemc;
547	for (elemc = DCTSIZE; elemc > `0`; elemc--)
548	*workspaceptr++ = (FAST_FLOAT)
549	(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
550	}
551	#endif
552	}
553	}
554
555
556	METHODDEF(void)
557	quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
558	FAST_FLOAT *workspace)
559	{
560	register FAST_FLOAT temp;
561	register int i;
562	register JCOEFPTR output_ptr = coef_block;
563
564	for (i = `0`; i < DCTSIZE2; i++) {
565	/ Apply the quantization and scaling factor /
566	temp = workspace[i] * divisors[i];
567
568	/ Round to nearest integer.*
569	* Since C does not specify the direction of rounding for negative
570	* quotients, we have to force the dividend positive for portability.
571	* The maximum coefficient size is +-16K (for 12-bit data), so this
572	* code should work for either 16-bit or 32-bit ints.
573	*/
574	output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)`16384.5`) - `16384`);
575	}
576	}
577
578
579	METHODDEF(void)
580	forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
581	JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
582	JDIMENSION start_row, JDIMENSION start_col,
583	JDIMENSION num_blocks)
584	/ This version is used for floating-point DCT implementations. /
585	{
586	/ This routine is heavily used, so it's worth coding it tightly. /
587	my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
588	FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
589	FAST_FLOAT *workspace;
590	JDIMENSION bi;
591
592
593	/ Make sure the compiler doesn't look up these every pass /
594	float_DCT_method_ptr do_dct = fdct->float_dct;
595	float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
596	float_quantize_method_ptr do_quantize = fdct->float_quantize;
597	workspace = fdct->float_workspace;
598
599	sample_data += start_row; / fold in the vertical offset once /
600
601	for (bi = `0`; bi < num_blocks; bi++, start_col += DCTSIZE) {
602	/ Load data into workspace, applying unsigned->signed conversion /
603	(*do_convsamp) (sample_data, start_col, workspace);
604
605	/ Perform the DCT /
606	(*do_dct) (workspace);
607
608	/ Quantize/descale the coefficients, and store into coef_blocks[] /
609	(*do_quantize) (coef_blocks[bi], divisors, workspace);
610	}
611	}
612
613	#endif /* DCT_FLOAT_SUPPORTED */
614
615
616	/*
617	* Initialize FDCT manager.
618	*/
619
620	GLOBAL(void)
621	jinit_forward_dct(j_compress_ptr cinfo)
622	{
623	my_fdct_ptr fdct;
624	int i;
625
626	fdct = (my_fdct_ptr)
627	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
628	sizeof(my_fdct_controller));
629	cinfo->fdct = (struct jpeg_forward_dct *)fdct;
630	fdct->pub.start_pass = start_pass_fdctmgr;
631
632	/ First determine the DCT... /
633	switch (cinfo->dct_method) {
634	#ifdef DCT_ISLOW_SUPPORTED
635	case JDCT_ISLOW:
636	fdct->pub.forward_DCT = forward_DCT;
637	if (jsimd_can_fdct_islow())
638	fdct->dct = jsimd_fdct_islow;
639	else
640	fdct->dct = jpeg_fdct_islow;
641	break;
642	#endif
643	#ifdef DCT_IFAST_SUPPORTED
644	case JDCT_IFAST:
645	fdct->pub.forward_DCT = forward_DCT;
646	if (jsimd_can_fdct_ifast())
647	fdct->dct = jsimd_fdct_ifast;
648	else
649	fdct->dct = jpeg_fdct_ifast;
650	break;
651	#endif
652	#ifdef DCT_FLOAT_SUPPORTED
653	case JDCT_FLOAT:
654	fdct->pub.forward_DCT = forward_DCT_float;
655	if (jsimd_can_fdct_float())
656	fdct->float_dct = jsimd_fdct_float;
657	else
658	fdct->float_dct = jpeg_fdct_float;
659	break;
660	#endif
661	default:
662	ERREXIT(cinfo, JERR_NOT_COMPILED);
663	break;
664	}
665
666	/ ...then the supporting stages. /
667	switch (cinfo->dct_method) {
668	#ifdef DCT_ISLOW_SUPPORTED
669	case JDCT_ISLOW:
670	#endif
671	#ifdef DCT_IFAST_SUPPORTED
672	case JDCT_IFAST:
673	#endif
674	#if defined(DCT_ISLOW_SUPPORTED) \|\| defined(DCT_IFAST_SUPPORTED)
675	if (jsimd_can_convsamp())
676	fdct->convsamp = jsimd_convsamp;
677	else
678	fdct->convsamp = convsamp;
679	if (jsimd_can_quantize())
680	fdct->quantize = jsimd_quantize;
681	else
682	fdct->quantize = quantize;
683	break;
684	#endif
685	#ifdef DCT_FLOAT_SUPPORTED
686	case JDCT_FLOAT:
687	if (jsimd_can_convsamp_float())
688	fdct->float_convsamp = jsimd_convsamp_float;
689	else
690	fdct->float_convsamp = convsamp_float;
691	if (jsimd_can_quantize_float())
692	fdct->float_quantize = jsimd_quantize_float;
693	else
694	fdct->float_quantize = quantize_float;
695	break;
696	#endif
697	default:
698	ERREXIT(cinfo, JERR_NOT_COMPILED);
699	break;
700	}
701
702	/ Allocate workspace memory /
703	#ifdef DCT_FLOAT_SUPPORTED
704	if (cinfo->dct_method == JDCT_FLOAT)
705	fdct->float_workspace = (FAST_FLOAT *)
706	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
707	sizeof(FAST_FLOAT) * DCTSIZE2);
708	else
709	#endif
710	fdct->workspace = (DCTELEM *)
711	(*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
712	sizeof(DCTELEM) * DCTSIZE2);
713
714	/ Mark divisor tables unallocated /
715	for (i = `0`; i < NUM_QUANT_TBLS; i++) {
716	fdct->divisors[i] = NULL;
717	#ifdef DCT_FLOAT_SUPPORTED
718	fdct->float_divisors[i] = NULL;
719	#endif
720	}
721	}
722

Browse the source code of Skia/third_party/externals/libjpeg-turbo/jcdctmgr.c