cmsopt.c source code [OpenJDK/src/java.desktop/share/native/liblcms/cmsopt.c]

1	/*
2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3	*
4	* This code is free software; you can redistribute it and/or modify it
5	* under the terms of the GNU General Public License version 2 only, as
6	* published by the Free Software Foundation. Oracle designates this
7	* particular file as subject to the "Classpath" exception as provided
8	* by Oracle in the LICENSE file that accompanied this code.
9	*
10	* This code is distributed in the hope that it will be useful, but WITHOUT
11	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13	* version 2 for more details (a copy is included in the LICENSE file that
14	* accompanied this code).
15	*
16	* You should have received a copy of the GNU General Public License version
17	* 2 along with this work; if not, write to the Free Software Foundation,
18	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19	*
20	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21	* or visit www.oracle.com if you need additional information or have any
22	* questions.
23	*/
24
25	// This file is available under and governed by the GNU General Public
26	// License version 2 only, as published by the Free Software Foundation.
27	// However, the following notice accompanied the original version of this
28	// file:
29	//
30	//---------------------------------------------------------------------------------
31	//
32	// Little Color Management System
33	// Copyright (c) 1998-2017 Marti Maria Saguer
34	//
35	// Permission is hereby granted, free of charge, to any person obtaining
36	// a copy of this software and associated documentation files (the "Software"),
37	// to deal in the Software without restriction, including without limitation
38	// the rights to use, copy, modify, merge, publish, distribute, sublicense,
39	// and/or sell copies of the Software, and to permit persons to whom the Software
40	// is furnished to do so, subject to the following conditions:
41	//
42	// The above copyright notice and this permission notice shall be included in
43	// all copies or substantial portions of the Software.
44	//
45	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
46	// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
47	// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
48	// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
49	// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
50	// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
51	// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
52	//
53	//---------------------------------------------------------------------------------
54	//
55
56	#include "lcms2_internal.h"
57
58
59	//----------------------------------------------------------------------------------
60
61	// Optimization for 8 bits, Shaper-CLUT (3 inputs only)
62	typedef struct {
63
64	cmsContext ContextID;
65
66	const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer.
67
68	cmsUInt16Number rx[`256`], ry[`256`], rz[`256`];
69	cmsUInt32Number X0[`256`], Y0[`256`], Z0[`256`]; // Precomputed nodes and offsets for 8-bit input data
70
71
72	} Prelin8Data;
73
74
// Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs).
// This is the state consumed by PrelinEval16: one curve evaluator per input
// channel, the CLUT evaluator, and one curve evaluator per output channel.
typedef struct {

    cmsContext ContextID;

    // Number of channels
    cmsUInt32Number nInputs;
    cmsUInt32Number nOutputs;

    _cmsInterpFn16 EvalCurveIn16[MAX_INPUT_DIMENSIONS];       // The maximum number of input channels is known in advance
    cmsInterpParams*  ParamsCurveIn16[MAX_INPUT_DIMENSIONS];  // Params handed to the matching evaluator (NULL for the no-op evaluator)

    _cmsInterpFn16 EvalCLUT;            // The evaluator for 3D grid
    const cmsInterpParams* CLUTparams;  // (not-owned pointer)


    // The two arrays below are allocated by PrelinOpt16alloc and released by
    // PrelinOpt16free. The interpolation params they reference are taken from
    // the tone curves and are not freed here.
    _cmsInterpFn16* EvalCurveOut16;       // Points to an array of curve evaluators in 16 bits
    cmsInterpParams**  ParamsCurveOut16;  // Points to an array of references to interpolation params


} Prelin16Data;
96
97
98	// Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed
99
100	typedef cmsInt32Number cmsS1Fixed14Number; // Note that this may hold more than 16 bits!
101
102	#define DOUBLE_TO_1FIXED14(x) ((cmsS1Fixed14Number) floor((x) * 16384.0 + 0.5))
103
104	typedef struct {
105
106	cmsContext ContextID;
107
108	cmsS1Fixed14Number Shaper1R[`256`]; // from 0..255 to 1.14 (0.0...1.0)
109	cmsS1Fixed14Number Shaper1G[`256`];
110	cmsS1Fixed14Number Shaper1B[`256`];
111
112	cmsS1Fixed14Number Mat[`3`][`3`]; // n.14 to n.14 (needs a saturation after that)
113	cmsS1Fixed14Number Off[`3`];
114
115	cmsUInt16Number Shaper2R[`16385`]; // 1.14 to 0..255
116	cmsUInt16Number Shaper2G[`16385`];
117	cmsUInt16Number Shaper2B[`16385`];
118
119	} MatShaper8Data;
120
// Curves, optimization is shared between 8 and 16 bits
typedef struct {

    cmsContext ContextID;

    cmsUInt32Number nCurves;      // Number of curves
    cmsUInt32Number nElements;    // Elements in curves
    cmsUInt16Number** Curves;     // Points to a dynamically allocated array

} Curves16Data;
131
132
133	// Simple optimizations ----------------------------------------------------------------------------------------------------------
134
135
136	// Remove an element in linked chain
137	static
138	void _RemoveElement(cmsStage** head)
139	{
140	cmsStage* mpe = *head;
141	cmsStage* next = mpe ->Next;
142	*head = next;
143	cmsStageFree(mpe);
144	}
145
146	// Remove all identities in chain. Note that pt actually is a double pointer to the element that holds the pointer.
147	static
148	cmsBool _Remove1Op(cmsPipeline* Lut, cmsStageSignature UnaryOp)
149	{
150	cmsStage** pt = &Lut ->Elements;
151	cmsBool AnyOpt = FALSE;
152
153	while (*pt != NULL) {
154
155	if ((*pt) ->Implements == UnaryOp) {
156	_RemoveElement(pt);
157	AnyOpt = TRUE;
158	}
159	else
160	pt = &((*pt) -> Next);
161	}
162
163	return AnyOpt;
164	}
165
166	// Same, but only if two adjacent elements are found
167	static
168	cmsBool _Remove2Op(cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2)
169	{
170	cmsStage** pt1;
171	cmsStage** pt2;
172	cmsBool AnyOpt = FALSE;
173
174	pt1 = &Lut ->Elements;
175	if (pt1 == NULL) return* AnyOpt;
176
177	while (*pt1 != NULL) {
178
179	pt2 = &((*pt1) -> Next);
180	if (pt2 == NULL) return* AnyOpt;
181
182	if ((pt1) ->Implements == Op1 && (pt2) ->Implements == Op2) {
183	_RemoveElement(pt2);
184	_RemoveElement(pt1);
185	AnyOpt = TRUE;
186	}
187	else
188	pt1 = &((*pt1) -> Next);
189	}
190
191	return AnyOpt;
192	}
193
194
195	static
196	cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b)
197	{
198	return fabs(b - a) < `0.00001f`;
199	}
200
201	static
202	cmsBool isFloatMatrixIdentity(const cmsMAT3* a)
203	{
204	cmsMAT3 Identity;
205	int i, j;
206
207	_cmsMAT3identity(&Identity);
208
209	for (i = `0`; i < `3`; i++)
210	for (j = `0`; j < `3`; j++)
211	if (!CloseEnoughFloat(a->v[i].n[j], Identity.v[i].n[j])) return FALSE;
212
213	return TRUE;
214	}
215	// if two adjacent matrices are found, multiply them.
216	static
217	cmsBool _MultiplyMatrix(cmsPipeline* Lut)
218	{
219	cmsStage** pt1;
220	cmsStage** pt2;
221	cmsStage* chain;
222	cmsBool AnyOpt = FALSE;
223
224	pt1 = &Lut->Elements;
225	if (pt1 == NULL) return* AnyOpt;
226
227	while (*pt1 != NULL) {
228
229	pt2 = &((*pt1)->Next);
230	if (pt2 == NULL) return* AnyOpt;
231
232	if ((pt1)->Implements == cmsSigMatrixElemType && (pt2)->Implements == cmsSigMatrixElemType) {
233
234	// Get both matrices
235	_cmsStageMatrixData* m1 = (_cmsStageMatrixData) cmsStageData(pt1);
236	_cmsStageMatrixData* m2 = (_cmsStageMatrixData) cmsStageData(pt2);
237	cmsMAT3 res;
238
239	// Input offset and output offset should be zero to use this optimization
240	if (m1->Offset != NULL \|\| m2 ->Offset != NULL \|\|
241	cmsStageInputChannels(pt1) != `3` \|\| cmsStageOutputChannels(pt1) != `3` \|\|
242	cmsStageInputChannels(pt2) != `3` \|\| cmsStageOutputChannels(pt2) != `3`)
243	return FALSE;
244
245	// Multiply both matrices to get the result
246	_cmsMAT3per(&res, (cmsMAT3)m2->Double, (cmsMAT3)m1->Double);
247
248	// Get the next in chain after the matrices
249	chain = (*pt2)->Next;
250
251	// Remove both matrices
252	_RemoveElement(pt2);
253	_RemoveElement(pt1);
254
255	// Now what if the result is a plain identity?
256	if (!isFloatMatrixIdentity(&res)) {
257
258	// We can not get rid of full matrix
259	cmsStage* Multmat = cmsStageAllocMatrix(Lut->ContextID, `3`, `3`, (const cmsFloat64Number*) &res, NULL);
260	if (Multmat == NULL) return FALSE; // Should never happen
261
262	// Recover the chain
263	Multmat->Next = chain;
264	*pt1 = Multmat;
265	}
266
267	AnyOpt = TRUE;
268	}
269	else
270	pt1 = &((*pt1)->Next);
271	}
272
273	return AnyOpt;
274	}
275
276
277	// Preoptimize just gets rif of no-ops coming paired. Conversion from v2 to v4 followed
278	// by a v4 to v2 and vice-versa. The elements are then discarded.
279	static
280	cmsBool PreOptimize(cmsPipeline* Lut)
281	{
282	cmsBool AnyOpt = FALSE, Opt;
283
284	do {
285
286	Opt = FALSE;
287
288	// Remove all identities
289	Opt \|= _Remove1Op(Lut, cmsSigIdentityElemType);
290
291	// Remove XYZ2Lab followed by Lab2XYZ
292	Opt \|= _Remove2Op(Lut, cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType);
293
294	// Remove Lab2XYZ followed by XYZ2Lab
295	Opt \|= _Remove2Op(Lut, cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType);
296
297	// Remove V4 to V2 followed by V2 to V4
298	Opt \|= _Remove2Op(Lut, cmsSigLabV4toV2, cmsSigLabV2toV4);
299
300	// Remove V2 to V4 followed by V4 to V2
301	Opt \|= _Remove2Op(Lut, cmsSigLabV2toV4, cmsSigLabV4toV2);
302
303	// Remove float pcs Lab conversions
304	Opt \|= _Remove2Op(Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab);
305
306	// Remove float pcs Lab conversions
307	Opt \|= _Remove2Op(Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ);
308
309	// Simplify matrix.
310	Opt \|= _MultiplyMatrix(Lut);
311
312	if (Opt) AnyOpt = TRUE;
313
314	} while (Opt);
315
316	return AnyOpt;
317	}
318
319	static
320	void Eval16nop1D(register const cmsUInt16Number Input[],
321	register cmsUInt16Number Output[],
322	register const struct _cms_interp_struc* p)
323	{
324	Output[`0`] = Input[`0`];
325
326	cmsUNUSED_PARAMETER(p);
327	}
328
329	static
330	void PrelinEval16(register const cmsUInt16Number Input[],
331	register cmsUInt16Number Output[],
332	register const void* D)
333	{
334	Prelin16Data* p16 = (Prelin16Data*) D;
335	cmsUInt16Number StageABC[MAX_INPUT_DIMENSIONS];
336	cmsUInt16Number StageDEF[cmsMAXCHANNELS];
337	cmsUInt32Number i;
338
339	for (i=`0`; i < p16 ->nInputs; i++) {
340
341	p16 ->EvalCurveIn16[i](&Input[i], &StageABC[i], p16 ->ParamsCurveIn16[i]);
342	}
343
344	p16 ->EvalCLUT(StageABC, StageDEF, p16 ->CLUTparams);
345
346	for (i=`0`; i < p16 ->nOutputs; i++) {
347
348	p16 ->EvalCurveOut16[i](&StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]);
349	}
350	}
351
352
353	static
354	void PrelinOpt16free(cmsContext ContextID, void* ptr)
355	{
356	Prelin16Data* p16 = (Prelin16Data*) ptr;
357
358	_cmsFree(ContextID, p16 ->EvalCurveOut16);
359	_cmsFree(ContextID, p16 ->ParamsCurveOut16);
360
361	_cmsFree(ContextID, p16);
362	}
363
364	static
365	void* Prelin16dup(cmsContext ContextID, const void* ptr)
366	{
367	Prelin16Data* p16 = (Prelin16Data*) ptr;
368	Prelin16Data* Duped = (Prelin16Data) _cmsDupMem(ContextID, p16, sizeof*(Prelin16Data));
369
370	if (Duped == NULL) return NULL;
371
372	Duped->EvalCurveOut16 = (_cmsInterpFn16) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs sizeof(_cmsInterpFn16));
373	Duped->ParamsCurveOut16 = (cmsInterpParams*)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs sizeof(cmsInterpParams*));
374
375	return Duped;
376	}
377
378
379	static
380	Prelin16Data* PrelinOpt16alloc(cmsContext ContextID,
381	const cmsInterpParams* ColorMap,
382	cmsUInt32Number nInputs, cmsToneCurve** In,
383	cmsUInt32Number nOutputs, cmsToneCurve** Out )
384	{
385	cmsUInt32Number i;
386	Prelin16Data* p16 = (Prelin16Data)_cmsMallocZero(ContextID, sizeof*(Prelin16Data));
387	if (p16 == NULL) return NULL;
388
389	p16 ->nInputs = nInputs;
390	p16 ->nOutputs = nOutputs;
391
392
393	for (i=`0`; i < nInputs; i++) {
394
395	if (In == NULL) {
396	p16 -> ParamsCurveIn16[i] = NULL;
397	p16 -> EvalCurveIn16[i] = Eval16nop1D;
398
399	}
400	else {
401	p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams;
402	p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16;
403	}
404	}
405
406	p16 ->CLUTparams = ColorMap;
407	p16 ->EvalCLUT = ColorMap ->Interpolation.Lerp16;
408
409
410	p16 -> EvalCurveOut16 = (_cmsInterpFn16) _cmsCalloc(ContextID, nOutputs, sizeof*(_cmsInterpFn16));
411	p16 -> ParamsCurveOut16 = (cmsInterpParams) _cmsCalloc(ContextID, nOutputs, sizeof*(cmsInterpParams ));
412
413	for (i=`0`; i < nOutputs; i++) {
414
415	if (Out == NULL) {
416	p16 ->ParamsCurveOut16[i] = NULL;
417	p16 -> EvalCurveOut16[i] = Eval16nop1D;
418	}
419	else {
420
421	p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams;
422	p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16;
423	}
424	}
425
426	return p16;
427	}
428
429
430
431	// Resampling ---------------------------------------------------------------------------------
432
// Presumably the number of table entries used when building prelinearization
// curves. NOTE(review): the code that uses this constant is outside this
// excerpt -- confirm before changing the value.
#define PRELINEARIZATION_POINTS 4096
434
435	// Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for
436	// almost any transform. We use floating point precision and then convert from floating point to 16 bits.
437	static
438	cmsInt32Number XFormSampler16(register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo)
439	{
440	cmsPipeline* Lut = (cmsPipeline*) Cargo;
441	cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
442	cmsUInt32Number i;
443
444	_cmsAssert(Lut -> InputChannels < cmsMAXCHANNELS);
445	_cmsAssert(Lut -> OutputChannels < cmsMAXCHANNELS);
446
447	// From 16 bit to floating point
448	for (i=`0`; i < Lut ->InputChannels; i++)
449	InFloat[i] = (cmsFloat32Number) (In[i] / `65535.0`);
450
451	// Evaluate in floating point
452	cmsPipelineEvalFloat(InFloat, OutFloat, Lut);
453
454	// Back to 16 bits representation
455	for (i=`0`; i < Lut ->OutputChannels; i++)
456	Out[i] = _cmsQuickSaturateWord(OutFloat[i] * `65535.0`);
457
458	// Always succeed
459	return TRUE;
460	}
461
462	// Try to see if the curves of a given MPE are linear
463	static
464	cmsBool AllCurvesAreLinear(cmsStage* mpe)
465	{
466	cmsToneCurve** Curves;
467	cmsUInt32Number i, n;
468
469	Curves = _cmsStageGetPtrToCurveSet(mpe);
470	if (Curves == NULL) return FALSE;
471
472	n = cmsStageOutputChannels(mpe);
473
474	for (i=`0`; i < n; i++) {
475	if (!cmsIsToneCurveLinear(Curves[i])) return FALSE;
476	}
477
478	return TRUE;
479	}
480
481	// This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose
482	// is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels
483	static
484	cmsBool PatchLUT(cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[],
485	cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn)
486	{
487	_cmsStageCLutData* Grid = (_cmsStageCLutData*) CLUT ->Data;
488	cmsInterpParams* p16 = Grid ->Params;
489	cmsFloat64Number px, py, pz, pw;
490	int x0, y0, z0, w0;
491	int i, index;
492
493	if (CLUT -> Type != cmsSigCLutElemType) {
494	cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) Attempt to PatchLUT on non-lut stage");
495	return FALSE;
496	}
497
498	if (nChannelsIn == `4`) {
499
500	px = ((cmsFloat64Number) At[`0`] * (p16->Domain[`0`])) / `65535.0`;
501	py = ((cmsFloat64Number) At[`1`] * (p16->Domain[`1`])) / `65535.0`;
502	pz = ((cmsFloat64Number) At[`2`] * (p16->Domain[`2`])) / `65535.0`;
503	pw = ((cmsFloat64Number) At[`3`] * (p16->Domain[`3`])) / `65535.0`;
504
505	x0 = (int) floor(px);
506	y0 = (int) floor(py);
507	z0 = (int) floor(pz);
508	w0 = (int) floor(pw);
509
510	if (((px - x0) != `0`) \|\|
511	((py - y0) != `0`) \|\|
512	((pz - z0) != `0`) \|\|
513	((pw - w0) != `0`)) return FALSE; // Not on exact node
514
515	index = (int) p16 -> opta[`3`] * x0 +
516	(int) p16 -> opta[`2`] * y0 +
517	(int) p16 -> opta[`1`] * z0 +
518	(int) p16 -> opta[`0`] * w0;
519	}
520	else
521	if (nChannelsIn == `3`) {
522
523	px = ((cmsFloat64Number) At[`0`] * (p16->Domain[`0`])) / `65535.0`;
524	py = ((cmsFloat64Number) At[`1`] * (p16->Domain[`1`])) / `65535.0`;
525	pz = ((cmsFloat64Number) At[`2`] * (p16->Domain[`2`])) / `65535.0`;
526
527	x0 = (int) floor(px);
528	y0 = (int) floor(py);
529	z0 = (int) floor(pz);
530
531	if (((px - x0) != `0`) \|\|
532	((py - y0) != `0`) \|\|
533	((pz - z0) != `0`)) return FALSE; // Not on exact node
534
535	index = (int) p16 -> opta[`2`] * x0 +
536	(int) p16 -> opta[`1`] * y0 +
537	(int) p16 -> opta[`0`] * z0;
538	}
539	else
540	if (nChannelsIn == `1`) {
541
542	px = ((cmsFloat64Number) At[`0`] * (p16->Domain[`0`])) / `65535.0`;
543
544	x0 = (int) floor(px);
545
546	if (((px - x0) != `0`)) return FALSE; // Not on exact node
547
548	index = (int) p16 -> opta[`0`] * x0;
549	}
550	else {
551	cmsSignalError(CLUT->ContextID, cmsERROR_INTERNAL, "(internal) %d Channels are not supported on PatchLUT", nChannelsIn);
552	return FALSE;
553	}
554
555	for (i = `0`; i < (int) nChannelsOut; i++)
556	Grid->Tab.T[index + i] = Value[i];
557
558	return TRUE;
559	}
560
561	// Auxiliary, to see if two values are equal or very different
562	static
563	cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] )
564	{
565	cmsUInt32Number i;
566
567	for (i=`0`; i < n; i++) {
568
569	if (abs(White1[i] - White2[i]) > `0xf000`) return TRUE; // Values are so extremely different that the fixup should be avoided
570	if (White1[i] != White2[i]) return FALSE;
571	}
572	return TRUE;
573	}
574
575
576	// Locate the node for the white point and fix it to pure white in order to avoid scum dot.
577	static
578	cmsBool FixWhiteMisalignment(cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace)
579	{
580	cmsUInt16Number WhitePointIn, WhitePointOut;
581	cmsUInt16Number WhiteIn[cmsMAXCHANNELS], WhiteOut[cmsMAXCHANNELS], ObtainedOut[cmsMAXCHANNELS];
582	cmsUInt32Number i, nOuts, nIns;
583	cmsStage PreLin = NULL, CLUT = NULL, *PostLin = NULL;
584
585	if (!_cmsEndPointsBySpace(EntryColorSpace,
586	&WhitePointIn, NULL, &nIns)) return FALSE;
587
588	if (!_cmsEndPointsBySpace(ExitColorSpace,
589	&WhitePointOut, NULL, &nOuts)) return FALSE;
590
591	// It needs to be fixed?
592	if (Lut ->InputChannels != nIns) return FALSE;
593	if (Lut ->OutputChannels != nOuts) return FALSE;
594
595	cmsPipelineEval16(WhitePointIn, ObtainedOut, Lut);
596
597	if (WhitesAreEqual(nOuts, WhitePointOut, ObtainedOut)) return TRUE; // whites already match
598
599	// Check if the LUT comes as Prelin, CLUT or Postlin. We allow all combinations
600	if (!cmsPipelineCheckAndRetreiveStages(Lut, `3`, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &PreLin, &CLUT, &PostLin))
601	if (!cmsPipelineCheckAndRetreiveStages(Lut, `2`, cmsSigCurveSetElemType, cmsSigCLutElemType, &PreLin, &CLUT))
602	if (!cmsPipelineCheckAndRetreiveStages(Lut, `2`, cmsSigCLutElemType, cmsSigCurveSetElemType, &CLUT, &PostLin))
603	if (!cmsPipelineCheckAndRetreiveStages(Lut, `1`, cmsSigCLutElemType, &CLUT))
604	return FALSE;
605
606	// We need to interpolate white points of both, pre and post curves
607	if (PreLin) {
608
609	cmsToneCurve** Curves = _cmsStageGetPtrToCurveSet(PreLin);
610
611	for (i=`0`; i < nIns; i++) {
612	WhiteIn[i] = cmsEvalToneCurve16(Curves[i], WhitePointIn[i]);
613	}
614	}
615	else {
616	for (i=`0`; i < nIns; i++)
617	WhiteIn[i] = WhitePointIn[i];
618	}
619
620	// If any post-linearization, we need to find how is represented white before the curve, do
621	// a reverse interpolation in this case.
622	if (PostLin) {
623
624	cmsToneCurve** Curves = _cmsStageGetPtrToCurveSet(PostLin);
625
626	for (i=`0`; i < nOuts; i++) {
627
628	cmsToneCurve* InversePostLin = cmsReverseToneCurve(Curves[i]);
629	if (InversePostLin == NULL) {
630	WhiteOut[i] = WhitePointOut[i];
631
632	} else {
633
634	WhiteOut[i] = cmsEvalToneCurve16(InversePostLin, WhitePointOut[i]);
635	cmsFreeToneCurve(InversePostLin);
636	}
637	}
638	}
639	else {
640	for (i=`0`; i < nOuts; i++)
641	WhiteOut[i] = WhitePointOut[i];
642	}
643
644	// Ok, proceed with patching. May fail and we don't care if it fails
645	PatchLUT(CLUT, WhiteIn, WhiteOut, nOuts, nIns);
646
647	return TRUE;
648	}
649
650	// -----------------------------------------------------------------------------------------------------------------------------------------------
651	// This function creates simple LUT from complex ones. The generated LUT has an optional set of
652	// prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables.
653	// These curves have to exist in the original LUT in order to be used in the simplified output.
654	// Caller may also use the flags to allow this feature.
655	// LUTS with all curves will be simplified to a single curve. Parametric curves are lost.
656	// This function should be used on 16-bits LUTS only, as floating point losses precision when simplified
657	// -----------------------------------------------------------------------------------------------------------------------------------------------
658
659	static
660	cmsBool OptimizeByResampling(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
661	{
662	cmsPipeline* Src = NULL;
663	cmsPipeline* Dest = NULL;
664	cmsStage* mpe;
665	cmsStage* CLUT;
666	cmsStage KeepPreLin = NULL, KeepPostLin = NULL;
667	cmsUInt32Number nGridPoints;
668	cmsColorSpaceSignature ColorSpace, OutputColorSpace;
669	cmsStage *NewPreLin = NULL;
670	cmsStage *NewPostLin = NULL;
671	_cmsStageCLutData* DataCLUT;
672	cmsToneCurve** DataSetIn;
673	cmsToneCurve** DataSetOut;
674	Prelin16Data* p16;
675
676	// This is a loosy optimization! does not apply in floating-point cases
677	if (_cmsFormatterIsFloat(InputFormat) \|\| _cmsFormatterIsFloat(OutputFormat)) return FALSE;
678
679	ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
680	OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
681
682	// Color space must be specified
683	if (ColorSpace == (cmsColorSpaceSignature)`0` \|\|
684	OutputColorSpace == (cmsColorSpaceSignature)`0`) return FALSE;
685
686	nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
687
688	// For empty LUTs, 2 points are enough
689	if (cmsPipelineStageCount(*Lut) == `0`)
690	nGridPoints = `2`;
691
692	Src = *Lut;
693
694	// Named color pipelines cannot be optimized either
695	for (mpe = cmsPipelineGetPtrToFirstStage(Src);
696	mpe != NULL;
697	mpe = cmsStageNext(mpe)) {
698	if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
699	}
700
701	// Allocate an empty LUT
702	Dest = cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
703	if (!Dest) return FALSE;
704
705	// Prelinearization tables are kept unless indicated by flags
706	if (*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION) {
707
708	// Get a pointer to the prelinearization element
709	cmsStage* PreLin = cmsPipelineGetPtrToFirstStage(Src);
710
711	// Check if suitable
712	if (PreLin && PreLin ->Type == cmsSigCurveSetElemType) {
713
714	// Maybe this is a linear tram, so we can avoid the whole stuff
715	if (!AllCurvesAreLinear(PreLin)) {
716
717	// All seems ok, proceed.
718	NewPreLin = cmsStageDup(PreLin);
719	if(!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, NewPreLin))
720	goto Error;
721
722	// Remove prelinearization. Since we have duplicated the curve
723	// in destination LUT, the sampling should be applied after this stage.
724	cmsPipelineUnlinkStage(Src, cmsAT_BEGIN, &KeepPreLin);
725	}
726	}
727	}
728
729	// Allocate the CLUT
730	CLUT = cmsStageAllocCLut16bit(Src ->ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL);
731	if (CLUT == NULL) goto Error;
732
733	// Add the CLUT to the destination LUT
734	if (!cmsPipelineInsertStage(Dest, cmsAT_END, CLUT)) {
735	goto Error;
736	}
737
738	// Postlinearization tables are kept unless indicated by flags
739	if (*dwFlags & cmsFLAGS_CLUT_POST_LINEARIZATION) {
740
741	// Get a pointer to the postlinearization if present
742	cmsStage* PostLin = cmsPipelineGetPtrToLastStage(Src);
743
744	// Check if suitable
745	if (PostLin && cmsStageType(PostLin) == cmsSigCurveSetElemType) {
746
747	// Maybe this is a linear tram, so we can avoid the whole stuff
748	if (!AllCurvesAreLinear(PostLin)) {
749
750	// All seems ok, proceed.
751	NewPostLin = cmsStageDup(PostLin);
752	if (!cmsPipelineInsertStage(Dest, cmsAT_END, NewPostLin))
753	goto Error;
754
755	// In destination LUT, the sampling should be applied after this stage.
756	cmsPipelineUnlinkStage(Src, cmsAT_END, &KeepPostLin);
757	}
758	}
759	}
760
761	// Now its time to do the sampling. We have to ignore pre/post linearization
762	// The source LUT without pre/post curves is passed as parameter.
763	if (!cmsStageSampleCLut16bit(CLUT, XFormSampler16, (void*) Src, `0`)) {
764	Error:
765	// Ops, something went wrong, Restore stages
766	if (KeepPreLin != NULL) {
767	if (!cmsPipelineInsertStage(Src, cmsAT_BEGIN, KeepPreLin)) {
768	_cmsAssert(`0`); // This never happens
769	}
770	}
771	if (KeepPostLin != NULL) {
772	if (!cmsPipelineInsertStage(Src, cmsAT_END, KeepPostLin)) {
773	_cmsAssert(`0`); // This never happens
774	}
775	}
776	cmsPipelineFree(Dest);
777	return FALSE;
778	}
779
780	// Done.
781
782	if (KeepPreLin != NULL) cmsStageFree(KeepPreLin);
783	if (KeepPostLin != NULL) cmsStageFree(KeepPostLin);
784	cmsPipelineFree(Src);
785
786	DataCLUT = (_cmsStageCLutData*) CLUT ->Data;
787
788	if (NewPreLin == NULL) DataSetIn = NULL;
789	else DataSetIn = ((_cmsStageToneCurvesData*) NewPreLin ->Data) ->TheCurves;
790
791	if (NewPostLin == NULL) DataSetOut = NULL;
792	else DataSetOut = ((_cmsStageToneCurvesData*) NewPostLin ->Data) ->TheCurves;
793
794
795	if (DataSetIn == NULL && DataSetOut == NULL) {
796
797	_cmsPipelineSetOptimizationParameters(Dest, (_cmsOPTeval16Fn) DataCLUT->Params->Interpolation.Lerp16, DataCLUT->Params, NULL, NULL);
798	}
799	else {
800
801	p16 = PrelinOpt16alloc(Dest ->ContextID,
802	DataCLUT ->Params,
803	Dest ->InputChannels,
804	DataSetIn,
805	Dest ->OutputChannels,
806	DataSetOut);
807
808	_cmsPipelineSetOptimizationParameters(Dest, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
809	}
810
811
812	// Don't fix white on absolute colorimetric
813	if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
814	*dwFlags \|= cmsFLAGS_NOWHITEONWHITEFIXUP;
815
816	if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
817
818	FixWhiteMisalignment(Dest, ColorSpace, OutputColorSpace);
819	}
820
821	*Lut = Dest;
822	return TRUE;
823
824	cmsUNUSED_PARAMETER(Intent);
825	}
826
827
828	// -----------------------------------------------------------------------------------------------------------------------------------------------
829	// Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on
830	// Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works
831	// for RGB transforms. See the paper for more details
832	// -----------------------------------------------------------------------------------------------------------------------------------------------
833
834
835	// Normalize endpoints by slope limiting max and min. This assures endpoints as well.
836	// Descending curves are handled as well.
837	static
838	void SlopeLimiting(cmsToneCurve* g)
839	{
840	int BeginVal, EndVal;
841	int AtBegin = (int) floor((cmsFloat64Number) g ->nEntries * `0.02` + `0.5`); // Cutoff at 2%
842	int AtEnd = (int) g ->nEntries - AtBegin - `1`; // And 98%
843	cmsFloat64Number Val, Slope, beta;
844	int i;
845
846	if (cmsIsToneCurveDescending(g)) {
847	BeginVal = `0xffff`; EndVal = `0`;
848	}
849	else {
850	BeginVal = `0`; EndVal = `0xffff`;
851	}
852
853	// Compute slope and offset for begin of curve
854	Val = g ->Table16[AtBegin];
855	Slope = (Val - BeginVal) / AtBegin;
856	beta = Val - Slope * AtBegin;
857
858	for (i=`0`; i < AtBegin; i++)
859	g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
860
861	// Compute slope and offset for the end
862	Val = g ->Table16[AtEnd];
863	Slope = (EndVal - Val) / AtBegin; // AtBegin holds the X interval, which is same in both cases
864	beta = Val - Slope * AtEnd;
865
866	for (i = AtEnd; i < (int) g ->nEntries; i++)
867	g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta);
868	}
869
870
871	// Precomputes tables for 8-bit on input devicelink.
872	static
873	Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[`3`])
874	{
875	int i;
876	cmsUInt16Number Input[`3`];
877	cmsS15Fixed16Number v1, v2, v3;
878	Prelin8Data* p8;
879
880	p8 = (Prelin8Data)_cmsMallocZero(ContextID, sizeof*(Prelin8Data));
881	if (p8 == NULL) return NULL;
882
883	// Since this only works for 8 bit input, values comes always as x 257,*
884	// we can safely take msb byte (x << 8 + x)
885
886	for (i=`0`; i < `256`; i++) {
887
888	if (G != NULL) {
889
890	// Get 16-bit representation
891	Input[`0`] = cmsEvalToneCurve16(G[`0`], FROM_8_TO_16(i));
892	Input[`1`] = cmsEvalToneCurve16(G[`1`], FROM_8_TO_16(i));
893	Input[`2`] = cmsEvalToneCurve16(G[`2`], FROM_8_TO_16(i));
894	}
895	else {
896	Input[`0`] = FROM_8_TO_16(i);
897	Input[`1`] = FROM_8_TO_16(i);
898	Input[`2`] = FROM_8_TO_16(i);
899	}
900
901
902	// Move to 0..1.0 in fixed domain
903	v1 = _cmsToFixedDomain((int) (Input[`0`] * p -> Domain[`0`]));
904	v2 = _cmsToFixedDomain((int) (Input[`1`] * p -> Domain[`1`]));
905	v3 = _cmsToFixedDomain((int) (Input[`2`] * p -> Domain[`2`]));
906
907	// Store the precalculated table of nodes
908	p8 ->X0[i] = (p->opta[`2`] * FIXED_TO_INT(v1));
909	p8 ->Y0[i] = (p->opta[`1`] * FIXED_TO_INT(v2));
910	p8 ->Z0[i] = (p->opta[`0`] * FIXED_TO_INT(v3));
911
912	// Store the precalculated table of offsets
913	p8 ->rx[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v1);
914	p8 ->ry[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v2);
915	p8 ->rz[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v3);
916	}
917
918	p8 ->ContextID = ContextID;
919	p8 ->p = p;
920
921	return p8;
922	}
923
924	static
925	void Prelin8free(cmsContext ContextID, void* ptr)
926	{
927	_cmsFree(ContextID, ptr);
928	}
929
930	static
931	void* Prelin8dup(cmsContext ContextID, const void* ptr)
932	{
933	return _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data));
934	}
935
936
937
938	// A optimized interpolation for 8-bit input.
939	#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
940	static
941	void PrelinEval8(register const cmsUInt16Number Input[],
942	register cmsUInt16Number Output[],
943	register const void* D)
944	{
945
946	cmsUInt8Number r, g, b;
947	cmsS15Fixed16Number rx, ry, rz;
948	cmsS15Fixed16Number c0, c1, c2, c3, Rest;
949	int OutChan;
950	register cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
951	Prelin8Data* p8 = (Prelin8Data*) D;
952	register const cmsInterpParams* p = p8 ->p;
953	int TotalOut = (int) p -> nOutputs;
954	const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table;
955
956	r = (cmsUInt8Number) (Input[`0`] >> `8`);
957	g = (cmsUInt8Number) (Input[`1`] >> `8`);
958	b = (cmsUInt8Number) (Input[`2`] >> `8`);
959
960	X0 = X1 = (cmsS15Fixed16Number) p8->X0[r];
961	Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g];
962	Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b];
963
964	rx = p8 ->rx[r];
965	ry = p8 ->ry[g];
966	rz = p8 ->rz[b];
967
968	X1 = X0 + (cmsS15Fixed16Number)((rx == `0`) ? `0` : p ->opta[`2`]);
969	Y1 = Y0 + (cmsS15Fixed16Number)((ry == `0`) ? `0` : p ->opta[`1`]);
970	Z1 = Z0 + (cmsS15Fixed16Number)((rz == `0`) ? `0` : p ->opta[`0`]);
971
972
973	// These are the 6 Tetrahedral
974	for (OutChan=`0`; OutChan < TotalOut; OutChan++) {
975
976	c0 = DENS(X0, Y0, Z0);
977
978	if (rx >= ry && ry >= rz)
979	{
980	c1 = DENS(X1, Y0, Z0) - c0;
981	c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
982	c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
983	}
984	else
985	if (rx >= rz && rz >= ry)
986	{
987	c1 = DENS(X1, Y0, Z0) - c0;
988	c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
989	c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
990	}
991	else
992	if (rz >= rx && rx >= ry)
993	{
994	c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
995	c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
996	c3 = DENS(X0, Y0, Z1) - c0;
997	}
998	else
999	if (ry >= rx && rx >= rz)
1000	{
1001	c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
1002	c2 = DENS(X0, Y1, Z0) - c0;
1003	c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
1004	}
1005	else
1006	if (ry >= rz && rz >= rx)
1007	{
1008	c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
1009	c2 = DENS(X0, Y1, Z0) - c0;
1010	c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
1011	}
1012	else
1013	if (rz >= ry && ry >= rx)
1014	{
1015	c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
1016	c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
1017	c3 = DENS(X0, Y0, Z1) - c0;
1018	}
1019	else {
1020	c1 = c2 = c3 = `0`;
1021	}
1022
1023	Rest = c1 * rx + c2 * ry + c3 * rz + `0x8001`;
1024	Output[OutChan] = (cmsUInt16Number) (c0 + ((Rest + (Rest >> `16`)) >> `16`));
1025
1026	}
1027	}
1028
1029	#undef DENS
1030
1031
1032	// Curves that contain wide empty areas are not optimizeable
1033	static
1034	cmsBool IsDegenerated(const cmsToneCurve* g)
1035	{
1036	cmsUInt32Number i, Zeros = `0`, Poles = `0`;
1037	cmsUInt32Number nEntries = g ->nEntries;
1038
1039	for (i=`0`; i < nEntries; i++) {
1040
1041	if (g ->Table16[i] == `0x0000`) Zeros++;
1042	if (g ->Table16[i] == `0xffff`) Poles++;
1043	}
1044
1045	if (Zeros == `1` && Poles == `1`) return FALSE; // For linear tables
1046	if (Zeros > (nEntries / `20`)) return TRUE; // Degenerated, many zeros
1047	if (Poles > (nEntries / `20`)) return TRUE; // Degenerated, many poles
1048
1049	return FALSE;
1050	}
1051
1052	// --------------------------------------------------------------------------------------------------------------
// We need throughput over here
1054
1055	static
1056	cmsBool OptimizeByComputingLinearization(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1057	{
1058	cmsPipeline* OriginalLut;
1059	cmsUInt32Number nGridPoints;
1060	cmsToneCurve Trans[cmsMAXCHANNELS], TransReverse[cmsMAXCHANNELS];
1061	cmsUInt32Number t, i;
1062	cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS];
1063	cmsBool lIsSuitable, lIsLinear;
1064	cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL;
1065	cmsStage* OptimizedCLUTmpe;
1066	cmsColorSpaceSignature ColorSpace, OutputColorSpace;
1067	cmsStage* OptimizedPrelinMpe;
1068	cmsStage* mpe;
1069	cmsToneCurve** OptimizedPrelinCurves;
1070	_cmsStageCLutData* OptimizedPrelinCLUT;
1071
1072
1073	// This is a loosy optimization! does not apply in floating-point cases
1074	if (_cmsFormatterIsFloat(InputFormat) \|\| _cmsFormatterIsFloat(OutputFormat)) return FALSE;
1075
1076	// Only on chunky RGB
1077	if (T_COLORSPACE(InputFormat) != PT_RGB) return* FALSE;
1078	if (T_PLANAR(InputFormat)) return* FALSE;
1079
1080	if (T_COLORSPACE(OutputFormat) != PT_RGB) return* FALSE;
1081	if (T_PLANAR(OutputFormat)) return* FALSE;
1082
1083	// On 16 bits, user has to specify the feature
1084	if (!_cmsFormatterIs8bit(*InputFormat)) {
1085	if (!(dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return* FALSE;
1086	}
1087
1088	OriginalLut = *Lut;
1089
1090	// Named color pipelines cannot be optimized either
1091	for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
1092	mpe != NULL;
1093	mpe = cmsStageNext(mpe)) {
1094	if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
1095	}
1096
1097	ColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*InputFormat));
1098	OutputColorSpace = _cmsICCcolorSpace((int) T_COLORSPACE(*OutputFormat));
1099
1100	// Color space must be specified
1101	if (ColorSpace == (cmsColorSpaceSignature)`0` \|\|
1102	OutputColorSpace == (cmsColorSpaceSignature)`0`) return FALSE;
1103
1104	nGridPoints = _cmsReasonableGridpointsByColorspace(ColorSpace, *dwFlags);
1105
1106	// Empty gamma containers
1107	memset(Trans, `0`, sizeof(Trans));
1108	memset(TransReverse, `0`, sizeof(TransReverse));
1109
1110	// If the last stage of the original lut are curves, and those curves are
1111	// degenerated, it is likely the transform is squeezing and clipping
1112	// the output from previous CLUT. We cannot optimize this case
1113	{
1114	cmsStage* last = cmsPipelineGetPtrToLastStage(OriginalLut);
1115
1116	if (cmsStageType(last) == cmsSigCurveSetElemType) {
1117
1118	_cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(last);
1119	for (i = `0`; i < Data->nCurves; i++) {
1120	if (IsDegenerated(Data->TheCurves[i]))
1121	goto Error;
1122	}
1123	}
1124	}
1125
1126	for (t = `0`; t < OriginalLut ->InputChannels; t++) {
1127	Trans[t] = cmsBuildTabulatedToneCurve16(OriginalLut ->ContextID, PRELINEARIZATION_POINTS, NULL);
1128	if (Trans[t] == NULL) goto Error;
1129	}
1130
1131	// Populate the curves
1132	for (i=`0`; i < PRELINEARIZATION_POINTS; i++) {
1133
1134	v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - `1`));
1135
1136	// Feed input with a gray ramp
1137	for (t=`0`; t < OriginalLut ->InputChannels; t++)
1138	In[t] = v;
1139
1140	// Evaluate the gray value
1141	cmsPipelineEvalFloat(In, Out, OriginalLut);
1142
1143	// Store result in curve
1144	for (t=`0`; t < OriginalLut ->InputChannels; t++)
1145	Trans[t] ->Table16[i] = _cmsQuickSaturateWord(Out[t] * `65535.0`);
1146	}
1147
1148	// Slope-limit the obtained curves
1149	for (t = `0`; t < OriginalLut ->InputChannels; t++)
1150	SlopeLimiting(Trans[t]);
1151
1152	// Check for validity
1153	lIsSuitable = TRUE;
1154	lIsLinear = TRUE;
1155	for (t=`0`; (lIsSuitable && (t < OriginalLut ->InputChannels)); t++) {
1156
1157	// Exclude if already linear
1158	if (!cmsIsToneCurveLinear(Trans[t]))
1159	lIsLinear = FALSE;
1160
1161	// Exclude if non-monotonic
1162	if (!cmsIsToneCurveMonotonic(Trans[t]))
1163	lIsSuitable = FALSE;
1164
1165	if (IsDegenerated(Trans[t]))
1166	lIsSuitable = FALSE;
1167	}
1168
1169	// If it is not suitable, just quit
1170	if (!lIsSuitable) goto Error;
1171
1172	// Invert curves if possible
1173	for (t = `0`; t < OriginalLut ->InputChannels; t++) {
1174	TransReverse[t] = cmsReverseToneCurveEx(PRELINEARIZATION_POINTS, Trans[t]);
1175	if (TransReverse[t] == NULL) goto Error;
1176	}
1177
1178	// Now inset the reversed curves at the begin of transform
1179	LutPlusCurves = cmsPipelineDup(OriginalLut);
1180	if (LutPlusCurves == NULL) goto Error;
1181
1182	if (!cmsPipelineInsertStage(LutPlusCurves, cmsAT_BEGIN, cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, TransReverse)))
1183	goto Error;
1184
1185	// Create the result LUT
1186	OptimizedLUT = cmsPipelineAlloc(OriginalLut ->ContextID, OriginalLut ->InputChannels, OriginalLut ->OutputChannels);
1187	if (OptimizedLUT == NULL) goto Error;
1188
1189	OptimizedPrelinMpe = cmsStageAllocToneCurves(OriginalLut ->ContextID, OriginalLut ->InputChannels, Trans);
1190
1191	// Create and insert the curves at the beginning
1192	if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedPrelinMpe))
1193	goto Error;
1194
1195	// Allocate the CLUT for result
1196	OptimizedCLUTmpe = cmsStageAllocCLut16bit(OriginalLut ->ContextID, nGridPoints, OriginalLut ->InputChannels, OriginalLut ->OutputChannels, NULL);
1197
1198	// Add the CLUT to the destination LUT
1199	if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_END, OptimizedCLUTmpe))
1200	goto Error;
1201
1202	// Resample the LUT
1203	if (!cmsStageSampleCLut16bit(OptimizedCLUTmpe, XFormSampler16, (void) LutPlusCurves, `0`)) goto* Error;
1204
1205	// Free resources
1206	for (t = `0`; t < OriginalLut ->InputChannels; t++) {
1207
1208	if (Trans[t]) cmsFreeToneCurve(Trans[t]);
1209	if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
1210	}
1211
1212	cmsPipelineFree(LutPlusCurves);
1213
1214
1215	OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe);
1216	OptimizedPrelinCLUT = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data;
1217
1218	// Set the evaluator if 8-bit
1219	if (_cmsFormatterIs8bit(*InputFormat)) {
1220
1221	Prelin8Data* p8 = PrelinOpt8alloc(OptimizedLUT ->ContextID,
1222	OptimizedPrelinCLUT ->Params,
1223	OptimizedPrelinCurves);
1224	if (p8 == NULL) {
1225	cmsPipelineFree(OptimizedLUT);
1226	return FALSE;
1227	}
1228
1229	_cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup);
1230
1231	}
1232	else
1233	{
1234	Prelin16Data* p16 = PrelinOpt16alloc(OptimizedLUT ->ContextID,
1235	OptimizedPrelinCLUT ->Params,
1236	`3`, OptimizedPrelinCurves, `3`, NULL);
1237	if (p16 == NULL) {
1238	cmsPipelineFree(OptimizedLUT);
1239	return FALSE;
1240	}
1241
1242	_cmsPipelineSetOptimizationParameters(OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup);
1243
1244	}
1245
1246	// Don't fix white on absolute colorimetric
1247	if (Intent == INTENT_ABSOLUTE_COLORIMETRIC)
1248	*dwFlags \|= cmsFLAGS_NOWHITEONWHITEFIXUP;
1249
1250	if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) {
1251
1252	if (!FixWhiteMisalignment(OptimizedLUT, ColorSpace, OutputColorSpace)) {
1253
1254	return FALSE;
1255	}
1256	}
1257
1258	// And return the obtained LUT
1259
1260	cmsPipelineFree(OriginalLut);
1261	*Lut = OptimizedLUT;
1262	return TRUE;
1263
1264	Error:
1265
1266	for (t = `0`; t < OriginalLut ->InputChannels; t++) {
1267
1268	if (Trans[t]) cmsFreeToneCurve(Trans[t]);
1269	if (TransReverse[t]) cmsFreeToneCurve(TransReverse[t]);
1270	}
1271
1272	if (LutPlusCurves != NULL) cmsPipelineFree(LutPlusCurves);
1273	if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
1274
1275	return FALSE;
1276
1277	cmsUNUSED_PARAMETER(Intent);
1278	cmsUNUSED_PARAMETER(lIsLinear);
1279	}
1280
1281
1282	// Curves optimizer ------------------------------------------------------------------------------------------------------------------
1283
1284	static
1285	void CurvesFree(cmsContext ContextID, void* ptr)
1286	{
1287	Curves16Data* Data = (Curves16Data*) ptr;
1288	cmsUInt32Number i;
1289
1290	for (i=`0`; i < Data -> nCurves; i++) {
1291
1292	_cmsFree(ContextID, Data ->Curves[i]);
1293	}
1294
1295	_cmsFree(ContextID, Data ->Curves);
1296	_cmsFree(ContextID, ptr);
1297	}
1298
1299	static
1300	void* CurvesDup(cmsContext ContextID, const void* ptr)
1301	{
1302	Curves16Data* Data = (Curves16Data)_cmsDupMem(ContextID, ptr, sizeof*(Curves16Data));
1303	cmsUInt32Number i;
1304
1305	if (Data == NULL) return NULL;
1306
1307	Data->Curves = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves, Data->nCurves sizeof(cmsUInt16Number*));
1308
1309	for (i=`0`; i < Data -> nCurves; i++) {
1310	Data->Curves[i] = (cmsUInt16Number) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements sizeof(cmsUInt16Number));
1311	}
1312
1313	return (void*) Data;
1314	}
1315
1316	// Precomputes tables for 8-bit on input devicelink.
1317	static
1318	Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G)
1319	{
1320	cmsUInt32Number i, j;
1321	Curves16Data* c16;
1322
1323	c16 = (Curves16Data)_cmsMallocZero(ContextID, sizeof*(Curves16Data));
1324	if (c16 == NULL) return NULL;
1325
1326	c16 ->nCurves = nCurves;
1327	c16 ->nElements = nElements;
1328
1329	c16->Curves = (cmsUInt16Number) _cmsCalloc(ContextID, nCurves, sizeof*(cmsUInt16Number));
1330	if (c16->Curves == NULL) {
1331	_cmsFree(ContextID, c16);
1332	return NULL;
1333	}
1334
1335	for (i=`0`; i < nCurves; i++) {
1336
1337	c16->Curves[i] = (cmsUInt16Number) _cmsCalloc(ContextID, nElements, sizeof*(cmsUInt16Number));
1338
1339	if (c16->Curves[i] == NULL) {
1340
1341	for (j=`0`; j < i; j++) {
1342	_cmsFree(ContextID, c16->Curves[j]);
1343	}
1344	_cmsFree(ContextID, c16->Curves);
1345	_cmsFree(ContextID, c16);
1346	return NULL;
1347	}
1348
1349	if (nElements == `256U`) {
1350
1351	for (j=`0`; j < nElements; j++) {
1352
1353	c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], FROM_8_TO_16(j));
1354	}
1355	}
1356	else {
1357
1358	for (j=`0`; j < nElements; j++) {
1359	c16 ->Curves[i][j] = cmsEvalToneCurve16(G[i], (cmsUInt16Number) j);
1360	}
1361	}
1362	}
1363
1364	return c16;
1365	}
1366
1367	static
1368	void FastEvaluateCurves8(register const cmsUInt16Number In[],
1369	register cmsUInt16Number Out[],
1370	register const void* D)
1371	{
1372	Curves16Data* Data = (Curves16Data*) D;
1373	int x;
1374	cmsUInt32Number i;
1375
1376	for (i=`0`; i < Data ->nCurves; i++) {
1377
1378	x = (In[i] >> `8`);
1379	Out[i] = Data -> Curves[i][x];
1380	}
1381	}
1382
1383
1384	static
1385	void FastEvaluateCurves16(register const cmsUInt16Number In[],
1386	register cmsUInt16Number Out[],
1387	register const void* D)
1388	{
1389	Curves16Data* Data = (Curves16Data*) D;
1390	cmsUInt32Number i;
1391
1392	for (i=`0`; i < Data ->nCurves; i++) {
1393	Out[i] = Data -> Curves[i][In[i]];
1394	}
1395	}
1396
1397
1398	static
1399	void FastIdentity16(register const cmsUInt16Number In[],
1400	register cmsUInt16Number Out[],
1401	register const void* D)
1402	{
1403	cmsPipeline* Lut = (cmsPipeline*) D;
1404	cmsUInt32Number i;
1405
1406	for (i=`0`; i < Lut ->InputChannels; i++) {
1407	Out[i] = In[i];
1408	}
1409	}
1410
1411
1412	// If the target LUT holds only curves, the optimization procedure is to join all those
1413	// curves together. That only works on curves and does not work on matrices.
1414	static
1415	cmsBool OptimizeByJoiningCurves(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1416	{
1417	cmsToneCurve** GammaTables = NULL;
1418	cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS];
1419	cmsUInt32Number i, j;
1420	cmsPipeline* Src = *Lut;
1421	cmsPipeline* Dest = NULL;
1422	cmsStage* mpe;
1423	cmsStage* ObtainedCurves = NULL;
1424
1425
1426	// This is a loosy optimization! does not apply in floating-point cases
1427	if (_cmsFormatterIsFloat(InputFormat) \|\| _cmsFormatterIsFloat(OutputFormat)) return FALSE;
1428
1429	// Only curves in this LUT?
1430	for (mpe = cmsPipelineGetPtrToFirstStage(Src);
1431	mpe != NULL;
1432	mpe = cmsStageNext(mpe)) {
1433	if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE;
1434	}
1435
1436	// Allocate an empty LUT
1437	Dest = cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1438	if (Dest == NULL) return FALSE;
1439
1440	// Create target curves
1441	GammaTables = (cmsToneCurve) _cmsCalloc(Src ->ContextID, Src ->InputChannels, sizeof*(cmsToneCurve));
1442	if (GammaTables == NULL) goto Error;
1443
1444	for (i=`0`; i < Src ->InputChannels; i++) {
1445	GammaTables[i] = cmsBuildTabulatedToneCurve16(Src ->ContextID, PRELINEARIZATION_POINTS, NULL);
1446	if (GammaTables[i] == NULL) goto Error;
1447	}
1448
1449	// Compute 16 bit result by using floating point
1450	for (i=`0`; i < PRELINEARIZATION_POINTS; i++) {
1451
1452	for (j=`0`; j < Src ->InputChannels; j++)
1453	InFloat[j] = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - `1`));
1454
1455	cmsPipelineEvalFloat(InFloat, OutFloat, Src);
1456
1457	for (j=`0`; j < Src ->InputChannels; j++)
1458	GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * `65535.0`);
1459	}
1460
1461	ObtainedCurves = cmsStageAllocToneCurves(Src ->ContextID, Src ->InputChannels, GammaTables);
1462	if (ObtainedCurves == NULL) goto Error;
1463
1464	for (i=`0`; i < Src ->InputChannels; i++) {
1465	cmsFreeToneCurve(GammaTables[i]);
1466	GammaTables[i] = NULL;
1467	}
1468
1469	if (GammaTables != NULL) {
1470	_cmsFree(Src->ContextID, GammaTables);
1471	GammaTables = NULL;
1472	}
1473
1474	// Maybe the curves are linear at the end
1475	if (!AllCurvesAreLinear(ObtainedCurves)) {
1476
1477	if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, ObtainedCurves))
1478	goto Error;
1479
1480	// If the curves are to be applied in 8 bits, we can save memory
1481	if (_cmsFormatterIs8bit(*InputFormat)) {
1482
1483	_cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) ObtainedCurves ->Data;
1484	Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, `256`, Data ->TheCurves);
1485
1486	if (c16 == NULL) goto Error;
1487	*dwFlags \|= cmsFLAGS_NOCACHE;
1488	_cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup);
1489
1490	}
1491	else {
1492
1493	_cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*) cmsStageData(ObtainedCurves);
1494	Curves16Data* c16 = CurvesAlloc(Dest ->ContextID, Data ->nCurves, `65536`, Data ->TheCurves);
1495
1496	if (c16 == NULL) goto Error;
1497	*dwFlags \|= cmsFLAGS_NOCACHE;
1498	_cmsPipelineSetOptimizationParameters(Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup);
1499	}
1500	}
1501	else {
1502
1503	// LUT optimizes to nothing. Set the identity LUT
1504	cmsStageFree(ObtainedCurves);
1505	ObtainedCurves = NULL;
1506
1507	if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageAllocIdentity(Dest ->ContextID, Src ->InputChannels)))
1508	goto Error;
1509
1510	*dwFlags \|= cmsFLAGS_NOCACHE;
1511	_cmsPipelineSetOptimizationParameters(Dest, FastIdentity16, (void*) Dest, NULL, NULL);
1512	}
1513
1514	// We are done.
1515	cmsPipelineFree(Src);
1516	*Lut = Dest;
1517	return TRUE;
1518
1519	Error:
1520
1521	if (ObtainedCurves != NULL) cmsStageFree(ObtainedCurves);
1522	if (GammaTables != NULL) {
1523	for (i=`0`; i < Src ->InputChannels; i++) {
1524	if (GammaTables[i] != NULL) cmsFreeToneCurve(GammaTables[i]);
1525	}
1526
1527	_cmsFree(Src ->ContextID, GammaTables);
1528	}
1529
1530	if (Dest != NULL) cmsPipelineFree(Dest);
1531	return FALSE;
1532
1533	cmsUNUSED_PARAMETER(Intent);
1534	cmsUNUSED_PARAMETER(InputFormat);
1535	cmsUNUSED_PARAMETER(OutputFormat);
1536	cmsUNUSED_PARAMETER(dwFlags);
1537	}
1538
1539	// -------------------------------------------------------------------------------------------------------------------------------------
1540	// LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles
1541
1542
1543	static
1544	void FreeMatShaper(cmsContext ContextID, void* Data)
1545	{
1546	if (Data != NULL) _cmsFree(ContextID, Data);
1547	}
1548
1549	static
1550	void* DupMatShaper(cmsContext ContextID, const void* Data)
1551	{
1552	return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data));
1553	}
1554
1555
1556	// A fast matrix-shaper evaluator for 8 bits. This is a bit ticky since I'm using 1.14 signed fixed point
1557	// to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits,
1558	// in total about 50K, and the performance boost is huge!
1559	static
1560	void MatShaperEval16(register const cmsUInt16Number In[],
1561	register cmsUInt16Number Out[],
1562	register const void* D)
1563	{
1564	MatShaper8Data* p = (MatShaper8Data*) D;
1565	cmsS1Fixed14Number l1, l2, l3, r, g, b;
1566	cmsUInt32Number ri, gi, bi;
1567
1568	// In this case (and only in this case!) we can use this simplification since
1569	// In[] is assured to come from a 8 bit number. (a << 8 \| a)
1570	ri = In[`0`] & `0xFFU`;
1571	gi = In[`1`] & `0xFFU`;
1572	bi = In[`2`] & `0xFFU`;
1573
1574	// Across first shaper, which also converts to 1.14 fixed point
1575	r = p->Shaper1R[ri];
1576	g = p->Shaper1G[gi];
1577	b = p->Shaper1B[bi];
1578
1579	// Evaluate the matrix in 1.14 fixed point
1580	l1 = (p->Mat[`0`][`0`] * r + p->Mat[`0`][`1`] * g + p->Mat[`0`][`2`] * b + p->Off[`0`] + `0x2000`) >> `14`;
1581	l2 = (p->Mat[`1`][`0`] * r + p->Mat[`1`][`1`] * g + p->Mat[`1`][`2`] * b + p->Off[`1`] + `0x2000`) >> `14`;
1582	l3 = (p->Mat[`2`][`0`] * r + p->Mat[`2`][`1`] * g + p->Mat[`2`][`2`] * b + p->Off[`2`] + `0x2000`) >> `14`;
1583
1584	// Now we have to clip to 0..1.0 range
1585	ri = (l1 < `0`) ? `0` : ((l1 > `16384`) ? `16384U` : (cmsUInt32Number) l1);
1586	gi = (l2 < `0`) ? `0` : ((l2 > `16384`) ? `16384U` : (cmsUInt32Number) l2);
1587	bi = (l3 < `0`) ? `0` : ((l3 > `16384`) ? `16384U` : (cmsUInt32Number) l3);
1588
1589	// And across second shaper,
1590	Out[`0`] = p->Shaper2R[ri];
1591	Out[`1`] = p->Shaper2G[gi];
1592	Out[`2`] = p->Shaper2B[bi];
1593
1594	}
1595
1596	// This table converts from 8 bits to 1.14 after applying the curve
1597	static
1598	void FillFirstShaper(cmsS1Fixed14Number* Table, cmsToneCurve* Curve)
1599	{
1600	int i;
1601	cmsFloat32Number R, y;
1602
1603	for (i=`0`; i < `256`; i++) {
1604
1605	R = (cmsFloat32Number) (i / `255.0`);
1606	y = cmsEvalToneCurveFloat(Curve, R);
1607
1608	if (y < `131072.0`)
1609	Table[i] = DOUBLE_TO_1FIXED14(y);
1610	else
1611	Table[i] = `0x7fffffff`;
1612	}
1613	}
1614
1615	// This table converts form 1.14 (being 0x4000 the last entry) to 8 bits after applying the curve
1616	static
1617	void FillSecondShaper(cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput)
1618	{
1619	int i;
1620	cmsFloat32Number R, Val;
1621
1622	for (i=`0`; i < `16385`; i++) {
1623
1624	R = (cmsFloat32Number) (i / `16384.0`);
1625	Val = cmsEvalToneCurveFloat(Curve, R); // Val comes 0..1.0
1626
1627	if (Val < `0`)
1628	Val = `0`;
1629
1630	if (Val > `1.0`)
1631	Val = `1.0`;
1632
1633	if (Is8BitsOutput) {
1634
1635	// If 8 bits output, we can optimize further by computing the / 257 part.
1636	// first we compute the resulting byte and then we store the byte times
1637	// 257. This quantization allows to round very quick by doing a >> 8, but
1638	// since the low byte is always equal to msb, we can do a & 0xff and this works!
1639	cmsUInt16Number w = _cmsQuickSaturateWord(Val * `65535.0`);
1640	cmsUInt8Number b = FROM_16_TO_8(w);
1641
1642	Table[i] = FROM_8_TO_16(b);
1643	}
1644	else Table[i] = _cmsQuickSaturateWord(Val * `65535.0`);
1645	}
1646	}
1647
1648	// Compute the matrix-shaper structure
1649	static
1650	cmsBool SetMatShaper(cmsPipeline* Dest, cmsToneCurve* Curve1[`3`], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[`3`], cmsUInt32Number* OutputFormat)
1651	{
1652	MatShaper8Data* p;
1653	int i, j;
1654	cmsBool Is8Bits = _cmsFormatterIs8bit(*OutputFormat);
1655
1656	// Allocate a big chuck of memory to store precomputed tables
1657	p = (MatShaper8Data) _cmsMalloc(Dest ->ContextID, sizeof*(MatShaper8Data));
1658	if (p == NULL) return FALSE;
1659
1660	p -> ContextID = Dest -> ContextID;
1661
1662	// Precompute tables
1663	FillFirstShaper(p ->Shaper1R, Curve1[`0`]);
1664	FillFirstShaper(p ->Shaper1G, Curve1[`1`]);
1665	FillFirstShaper(p ->Shaper1B, Curve1[`2`]);
1666
1667	FillSecondShaper(p ->Shaper2R, Curve2[`0`], Is8Bits);
1668	FillSecondShaper(p ->Shaper2G, Curve2[`1`], Is8Bits);
1669	FillSecondShaper(p ->Shaper2B, Curve2[`2`], Is8Bits);
1670
1671	// Convert matrix to nFixed14. Note that those values may take more than 16 bits
1672	for (i=`0`; i < `3`; i++) {
1673	for (j=`0`; j < `3`; j++) {
1674	p ->Mat[i][j] = DOUBLE_TO_1FIXED14(Mat->v[i].n[j]);
1675	}
1676	}
1677
1678	for (i=`0`; i < `3`; i++) {
1679
1680	if (Off == NULL) {
1681	p ->Off[i] = `0`;
1682	}
1683	else {
1684	p ->Off[i] = DOUBLE_TO_1FIXED14(Off->n[i]);
1685	}
1686	}
1687
1688	// Mark as optimized for faster formatter
1689	if (Is8Bits)
1690	*OutputFormat \|= OPTIMIZED_SH(`1`);
1691
1692	// Fill function pointers
1693	_cmsPipelineSetOptimizationParameters(Dest, MatShaperEval16, (void*) p, FreeMatShaper, DupMatShaper);
1694	return TRUE;
1695	}
1696
1697	// 8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast!
1698	static
1699	cmsBool OptimizeMatrixShaper(cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags)
1700	{
1701	cmsStage* Curve1, *Curve2;
1702	cmsStage* Matrix1, *Matrix2;
1703	cmsMAT3 res;
1704	cmsBool IdentityMat;
1705	cmsPipeline* Dest, *Src;
1706	cmsFloat64Number* Offset;
1707
1708	// Only works on RGB to RGB
1709	if (T_CHANNELS(InputFormat) != `3` \|\| T_CHANNELS(OutputFormat) != `3`) return FALSE;
1710
1711	// Only works on 8 bit input
1712	if (!_cmsFormatterIs8bit(InputFormat)) return* FALSE;
1713
1714	// Seems suitable, proceed
1715	Src = *Lut;
1716
1717	// Check for:
1718	//
1719	// shaper-matrix-matrix-shaper
1720	// shaper-matrix-shaper
1721	//
1722	// Both of those constructs are possible (first because abs. colorimetric).
1723	// additionally, In the first case, the input matrix offset should be zero.
1724
1725	IdentityMat = FALSE;
1726	if (cmsPipelineCheckAndRetreiveStages(Src, `4`,
1727	cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
1728	&Curve1, &Matrix1, &Matrix2, &Curve2)) {
1729
1730	// Get both matrices
1731	_cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(Matrix1);
1732	_cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(Matrix2);
1733
1734	// Input offset should be zero
1735	if (Data1->Offset != NULL) return FALSE;
1736
1737	// Multiply both matrices to get the result
1738	_cmsMAT3per(&res, (cmsMAT3)Data2->Double, (cmsMAT3)Data1->Double);
1739
1740	// Only 2nd matrix has offset, or it is zero
1741	Offset = Data2->Offset;
1742
1743	// Now the result is in res + Data2 -> Offset. Maybe is a plain identity?
1744	if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
1745
1746	// We can get rid of full matrix
1747	IdentityMat = TRUE;
1748	}
1749
1750	}
1751	else {
1752
1753	if (cmsPipelineCheckAndRetreiveStages(Src, `3`,
1754	cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType,
1755	&Curve1, &Matrix1, &Curve2)) {
1756
1757	_cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(Matrix1);
1758
1759	// Copy the matrix to our result
1760	memcpy(&res, Data->Double, sizeof(res));
1761
1762	// Preserve the Odffset (may be NULL as a zero offset)
1763	Offset = Data->Offset;
1764
1765	if (_cmsMAT3isIdentity(&res) && Offset == NULL) {
1766
1767	// We can get rid of full matrix
1768	IdentityMat = TRUE;
1769	}
1770	}
1771	else
1772	return FALSE; // Not optimizeable this time
1773
1774	}
1775
1776	// Allocate an empty LUT
1777	Dest = cmsPipelineAlloc(Src ->ContextID, Src ->InputChannels, Src ->OutputChannels);
1778	if (!Dest) return FALSE;
1779
1780	// Assamble the new LUT
1781	if (!cmsPipelineInsertStage(Dest, cmsAT_BEGIN, cmsStageDup(Curve1)))
1782	goto Error;
1783
1784	if (!IdentityMat) {
1785
1786	if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageAllocMatrix(Dest->ContextID, `3`, `3`, (const cmsFloat64Number*)&res, Offset)))
1787	goto Error;
1788	}
1789
1790	if (!cmsPipelineInsertStage(Dest, cmsAT_END, cmsStageDup(Curve2)))
1791	goto Error;
1792
1793	// If identity on matrix, we can further optimize the curves, so call the join curves routine
1794	if (IdentityMat) {
1795
1796	OptimizeByJoiningCurves(&Dest, Intent, InputFormat, OutputFormat, dwFlags);
1797	}
1798	else {
1799	_cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(Curve1);
1800	_cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(Curve2);
1801
1802	// In this particular optimization, cach� does not help as it takes more time to deal with
1803	// the cach� that with the pixel handling
1804	*dwFlags \|= cmsFLAGS_NOCACHE;
1805
1806	// Setup the optimizarion routines
1807	SetMatShaper(Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat);
1808	}
1809
1810	cmsPipelineFree(Src);
1811	*Lut = Dest;
1812	return TRUE;
1813	Error:
1814	// Leave Src unchanged
1815	cmsPipelineFree(Dest);
1816	return FALSE;
1817	}
1818
1819
1820	// -------------------------------------------------------------------------------------------------------------------------------------
1821	// Optimization plug-ins
1822
1823	// List of optimizations
1824	typedef struct _cmsOptimizationCollection_st {
1825
1826	_cmsOPToptimizeFn OptimizePtr;
1827
1828	struct _cmsOptimizationCollection_st *Next;
1829
1830	} _cmsOptimizationCollection;
1831
1832
1833	// The built-in list. We currently implement 4 types of optimizations. Joining of curves, matrix-shaper, linearization and resampling
1834	static _cmsOptimizationCollection DefaultOptimization[] = {
1835
1836	{ OptimizeByJoiningCurves, &DefaultOptimization[`1`] },
1837	{ OptimizeMatrixShaper, &DefaultOptimization[`2`] },
1838	{ OptimizeByComputingLinearization, &DefaultOptimization[`3`] },
1839	{ OptimizeByResampling, NULL }
1840	};
1841
1842	// The linked list head
1843	_cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = { NULL };
1844
1845
1846	// Duplicates the zone of memory used by the plug-in in the new context
1847	static
1848	void DupPluginOptimizationList(struct _cmsContext_struct* ctx,
1849	const struct _cmsContext_struct* src)
1850	{
1851	_cmsOptimizationPluginChunkType newHead = { NULL };
1852	_cmsOptimizationCollection* entry;
1853	_cmsOptimizationCollection* Anterior = NULL;
1854	_cmsOptimizationPluginChunkType* head = (_cmsOptimizationPluginChunkType*) src->chunks[OptimizationPlugin];
1855
1856	_cmsAssert(ctx != NULL);
1857	_cmsAssert(head != NULL);
1858
1859	// Walk the list copying all nodes
1860	for (entry = head->OptimizationCollection;
1861	entry != NULL;
1862	entry = entry ->Next) {
1863
1864	_cmsOptimizationCollection newEntry = ( _cmsOptimizationCollection ) _cmsSubAllocDup(ctx ->MemPool, entry, sizeof(_cmsOptimizationCollection));
1865
1866	if (newEntry == NULL)
1867	return;
1868
1869	// We want to keep the linked list order, so this is a little bit tricky
1870	newEntry -> Next = NULL;
1871	if (Anterior)
1872	Anterior -> Next = newEntry;
1873
1874	Anterior = newEntry;
1875
1876	if (newHead.OptimizationCollection == NULL)
1877	newHead.OptimizationCollection = newEntry;
1878	}
1879
1880	ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsOptimizationPluginChunkType));
1881	}
1882
1883	void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx,
1884	const struct _cmsContext_struct* src)
1885	{
1886	if (src != NULL) {
1887
1888	// Copy all linked list
1889	DupPluginOptimizationList(ctx, src);
1890	}
1891	else {
1892	static _cmsOptimizationPluginChunkType OptimizationPluginChunkType = { NULL };
1893	ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx ->MemPool, &OptimizationPluginChunkType, sizeof(_cmsOptimizationPluginChunkType));
1894	}
1895	}
1896
1897
1898	// Register new ways to optimize
1899	cmsBool _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data)
1900	{
1901	cmsPluginOptimization* Plugin = (cmsPluginOptimization*) Data;
1902	_cmsOptimizationPluginChunkType* ctx = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
1903	_cmsOptimizationCollection* fl;
1904
1905	if (Data == NULL) {
1906
1907	ctx->OptimizationCollection = NULL;
1908	return TRUE;
1909	}
1910
1911	// Optimizer callback is required
1912	if (Plugin ->OptimizePtr == NULL) return FALSE;
1913
1914	fl = (_cmsOptimizationCollection) _cmsPluginMalloc(ContextID, sizeof*(_cmsOptimizationCollection));
1915	if (fl == NULL) return FALSE;
1916
1917	// Copy the parameters
1918	fl ->OptimizePtr = Plugin ->OptimizePtr;
1919
1920	// Keep linked list
1921	fl ->Next = ctx->OptimizationCollection;
1922
1923	// Set the head
1924	ctx ->OptimizationCollection = fl;
1925
1926	// All is ok
1927	return TRUE;
1928	}
1929
1930	// The entry point for LUT optimization
1931	cmsBool _cmsOptimizePipeline(cmsContext ContextID,
1932	cmsPipeline** PtrLut,
1933	cmsUInt32Number Intent,
1934	cmsUInt32Number* InputFormat,
1935	cmsUInt32Number* OutputFormat,
1936	cmsUInt32Number* dwFlags)
1937	{
1938	_cmsOptimizationPluginChunkType* ctx = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin);
1939	_cmsOptimizationCollection* Opts;
1940	cmsBool AnySuccess = FALSE;
1941
1942	// A CLUT is being asked, so force this specific optimization
1943	if (*dwFlags & cmsFLAGS_FORCE_CLUT) {
1944
1945	PreOptimize(*PtrLut);
1946	return OptimizeByResampling(PtrLut, Intent, InputFormat, OutputFormat, dwFlags);
1947	}
1948
1949	// Anything to optimize?
1950	if ((*PtrLut) ->Elements == NULL) {
1951	_cmsPipelineSetOptimizationParameters(PtrLut, FastIdentity16, (void*) PtrLut, NULL, NULL);
1952	return TRUE;
1953	}
1954
1955	// Try to get rid of identities and trivial conversions.
1956	AnySuccess = PreOptimize(*PtrLut);
1957
1958	// After removal do we end with an identity?
1959	if ((*PtrLut) ->Elements == NULL) {
1960	_cmsPipelineSetOptimizationParameters(PtrLut, FastIdentity16, (void*) PtrLut, NULL, NULL);
1961	return TRUE;
1962	}
1963
1964	// Do not optimize, keep all precision
1965	if (*dwFlags & cmsFLAGS_NOOPTIMIZE)
1966	return FALSE;
1967
1968	// Try plug-in optimizations
1969	for (Opts = ctx->OptimizationCollection;
1970	Opts != NULL;
1971	Opts = Opts ->Next) {
1972
1973	// If one schema succeeded, we are done
1974	if (Opts ->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags)) {
1975
1976	return TRUE; // Optimized!
1977	}
1978	}
1979
1980	// Try built-in optimizations
1981	for (Opts = DefaultOptimization;
1982	Opts != NULL;
1983	Opts = Opts ->Next) {
1984
1985	if (Opts ->OptimizePtr(PtrLut, Intent, InputFormat, OutputFormat, dwFlags)) {
1986
1987	return TRUE;
1988	}
1989	}
1990
1991	// Only simple optimizations succeeded
1992	return AnySuccess;
1993	}
1994
1995
1996
1997

Browse the source code of OpenJDK/src/java.desktop/share/native/liblcms/cmsopt.c