1 | //--------------------------------------------------------------------------------- |
2 | // |
3 | // Little Color Management System |
4 | // Copyright (c) 1998-2017 Marti Maria Saguer |
5 | // |
6 | // Permission is hereby granted, free of charge, to any person obtaining |
7 | // a copy of this software and associated documentation files (the "Software"), |
8 | // to deal in the Software without restriction, including without limitation |
9 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | // and/or sell copies of the Software, and to permit persons to whom the Software |
11 | // is furnished to do so, subject to the following conditions: |
12 | // |
13 | // The above copyright notice and this permission notice shall be included in |
14 | // all copies or substantial portions of the Software. |
15 | // |
16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO |
18 | // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
19 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
20 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
21 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
22 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | // |
24 | //--------------------------------------------------------------------------------- |
25 | // |
26 | |
27 | #include "lcms2_internal.h" |
28 | |
29 | |
30 | //---------------------------------------------------------------------------------- |
31 | |
32 | // Optimization for 8 bits, Shaper-CLUT (3 inputs only) |
33 | typedef struct { |
34 | |
35 | cmsContext ContextID; |
36 | |
37 | const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer. |
38 | |
39 | cmsUInt16Number rx[256], ry[256], rz[256]; |
40 | cmsUInt32Number X0[256], Y0[256], Z0[256]; // Precomputed nodes and offsets for 8-bit input data |
41 | |
42 | |
43 | } Prelin8Data; |
44 | |
45 | |
46 | // Generic optimization for 16 bits Shaper-CLUT-Shaper (any inputs) |
47 | typedef struct { |
48 | |
49 | cmsContext ContextID; |
50 | |
51 | // Number of channels |
52 | cmsUInt32Number nInputs; |
53 | cmsUInt32Number nOutputs; |
54 | |
55 | _cmsInterpFn16 EvalCurveIn16[MAX_INPUT_DIMENSIONS]; // The maximum number of input channels is known in advance |
56 | cmsInterpParams* ParamsCurveIn16[MAX_INPUT_DIMENSIONS]; |
57 | |
58 | _cmsInterpFn16 EvalCLUT; // The evaluator for 3D grid |
59 | const cmsInterpParams* CLUTparams; // (not-owned pointer) |
60 | |
61 | |
62 | _cmsInterpFn16* EvalCurveOut16; // Points to an array of curve evaluators in 16 bits (not-owned pointer) |
63 | cmsInterpParams** ParamsCurveOut16; // Points to an array of references to interpolation params (not-owned pointer) |
64 | |
65 | |
66 | } Prelin16Data; |
67 | |
68 | |
69 | // Optimization for matrix-shaper in 8 bits. Numbers are operated in n.14 signed, tables are stored in 1.14 fixed |
70 | |
71 | typedef cmsInt32Number cmsS1Fixed14Number; // Note that this may hold more than 16 bits! |
72 | |
73 | #define DOUBLE_TO_1FIXED14(x) ((cmsS1Fixed14Number) floor((x) * 16384.0 + 0.5)) |
74 | |
75 | typedef struct { |
76 | |
77 | cmsContext ContextID; |
78 | |
79 | cmsS1Fixed14Number Shaper1R[256]; // from 0..255 to 1.14 (0.0...1.0) |
80 | cmsS1Fixed14Number Shaper1G[256]; |
81 | cmsS1Fixed14Number Shaper1B[256]; |
82 | |
83 | cmsS1Fixed14Number Mat[3][3]; // n.14 to n.14 (needs a saturation after that) |
84 | cmsS1Fixed14Number Off[3]; |
85 | |
86 | cmsUInt16Number Shaper2R[16385]; // 1.14 to 0..255 |
87 | cmsUInt16Number Shaper2G[16385]; |
88 | cmsUInt16Number Shaper2B[16385]; |
89 | |
90 | } MatShaper8Data; |
91 | |
92 | // Curves, optimization is shared between 8 and 16 bits |
93 | typedef struct { |
94 | cmsUInt32Number nCurves; // Number of curves |
95 | cmsUInt32Number nElements; // Elements in curves |
96 | cmsUInt16Number** Curves; // Points to a dynamically allocated array |
97 | |
98 | } Curves16Data; |
99 | |
100 | |
101 | // Simple optimizations ---------------------------------------------------------------------------------------------------------- |
102 | |
103 | |
104 | // Remove an element in linked chain |
105 | static |
106 | void _RemoveElement(cmsContext ContextID, cmsStage** head) |
107 | { |
108 | cmsStage* mpe = *head; |
109 | cmsStage* next = mpe ->Next; |
110 | *head = next; |
111 | cmsStageFree(ContextID, mpe); |
112 | } |
113 | |
114 | // Remove all identities in chain. Note that pt actually is a double pointer to the element that holds the pointer. |
115 | static |
116 | cmsBool _Remove1Op(cmsContext ContextID, cmsPipeline* Lut, cmsStageSignature UnaryOp) |
117 | { |
118 | cmsStage** pt = &Lut ->Elements; |
119 | cmsBool AnyOpt = FALSE; |
120 | |
121 | while (*pt != NULL) { |
122 | |
123 | if ((*pt) ->Implements == UnaryOp) { |
124 | _RemoveElement(ContextID, pt); |
125 | AnyOpt = TRUE; |
126 | } |
127 | else |
128 | pt = &((*pt) -> Next); |
129 | } |
130 | |
131 | return AnyOpt; |
132 | } |
133 | |
134 | // Same, but only if two adjacent elements are found |
135 | static |
136 | cmsBool _Remove2Op(cmsContext ContextID, cmsPipeline* Lut, cmsStageSignature Op1, cmsStageSignature Op2) |
137 | { |
138 | cmsStage** pt1; |
139 | cmsStage** pt2; |
140 | cmsBool AnyOpt = FALSE; |
141 | |
142 | pt1 = &Lut ->Elements; |
143 | if (*pt1 == NULL) return AnyOpt; |
144 | |
145 | while (*pt1 != NULL) { |
146 | |
147 | pt2 = &((*pt1) -> Next); |
148 | if (*pt2 == NULL) return AnyOpt; |
149 | |
150 | if ((*pt1) ->Implements == Op1 && (*pt2) ->Implements == Op2) { |
151 | _RemoveElement(ContextID, pt2); |
152 | _RemoveElement(ContextID, pt1); |
153 | AnyOpt = TRUE; |
154 | } |
155 | else |
156 | pt1 = &((*pt1) -> Next); |
157 | } |
158 | |
159 | return AnyOpt; |
160 | } |
161 | |
162 | |
163 | static |
164 | cmsBool CloseEnoughFloat(cmsFloat64Number a, cmsFloat64Number b) |
165 | { |
166 | return fabs(b - a) < 0.00001f; |
167 | } |
168 | |
169 | static |
170 | cmsBool isFloatMatrixIdentity(cmsContext ContextID, const cmsMAT3* a) |
171 | { |
172 | cmsMAT3 Identity; |
173 | int i, j; |
174 | |
175 | _cmsMAT3identity(ContextID, &Identity); |
176 | |
177 | for (i = 0; i < 3; i++) |
178 | for (j = 0; j < 3; j++) |
179 | if (!CloseEnoughFloat(a->v[i].n[j], Identity.v[i].n[j])) return FALSE; |
180 | |
181 | return TRUE; |
182 | } |
183 | // if two adjacent matrices are found, multiply them. |
184 | static |
185 | cmsBool _MultiplyMatrix(cmsContext ContextID, cmsPipeline* Lut) |
186 | { |
187 | cmsStage** pt1; |
188 | cmsStage** pt2; |
189 | cmsStage* chain; |
190 | cmsBool AnyOpt = FALSE; |
191 | |
192 | pt1 = &Lut->Elements; |
193 | if (*pt1 == NULL) return AnyOpt; |
194 | |
195 | while (*pt1 != NULL) { |
196 | |
197 | pt2 = &((*pt1)->Next); |
198 | if (*pt2 == NULL) return AnyOpt; |
199 | |
200 | if ((*pt1)->Implements == cmsSigMatrixElemType && (*pt2)->Implements == cmsSigMatrixElemType) { |
201 | |
202 | // Get both matrices |
203 | _cmsStageMatrixData* m1 = (_cmsStageMatrixData*) cmsStageData(ContextID, *pt1); |
204 | _cmsStageMatrixData* m2 = (_cmsStageMatrixData*) cmsStageData(ContextID, *pt2); |
205 | cmsMAT3 res; |
206 | |
207 | // Input offset and output offset should be zero to use this optimization |
208 | if (m1->Offset != NULL || m2 ->Offset != NULL || |
209 | cmsStageInputChannels(ContextID, *pt1) != 3 || cmsStageOutputChannels(ContextID, *pt1) != 3 || |
210 | cmsStageInputChannels(ContextID, *pt2) != 3 || cmsStageOutputChannels(ContextID, *pt2) != 3) |
211 | return FALSE; |
212 | |
213 | // Multiply both matrices to get the result |
214 | _cmsMAT3per(ContextID, &res, (cmsMAT3*)m2->Double, (cmsMAT3*)m1->Double); |
215 | |
216 | // Get the next in chain after the matrices |
217 | chain = (*pt2)->Next; |
218 | |
219 | // Remove both matrices |
220 | _RemoveElement(ContextID, pt2); |
221 | _RemoveElement(ContextID, pt1); |
222 | |
223 | // Now what if the result is a plain identity? |
224 | if (!isFloatMatrixIdentity(ContextID, &res)) { |
225 | |
226 | // We can not get rid of full matrix |
227 | cmsStage* Multmat = cmsStageAllocMatrix(ContextID, 3, 3, (const cmsFloat64Number*) &res, NULL); |
228 | if (Multmat == NULL) return FALSE; // Should never happen |
229 | |
230 | // Recover the chain |
231 | Multmat->Next = chain; |
232 | *pt1 = Multmat; |
233 | } |
234 | |
235 | AnyOpt = TRUE; |
236 | } |
237 | else |
238 | pt1 = &((*pt1)->Next); |
239 | } |
240 | |
241 | return AnyOpt; |
242 | } |
243 | |
244 | |
245 | // Preoptimize just gets rif of no-ops coming paired. Conversion from v2 to v4 followed |
246 | // by a v4 to v2 and vice-versa. The elements are then discarded. |
247 | static |
248 | cmsBool PreOptimize(cmsContext ContextID, cmsPipeline* Lut) |
249 | { |
250 | cmsBool AnyOpt = FALSE, Opt; |
251 | |
252 | do { |
253 | |
254 | Opt = FALSE; |
255 | |
256 | // Remove all identities |
257 | Opt |= _Remove1Op(ContextID, Lut, cmsSigIdentityElemType); |
258 | |
259 | // Remove XYZ2Lab followed by Lab2XYZ |
260 | Opt |= _Remove2Op(ContextID, Lut, cmsSigXYZ2LabElemType, cmsSigLab2XYZElemType); |
261 | |
262 | // Remove Lab2XYZ followed by XYZ2Lab |
263 | Opt |= _Remove2Op(ContextID, Lut, cmsSigLab2XYZElemType, cmsSigXYZ2LabElemType); |
264 | |
265 | // Remove V4 to V2 followed by V2 to V4 |
266 | Opt |= _Remove2Op(ContextID, Lut, cmsSigLabV4toV2, cmsSigLabV2toV4); |
267 | |
268 | // Remove V2 to V4 followed by V4 to V2 |
269 | Opt |= _Remove2Op(ContextID, Lut, cmsSigLabV2toV4, cmsSigLabV4toV2); |
270 | |
271 | // Remove float pcs Lab conversions |
272 | Opt |= _Remove2Op(ContextID, Lut, cmsSigLab2FloatPCS, cmsSigFloatPCS2Lab); |
273 | |
274 | // Remove float pcs Lab conversions |
275 | Opt |= _Remove2Op(ContextID, Lut, cmsSigXYZ2FloatPCS, cmsSigFloatPCS2XYZ); |
276 | |
277 | // Simplify matrix. |
278 | Opt |= _MultiplyMatrix(ContextID, Lut); |
279 | |
280 | if (Opt) AnyOpt = TRUE; |
281 | |
282 | } while (Opt); |
283 | |
284 | return AnyOpt; |
285 | } |
286 | |
287 | static |
288 | void Eval16nop1D(cmsContext ContextID, register const cmsUInt16Number Input[], |
289 | register cmsUInt16Number Output[], |
290 | register const struct _cms_interp_struc* p) |
291 | { |
292 | cmsUNUSED_PARAMETER(ContextID); |
293 | Output[0] = Input[0]; |
294 | |
295 | cmsUNUSED_PARAMETER(p); |
296 | } |
297 | |
298 | static |
299 | void PrelinEval16(cmsContext ContextID, register const cmsUInt16Number Input[], |
300 | register cmsUInt16Number Output[], |
301 | register const void* D) |
302 | { |
303 | Prelin16Data* p16 = (Prelin16Data*) D; |
304 | cmsUInt16Number StageABC[MAX_INPUT_DIMENSIONS]; |
305 | cmsUInt16Number StageDEF[cmsMAXCHANNELS]; |
306 | cmsUInt32Number i; |
307 | |
308 | for (i=0; i < p16 ->nInputs; i++) { |
309 | |
310 | p16 ->EvalCurveIn16[i](ContextID, &Input[i], &StageABC[i], p16 ->ParamsCurveIn16[i]); |
311 | } |
312 | |
313 | p16 ->EvalCLUT(ContextID, StageABC, StageDEF, p16 ->CLUTparams); |
314 | |
315 | for (i=0; i < p16 ->nOutputs; i++) { |
316 | |
317 | p16 ->EvalCurveOut16[i](ContextID, &StageDEF[i], &Output[i], p16 ->ParamsCurveOut16[i]); |
318 | } |
319 | } |
320 | |
321 | |
322 | static |
323 | void PrelinOpt16free(cmsContext ContextID, void* ptr) |
324 | { |
325 | Prelin16Data* p16 = (Prelin16Data*) ptr; |
326 | |
327 | _cmsFree(ContextID, p16 ->EvalCurveOut16); |
328 | _cmsFree(ContextID, p16 ->ParamsCurveOut16); |
329 | |
330 | _cmsFree(ContextID, p16); |
331 | } |
332 | |
333 | static |
334 | void* Prelin16dup(cmsContext ContextID, const void* ptr) |
335 | { |
336 | Prelin16Data* p16 = (Prelin16Data*) ptr; |
337 | Prelin16Data* Duped = (Prelin16Data*) _cmsDupMem(ContextID, p16, sizeof(Prelin16Data)); |
338 | |
339 | if (Duped == NULL) return NULL; |
340 | |
341 | Duped->EvalCurveOut16 = (_cmsInterpFn16*) _cmsDupMem(ContextID, p16->EvalCurveOut16, p16->nOutputs * sizeof(_cmsInterpFn16)); |
342 | Duped->ParamsCurveOut16 = (cmsInterpParams**)_cmsDupMem(ContextID, p16->ParamsCurveOut16, p16->nOutputs * sizeof(cmsInterpParams*)); |
343 | |
344 | return Duped; |
345 | } |
346 | |
347 | |
348 | static |
349 | Prelin16Data* PrelinOpt16alloc(cmsContext ContextID, |
350 | const cmsInterpParams* ColorMap, |
351 | cmsUInt32Number nInputs, cmsToneCurve** In, |
352 | cmsUInt32Number nOutputs, cmsToneCurve** Out ) |
353 | { |
354 | cmsUInt32Number i; |
355 | Prelin16Data* p16 = (Prelin16Data*)_cmsMallocZero(ContextID, sizeof(Prelin16Data)); |
356 | if (p16 == NULL) return NULL; |
357 | |
358 | p16 ->nInputs = nInputs; |
359 | p16 ->nOutputs = nOutputs; |
360 | |
361 | |
362 | for (i=0; i < nInputs; i++) { |
363 | |
364 | if (In == NULL) { |
365 | p16 -> ParamsCurveIn16[i] = NULL; |
366 | p16 -> EvalCurveIn16[i] = Eval16nop1D; |
367 | |
368 | } |
369 | else { |
370 | p16 -> ParamsCurveIn16[i] = In[i] ->InterpParams; |
371 | p16 -> EvalCurveIn16[i] = p16 ->ParamsCurveIn16[i]->Interpolation.Lerp16; |
372 | } |
373 | } |
374 | |
375 | p16 ->CLUTparams = ColorMap; |
376 | p16 ->EvalCLUT = ColorMap ->Interpolation.Lerp16; |
377 | |
378 | |
379 | p16 -> EvalCurveOut16 = (_cmsInterpFn16*) _cmsCalloc(ContextID, nOutputs, sizeof(_cmsInterpFn16)); |
380 | p16 -> ParamsCurveOut16 = (cmsInterpParams**) _cmsCalloc(ContextID, nOutputs, sizeof(cmsInterpParams* )); |
381 | |
382 | for (i=0; i < nOutputs; i++) { |
383 | |
384 | if (Out == NULL) { |
385 | p16 ->ParamsCurveOut16[i] = NULL; |
386 | p16 -> EvalCurveOut16[i] = Eval16nop1D; |
387 | } |
388 | else { |
389 | |
390 | p16 ->ParamsCurveOut16[i] = Out[i] ->InterpParams; |
391 | p16 -> EvalCurveOut16[i] = p16 ->ParamsCurveOut16[i]->Interpolation.Lerp16; |
392 | } |
393 | } |
394 | |
395 | return p16; |
396 | } |
397 | |
398 | |
399 | |
400 | // Resampling --------------------------------------------------------------------------------- |
401 | |
402 | #define PRELINEARIZATION_POINTS 4096 |
403 | |
404 | // Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for |
405 | // almost any transform. We use floating point precision and then convert from floating point to 16 bits. |
406 | static |
407 | cmsInt32Number XFormSampler16(cmsContext ContextID, register const cmsUInt16Number In[], register cmsUInt16Number Out[], register void* Cargo) |
408 | { |
409 | cmsPipeline* Lut = (cmsPipeline*) Cargo; |
410 | cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS]; |
411 | cmsUInt32Number i; |
412 | |
413 | _cmsAssert(Lut -> InputChannels < cmsMAXCHANNELS); |
414 | _cmsAssert(Lut -> OutputChannels < cmsMAXCHANNELS); |
415 | |
416 | // From 16 bit to floating point |
417 | for (i=0; i < Lut ->InputChannels; i++) |
418 | InFloat[i] = (cmsFloat32Number) (In[i] / 65535.0); |
419 | |
420 | // Evaluate in floating point |
421 | cmsPipelineEvalFloat(ContextID, InFloat, OutFloat, Lut); |
422 | |
423 | // Back to 16 bits representation |
424 | for (i=0; i < Lut ->OutputChannels; i++) |
425 | Out[i] = _cmsQuickSaturateWord(OutFloat[i] * 65535.0); |
426 | |
427 | // Always succeed |
428 | return TRUE; |
429 | } |
430 | |
431 | // Try to see if the curves of a given MPE are linear |
432 | static |
433 | cmsBool AllCurvesAreLinear(cmsContext ContextID, cmsStage* mpe) |
434 | { |
435 | cmsToneCurve** Curves; |
436 | cmsUInt32Number i, n; |
437 | |
438 | Curves = _cmsStageGetPtrToCurveSet(mpe); |
439 | if (Curves == NULL) return FALSE; |
440 | |
441 | n = cmsStageOutputChannels(ContextID, mpe); |
442 | |
443 | for (i=0; i < n; i++) { |
444 | if (!cmsIsToneCurveLinear(ContextID, Curves[i])) return FALSE; |
445 | } |
446 | |
447 | return TRUE; |
448 | } |
449 | |
450 | // This function replaces a specific node placed in "At" by the "Value" numbers. Its purpose |
451 | // is to fix scum dot on broken profiles/transforms. Works on 1, 3 and 4 channels |
452 | static |
453 | cmsBool PatchLUT(cmsContext ContextID, cmsStage* CLUT, cmsUInt16Number At[], cmsUInt16Number Value[], |
454 | cmsUInt32Number nChannelsOut, cmsUInt32Number nChannelsIn) |
455 | { |
456 | _cmsStageCLutData* Grid = (_cmsStageCLutData*) CLUT ->Data; |
457 | cmsInterpParams* p16 = Grid ->Params; |
458 | cmsFloat64Number px, py, pz, pw; |
459 | int x0, y0, z0, w0; |
460 | int i, index; |
461 | |
462 | if (CLUT -> Type != cmsSigCLutElemType) { |
463 | cmsSignalError(ContextID, cmsERROR_INTERNAL, "(internal) Attempt to PatchLUT on non-lut stage" ); |
464 | return FALSE; |
465 | } |
466 | |
467 | if (nChannelsIn == 4) { |
468 | |
469 | px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0; |
470 | py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0; |
471 | pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0; |
472 | pw = ((cmsFloat64Number) At[3] * (p16->Domain[3])) / 65535.0; |
473 | |
474 | x0 = (int) floor(px); |
475 | y0 = (int) floor(py); |
476 | z0 = (int) floor(pz); |
477 | w0 = (int) floor(pw); |
478 | |
479 | if (((px - x0) != 0) || |
480 | ((py - y0) != 0) || |
481 | ((pz - z0) != 0) || |
482 | ((pw - w0) != 0)) return FALSE; // Not on exact node |
483 | |
484 | index = (int) p16 -> opta[3] * x0 + |
485 | (int) p16 -> opta[2] * y0 + |
486 | (int) p16 -> opta[1] * z0 + |
487 | (int) p16 -> opta[0] * w0; |
488 | } |
489 | else |
490 | if (nChannelsIn == 3) { |
491 | |
492 | px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0; |
493 | py = ((cmsFloat64Number) At[1] * (p16->Domain[1])) / 65535.0; |
494 | pz = ((cmsFloat64Number) At[2] * (p16->Domain[2])) / 65535.0; |
495 | |
496 | x0 = (int) floor(px); |
497 | y0 = (int) floor(py); |
498 | z0 = (int) floor(pz); |
499 | |
500 | if (((px - x0) != 0) || |
501 | ((py - y0) != 0) || |
502 | ((pz - z0) != 0)) return FALSE; // Not on exact node |
503 | |
504 | index = (int) p16 -> opta[2] * x0 + |
505 | (int) p16 -> opta[1] * y0 + |
506 | (int) p16 -> opta[0] * z0; |
507 | } |
508 | else |
509 | if (nChannelsIn == 1) { |
510 | |
511 | px = ((cmsFloat64Number) At[0] * (p16->Domain[0])) / 65535.0; |
512 | |
513 | x0 = (int) floor(px); |
514 | |
515 | if (((px - x0) != 0)) return FALSE; // Not on exact node |
516 | |
517 | index = (int) p16 -> opta[0] * x0; |
518 | } |
519 | else { |
520 | cmsSignalError(ContextID, cmsERROR_INTERNAL, "(internal) %d Channels are not supported on PatchLUT" , nChannelsIn); |
521 | return FALSE; |
522 | } |
523 | |
524 | for (i = 0; i < (int) nChannelsOut; i++) |
525 | Grid->Tab.T[index + i] = Value[i]; |
526 | |
527 | return TRUE; |
528 | } |
529 | |
530 | // Auxiliary, to see if two values are equal or very different |
531 | static |
532 | cmsBool WhitesAreEqual(cmsUInt32Number n, cmsUInt16Number White1[], cmsUInt16Number White2[] ) |
533 | { |
534 | cmsUInt32Number i; |
535 | |
536 | for (i=0; i < n; i++) { |
537 | |
538 | if (abs(White1[i] - White2[i]) > 0xf000) return TRUE; // Values are so extremely different that the fixup should be avoided |
539 | if (White1[i] != White2[i]) return FALSE; |
540 | } |
541 | return TRUE; |
542 | } |
543 | |
544 | |
545 | // Locate the node for the white point and fix it to pure white in order to avoid scum dot. |
546 | static |
547 | cmsBool FixWhiteMisalignment(cmsContext ContextID, cmsPipeline* Lut, cmsColorSpaceSignature EntryColorSpace, cmsColorSpaceSignature ExitColorSpace) |
548 | { |
549 | cmsUInt16Number *WhitePointIn, *WhitePointOut; |
550 | cmsUInt16Number WhiteIn[cmsMAXCHANNELS], WhiteOut[cmsMAXCHANNELS], ObtainedOut[cmsMAXCHANNELS]; |
551 | cmsUInt32Number i, nOuts, nIns; |
552 | cmsStage *PreLin = NULL, *CLUT = NULL, *PostLin = NULL; |
553 | |
554 | if (!_cmsEndPointsBySpace(EntryColorSpace, |
555 | &WhitePointIn, NULL, &nIns)) return FALSE; |
556 | |
557 | if (!_cmsEndPointsBySpace(ExitColorSpace, |
558 | &WhitePointOut, NULL, &nOuts)) return FALSE; |
559 | |
560 | // It needs to be fixed? |
561 | if (Lut ->InputChannels != nIns) return FALSE; |
562 | if (Lut ->OutputChannels != nOuts) return FALSE; |
563 | |
564 | cmsPipelineEval16(ContextID, WhitePointIn, ObtainedOut, Lut); |
565 | |
566 | if (WhitesAreEqual(nOuts, WhitePointOut, ObtainedOut)) return TRUE; // whites already match |
567 | |
568 | // Check if the LUT comes as Prelin, CLUT or Postlin. We allow all combinations |
569 | if (!cmsPipelineCheckAndRetreiveStages(ContextID, Lut, 3, cmsSigCurveSetElemType, cmsSigCLutElemType, cmsSigCurveSetElemType, &PreLin, &CLUT, &PostLin)) |
570 | if (!cmsPipelineCheckAndRetreiveStages(ContextID, Lut, 2, cmsSigCurveSetElemType, cmsSigCLutElemType, &PreLin, &CLUT)) |
571 | if (!cmsPipelineCheckAndRetreiveStages(ContextID, Lut, 2, cmsSigCLutElemType, cmsSigCurveSetElemType, &CLUT, &PostLin)) |
572 | if (!cmsPipelineCheckAndRetreiveStages(ContextID, Lut, 1, cmsSigCLutElemType, &CLUT)) |
573 | return FALSE; |
574 | |
575 | // We need to interpolate white points of both, pre and post curves |
576 | if (PreLin) { |
577 | |
578 | cmsToneCurve** Curves = _cmsStageGetPtrToCurveSet(PreLin); |
579 | |
580 | for (i=0; i < nIns; i++) { |
581 | WhiteIn[i] = cmsEvalToneCurve16(ContextID, Curves[i], WhitePointIn[i]); |
582 | } |
583 | } |
584 | else { |
585 | for (i=0; i < nIns; i++) |
586 | WhiteIn[i] = WhitePointIn[i]; |
587 | } |
588 | |
589 | // If any post-linearization, we need to find how is represented white before the curve, do |
590 | // a reverse interpolation in this case. |
591 | if (PostLin) { |
592 | |
593 | cmsToneCurve** Curves = _cmsStageGetPtrToCurveSet(PostLin); |
594 | |
595 | for (i=0; i < nOuts; i++) { |
596 | |
597 | cmsToneCurve* InversePostLin = cmsReverseToneCurve(ContextID, Curves[i]); |
598 | if (InversePostLin == NULL) { |
599 | WhiteOut[i] = WhitePointOut[i]; |
600 | |
601 | } else { |
602 | |
603 | WhiteOut[i] = cmsEvalToneCurve16(ContextID, InversePostLin, WhitePointOut[i]); |
604 | cmsFreeToneCurve(ContextID, InversePostLin); |
605 | } |
606 | } |
607 | } |
608 | else { |
609 | for (i=0; i < nOuts; i++) |
610 | WhiteOut[i] = WhitePointOut[i]; |
611 | } |
612 | |
613 | // Ok, proceed with patching. May fail and we don't care if it fails |
614 | PatchLUT(ContextID, CLUT, WhiteIn, WhiteOut, nOuts, nIns); |
615 | |
616 | return TRUE; |
617 | } |
618 | |
619 | // ----------------------------------------------------------------------------------------------------------------------------------------------- |
620 | // This function creates simple LUT from complex ones. The generated LUT has an optional set of |
621 | // prelinearization curves, a CLUT of nGridPoints and optional postlinearization tables. |
622 | // These curves have to exist in the original LUT in order to be used in the simplified output. |
623 | // Caller may also use the flags to allow this feature. |
624 | // LUTS with all curves will be simplified to a single curve. Parametric curves are lost. |
625 | // This function should be used on 16-bits LUTS only, as floating point losses precision when simplified |
626 | // ----------------------------------------------------------------------------------------------------------------------------------------------- |
627 | |
628 | static |
629 | cmsBool OptimizeByResampling(cmsContext ContextID, cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) |
630 | { |
631 | cmsPipeline* Src = NULL; |
632 | cmsPipeline* Dest = NULL; |
633 | cmsStage* mpe; |
634 | cmsStage* CLUT; |
635 | cmsStage *KeepPreLin = NULL, *KeepPostLin = NULL; |
636 | cmsUInt32Number nGridPoints; |
637 | cmsColorSpaceSignature ColorSpace, OutputColorSpace; |
638 | cmsStage *NewPreLin = NULL; |
639 | cmsStage *NewPostLin = NULL; |
640 | _cmsStageCLutData* DataCLUT; |
641 | cmsToneCurve** DataSetIn; |
642 | cmsToneCurve** DataSetOut; |
643 | Prelin16Data* p16; |
644 | |
645 | // This is a lossy optimization! does not apply in floating-point cases |
646 | if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE; |
647 | |
648 | ColorSpace = _cmsICCcolorSpace(ContextID, (int) T_COLORSPACE(*InputFormat)); |
649 | OutputColorSpace = _cmsICCcolorSpace(ContextID, (int) T_COLORSPACE(*OutputFormat)); |
650 | |
651 | // Color space must be specified |
652 | if (ColorSpace == (cmsColorSpaceSignature)0 || |
653 | OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE; |
654 | |
655 | nGridPoints = _cmsReasonableGridpointsByColorspace(ContextID, ColorSpace, *dwFlags); |
656 | |
657 | // For empty LUTs, 2 points are enough |
658 | if (cmsPipelineStageCount(ContextID, *Lut) == 0) |
659 | nGridPoints = 2; |
660 | |
661 | Src = *Lut; |
662 | |
663 | // Named color pipelines cannot be optimized either |
664 | for (mpe = cmsPipelineGetPtrToFirstStage(ContextID, Src); |
665 | mpe != NULL; |
666 | mpe = cmsStageNext(ContextID, mpe)) { |
667 | if (cmsStageType(ContextID, mpe) == cmsSigNamedColorElemType) return FALSE; |
668 | } |
669 | |
670 | // Allocate an empty LUT |
671 | Dest = cmsPipelineAlloc(ContextID, Src ->InputChannels, Src ->OutputChannels); |
672 | if (!Dest) return FALSE; |
673 | |
674 | // Prelinearization tables are kept unless indicated by flags |
675 | if (*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION) { |
676 | |
677 | // Get a pointer to the prelinearization element |
678 | cmsStage* PreLin = cmsPipelineGetPtrToFirstStage(ContextID, Src); |
679 | |
680 | // Check if suitable |
681 | if (PreLin && PreLin ->Type == cmsSigCurveSetElemType) { |
682 | |
683 | // Maybe this is a linear tram, so we can avoid the whole stuff |
684 | if (!AllCurvesAreLinear(ContextID, PreLin)) { |
685 | |
686 | // All seems ok, proceed. |
687 | NewPreLin = cmsStageDup(ContextID, PreLin); |
688 | if(!cmsPipelineInsertStage(ContextID, Dest, cmsAT_BEGIN, NewPreLin)) |
689 | goto Error; |
690 | |
691 | // Remove prelinearization. Since we have duplicated the curve |
692 | // in destination LUT, the sampling should be applied after this stage. |
693 | cmsPipelineUnlinkStage(ContextID, Src, cmsAT_BEGIN, &KeepPreLin); |
694 | } |
695 | } |
696 | } |
697 | |
698 | // Allocate the CLUT |
699 | CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL); |
700 | if (CLUT == NULL) goto Error; |
701 | |
702 | // Add the CLUT to the destination LUT |
703 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END, CLUT)) { |
704 | goto Error; |
705 | } |
706 | |
707 | // Postlinearization tables are kept unless indicated by flags |
708 | if (*dwFlags & cmsFLAGS_CLUT_POST_LINEARIZATION) { |
709 | |
710 | // Get a pointer to the postlinearization if present |
711 | cmsStage* PostLin = cmsPipelineGetPtrToLastStage(ContextID, Src); |
712 | |
713 | // Check if suitable |
714 | if (PostLin && cmsStageType(ContextID, PostLin) == cmsSigCurveSetElemType) { |
715 | |
716 | // Maybe this is a linear tram, so we can avoid the whole stuff |
717 | if (!AllCurvesAreLinear(ContextID, PostLin)) { |
718 | |
719 | // All seems ok, proceed. |
720 | NewPostLin = cmsStageDup(ContextID, PostLin); |
721 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END, NewPostLin)) |
722 | goto Error; |
723 | |
724 | // In destination LUT, the sampling should be applied after this stage. |
725 | cmsPipelineUnlinkStage(ContextID, Src, cmsAT_END, &KeepPostLin); |
726 | } |
727 | } |
728 | } |
729 | |
730 | // Now its time to do the sampling. We have to ignore pre/post linearization |
731 | // The source LUT without pre/post curves is passed as parameter. |
732 | if (!cmsStageSampleCLut16bit(ContextID, CLUT, XFormSampler16, (void*) Src, 0)) { |
733 | Error: |
734 | // Ops, something went wrong, Restore stages |
735 | if (KeepPreLin != NULL) { |
736 | if (!cmsPipelineInsertStage(ContextID, Src, cmsAT_BEGIN, KeepPreLin)) { |
737 | _cmsAssert(0); // This never happens |
738 | } |
739 | } |
740 | if (KeepPostLin != NULL) { |
741 | if (!cmsPipelineInsertStage(ContextID, Src, cmsAT_END, KeepPostLin)) { |
742 | _cmsAssert(0); // This never happens |
743 | } |
744 | } |
745 | cmsPipelineFree(ContextID, Dest); |
746 | return FALSE; |
747 | } |
748 | |
749 | // Done. |
750 | |
751 | if (KeepPreLin != NULL) cmsStageFree(ContextID, KeepPreLin); |
752 | if (KeepPostLin != NULL) cmsStageFree(ContextID, KeepPostLin); |
753 | cmsPipelineFree(ContextID, Src); |
754 | |
755 | DataCLUT = (_cmsStageCLutData*) CLUT ->Data; |
756 | |
757 | if (NewPreLin == NULL) DataSetIn = NULL; |
758 | else DataSetIn = ((_cmsStageToneCurvesData*) NewPreLin ->Data) ->TheCurves; |
759 | |
760 | if (NewPostLin == NULL) DataSetOut = NULL; |
761 | else DataSetOut = ((_cmsStageToneCurvesData*) NewPostLin ->Data) ->TheCurves; |
762 | |
763 | |
764 | if (DataSetIn == NULL && DataSetOut == NULL) { |
765 | |
766 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, (_cmsOPTeval16Fn) DataCLUT->Params->Interpolation.Lerp16, DataCLUT->Params, NULL, NULL); |
767 | } |
768 | else { |
769 | |
770 | p16 = PrelinOpt16alloc(ContextID, |
771 | DataCLUT ->Params, |
772 | Dest ->InputChannels, |
773 | DataSetIn, |
774 | Dest ->OutputChannels, |
775 | DataSetOut); |
776 | |
777 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup); |
778 | } |
779 | |
780 | |
781 | // Don't fix white on absolute colorimetric |
782 | if (Intent == INTENT_ABSOLUTE_COLORIMETRIC) |
783 | *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP; |
784 | |
785 | if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) { |
786 | |
787 | FixWhiteMisalignment(ContextID, Dest, ColorSpace, OutputColorSpace); |
788 | } |
789 | |
790 | *Lut = Dest; |
791 | return TRUE; |
792 | |
793 | cmsUNUSED_PARAMETER(Intent); |
794 | } |
795 | |
796 | |
797 | // ----------------------------------------------------------------------------------------------------------------------------------------------- |
798 | // Fixes the gamma balancing of transform. This is described in my paper "Prelinearization Stages on |
799 | // Color-Management Application-Specific Integrated Circuits (ASICs)" presented at NIP24. It only works |
800 | // for RGB transforms. See the paper for more details |
801 | // ----------------------------------------------------------------------------------------------------------------------------------------------- |
802 | |
803 | |
804 | // Normalize endpoints by slope limiting max and min. This assures endpoints as well. |
805 | // Descending curves are handled as well. |
806 | static |
807 | void SlopeLimiting(cmsContext ContextID, cmsToneCurve* g) |
808 | { |
809 | int BeginVal, EndVal; |
810 | int AtBegin = (int) floor((cmsFloat64Number) g ->nEntries * 0.02 + 0.5); // Cutoff at 2% |
811 | int AtEnd = (int) g ->nEntries - AtBegin - 1; // And 98% |
812 | cmsFloat64Number Val, Slope, beta; |
813 | int i; |
814 | |
815 | if (cmsIsToneCurveDescending(ContextID, g)) { |
816 | BeginVal = 0xffff; EndVal = 0; |
817 | } |
818 | else { |
819 | BeginVal = 0; EndVal = 0xffff; |
820 | } |
821 | |
822 | // Compute slope and offset for begin of curve |
823 | Val = g ->Table16[AtBegin]; |
824 | Slope = (Val - BeginVal) / AtBegin; |
825 | beta = Val - Slope * AtBegin; |
826 | |
827 | for (i=0; i < AtBegin; i++) |
828 | g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta); |
829 | |
830 | // Compute slope and offset for the end |
831 | Val = g ->Table16[AtEnd]; |
832 | Slope = (EndVal - Val) / AtBegin; // AtBegin holds the X interval, which is same in both cases |
833 | beta = Val - Slope * AtEnd; |
834 | |
835 | for (i = AtEnd; i < (int) g ->nEntries; i++) |
836 | g ->Table16[i] = _cmsQuickSaturateWord(i * Slope + beta); |
837 | } |
838 | |
839 | |
840 | // Precomputes tables for 8-bit on input devicelink. |
841 | static |
842 | Prelin8Data* PrelinOpt8alloc(cmsContext ContextID, const cmsInterpParams* p, cmsToneCurve* G[3]) |
843 | { |
844 | int i; |
845 | cmsUInt16Number Input[3]; |
846 | cmsS15Fixed16Number v1, v2, v3; |
847 | Prelin8Data* p8; |
848 | |
849 | p8 = (Prelin8Data*)_cmsMallocZero(ContextID, sizeof(Prelin8Data)); |
850 | if (p8 == NULL) return NULL; |
851 | |
852 | // Since this only works for 8 bit input, values comes always as x * 257, |
853 | // we can safely take msb byte (x << 8 + x) |
854 | |
855 | for (i=0; i < 256; i++) { |
856 | |
857 | if (G != NULL) { |
858 | |
859 | // Get 16-bit representation |
860 | Input[0] = cmsEvalToneCurve16(ContextID, G[0], FROM_8_TO_16(i)); |
861 | Input[1] = cmsEvalToneCurve16(ContextID, G[1], FROM_8_TO_16(i)); |
862 | Input[2] = cmsEvalToneCurve16(ContextID, G[2], FROM_8_TO_16(i)); |
863 | } |
864 | else { |
865 | Input[0] = FROM_8_TO_16(i); |
866 | Input[1] = FROM_8_TO_16(i); |
867 | Input[2] = FROM_8_TO_16(i); |
868 | } |
869 | |
870 | |
871 | // Move to 0..1.0 in fixed domain |
872 | v1 = _cmsToFixedDomain((int) (Input[0] * p -> Domain[0])); |
873 | v2 = _cmsToFixedDomain((int) (Input[1] * p -> Domain[1])); |
874 | v3 = _cmsToFixedDomain((int) (Input[2] * p -> Domain[2])); |
875 | |
876 | // Store the precalculated table of nodes |
877 | p8 ->X0[i] = (p->opta[2] * FIXED_TO_INT(v1)); |
878 | p8 ->Y0[i] = (p->opta[1] * FIXED_TO_INT(v2)); |
879 | p8 ->Z0[i] = (p->opta[0] * FIXED_TO_INT(v3)); |
880 | |
881 | // Store the precalculated table of offsets |
882 | p8 ->rx[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v1); |
883 | p8 ->ry[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v2); |
884 | p8 ->rz[i] = (cmsUInt16Number) FIXED_REST_TO_INT(v3); |
885 | } |
886 | |
887 | p8 ->ContextID = ContextID; |
888 | p8 ->p = p; |
889 | |
890 | return p8; |
891 | } |
892 | |
893 | static |
894 | void Prelin8free(cmsContext ContextID, void* ptr) |
895 | { |
896 | _cmsFree(ContextID, ptr); |
897 | } |
898 | |
899 | static |
900 | void* Prelin8dup(cmsContext ContextID, const void* ptr) |
901 | { |
902 | return _cmsDupMem(ContextID, ptr, sizeof(Prelin8Data)); |
903 | } |
904 | |
905 | |
906 | |
907 | // A optimized interpolation for 8-bit input. |
908 | #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan]) |
909 | static CMS_NO_SANITIZE |
910 | void PrelinEval8(cmsContext ContextID, register const cmsUInt16Number Input[], |
911 | register cmsUInt16Number Output[], |
912 | register const void* D) |
913 | { |
914 | cmsUInt8Number r, g, b; |
915 | cmsS15Fixed16Number rx, ry, rz; |
916 | cmsS15Fixed16Number c0, c1, c2, c3, Rest; |
917 | int OutChan; |
918 | register cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1; |
919 | Prelin8Data* p8 = (Prelin8Data*) D; |
920 | register const cmsInterpParams* p = p8 ->p; |
921 | int TotalOut = (int) p -> nOutputs; |
922 | const cmsUInt16Number* LutTable = (const cmsUInt16Number*) p->Table; |
923 | cmsUNUSED_PARAMETER(ContextID); |
924 | |
925 | r = (cmsUInt8Number) (Input[0] >> 8); |
926 | g = (cmsUInt8Number) (Input[1] >> 8); |
927 | b = (cmsUInt8Number) (Input[2] >> 8); |
928 | |
929 | X0 = X1 = (cmsS15Fixed16Number) p8->X0[r]; |
930 | Y0 = Y1 = (cmsS15Fixed16Number) p8->Y0[g]; |
931 | Z0 = Z1 = (cmsS15Fixed16Number) p8->Z0[b]; |
932 | |
933 | rx = p8 ->rx[r]; |
934 | ry = p8 ->ry[g]; |
935 | rz = p8 ->rz[b]; |
936 | |
937 | X1 = X0 + (cmsS15Fixed16Number)((rx == 0) ? 0 : p ->opta[2]); |
938 | Y1 = Y0 + (cmsS15Fixed16Number)((ry == 0) ? 0 : p ->opta[1]); |
939 | Z1 = Z0 + (cmsS15Fixed16Number)((rz == 0) ? 0 : p ->opta[0]); |
940 | |
941 | |
942 | // These are the 6 Tetrahedral |
943 | for (OutChan=0; OutChan < TotalOut; OutChan++) { |
944 | |
945 | c0 = DENS(X0, Y0, Z0); |
946 | |
947 | if (rx >= ry && ry >= rz) |
948 | { |
949 | c1 = DENS(X1, Y0, Z0) - c0; |
950 | c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); |
951 | c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); |
952 | } |
953 | else |
954 | if (rx >= rz && rz >= ry) |
955 | { |
956 | c1 = DENS(X1, Y0, Z0) - c0; |
957 | c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); |
958 | c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); |
959 | } |
960 | else |
961 | if (rz >= rx && rx >= ry) |
962 | { |
963 | c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); |
964 | c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); |
965 | c3 = DENS(X0, Y0, Z1) - c0; |
966 | } |
967 | else |
968 | if (ry >= rx && rx >= rz) |
969 | { |
970 | c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); |
971 | c2 = DENS(X0, Y1, Z0) - c0; |
972 | c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); |
973 | } |
974 | else |
975 | if (ry >= rz && rz >= rx) |
976 | { |
977 | c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); |
978 | c2 = DENS(X0, Y1, Z0) - c0; |
979 | c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); |
980 | } |
981 | else |
982 | if (rz >= ry && ry >= rx) |
983 | { |
984 | c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); |
985 | c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); |
986 | c3 = DENS(X0, Y0, Z1) - c0; |
987 | } |
988 | else { |
989 | c1 = c2 = c3 = 0; |
990 | } |
991 | |
992 | Rest = c1 * rx + c2 * ry + c3 * rz + 0x8001; |
993 | Output[OutChan] = (cmsUInt16Number) (c0 + ((Rest + (Rest >> 16)) >> 16)); |
994 | |
995 | } |
996 | } |
997 | |
998 | #undef DENS |
999 | |
1000 | |
1001 | // Curves that contain wide empty areas are not optimizeable |
1002 | static |
1003 | cmsBool IsDegenerated(const cmsToneCurve* g) |
1004 | { |
1005 | cmsUInt32Number i, Zeros = 0, Poles = 0; |
1006 | cmsUInt32Number nEntries = g ->nEntries; |
1007 | |
1008 | for (i=0; i < nEntries; i++) { |
1009 | |
1010 | if (g ->Table16[i] == 0x0000) Zeros++; |
1011 | if (g ->Table16[i] == 0xffff) Poles++; |
1012 | } |
1013 | |
1014 | if (Zeros == 1 && Poles == 1) return FALSE; // For linear tables |
1015 | if (Zeros > (nEntries / 20)) return TRUE; // Degenerated, many zeros |
1016 | if (Poles > (nEntries / 20)) return TRUE; // Degenerated, many poles |
1017 | |
1018 | return FALSE; |
1019 | } |
1020 | |
1021 | // -------------------------------------------------------------------------------------------------------------- |
1022 | // We need xput over here |
1023 | |
1024 | static |
1025 | cmsBool OptimizeByComputingLinearization(cmsContext ContextID, cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) |
1026 | { |
1027 | cmsPipeline* OriginalLut; |
1028 | cmsUInt32Number nGridPoints; |
1029 | cmsToneCurve *Trans[cmsMAXCHANNELS], *TransReverse[cmsMAXCHANNELS]; |
1030 | cmsUInt32Number t, i; |
1031 | cmsFloat32Number v, In[cmsMAXCHANNELS], Out[cmsMAXCHANNELS]; |
1032 | cmsBool lIsSuitable, lIsLinear; |
1033 | cmsPipeline* OptimizedLUT = NULL, *LutPlusCurves = NULL; |
1034 | cmsStage* OptimizedCLUTmpe; |
1035 | cmsColorSpaceSignature ColorSpace, OutputColorSpace; |
1036 | cmsStage* OptimizedPrelinMpe; |
1037 | cmsStage* mpe; |
1038 | cmsToneCurve** OptimizedPrelinCurves; |
1039 | _cmsStageCLutData* OptimizedPrelinCLUT; |
1040 | |
1041 | |
1042 | // This is a lossy optimization! does not apply in floating-point cases |
1043 | if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE; |
1044 | |
1045 | // Only on chunky RGB |
1046 | if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE; |
1047 | if (T_PLANAR(*InputFormat)) return FALSE; |
1048 | |
1049 | if (T_COLORSPACE(*OutputFormat) != PT_RGB) return FALSE; |
1050 | if (T_PLANAR(*OutputFormat)) return FALSE; |
1051 | |
1052 | // On 16 bits, user has to specify the feature |
1053 | if (!_cmsFormatterIs8bit(*InputFormat)) { |
1054 | if (!(*dwFlags & cmsFLAGS_CLUT_PRE_LINEARIZATION)) return FALSE; |
1055 | } |
1056 | |
1057 | OriginalLut = *Lut; |
1058 | |
1059 | // Named color pipelines cannot be optimized either |
1060 | for (mpe = cmsPipelineGetPtrToFirstStage(ContextID, OriginalLut); |
1061 | mpe != NULL; |
1062 | mpe = cmsStageNext(ContextID, mpe)) { |
1063 | if (cmsStageType(ContextID, mpe) == cmsSigNamedColorElemType) return FALSE; |
1064 | } |
1065 | |
1066 | ColorSpace = _cmsICCcolorSpace(ContextID, (int) T_COLORSPACE(*InputFormat)); |
1067 | OutputColorSpace = _cmsICCcolorSpace(ContextID, (int) T_COLORSPACE(*OutputFormat)); |
1068 | |
1069 | // Color space must be specified |
1070 | if (ColorSpace == (cmsColorSpaceSignature)0 || |
1071 | OutputColorSpace == (cmsColorSpaceSignature)0) return FALSE; |
1072 | |
1073 | nGridPoints = _cmsReasonableGridpointsByColorspace(ContextID, ColorSpace, *dwFlags); |
1074 | |
1075 | // Empty gamma containers |
1076 | memset(Trans, 0, sizeof(Trans)); |
1077 | memset(TransReverse, 0, sizeof(TransReverse)); |
1078 | |
1079 | // If the last stage of the original lut are curves, and those curves are |
1080 | // degenerated, it is likely the transform is squeezing and clipping |
1081 | // the output from previous CLUT. We cannot optimize this case |
1082 | { |
1083 | cmsStage* last = cmsPipelineGetPtrToLastStage(ContextID, OriginalLut); |
1084 | |
1085 | if (cmsStageType(ContextID, last) == cmsSigCurveSetElemType) { |
1086 | |
1087 | _cmsStageToneCurvesData* Data = (_cmsStageToneCurvesData*)cmsStageData(ContextID, last); |
1088 | for (i = 0; i < Data->nCurves; i++) { |
1089 | if (IsDegenerated(Data->TheCurves[i])) |
1090 | goto Error; |
1091 | } |
1092 | } |
1093 | } |
1094 | |
1095 | for (t = 0; t < OriginalLut ->InputChannels; t++) { |
1096 | Trans[t] = cmsBuildTabulatedToneCurve16(ContextID, PRELINEARIZATION_POINTS, NULL); |
1097 | if (Trans[t] == NULL) goto Error; |
1098 | } |
1099 | |
1100 | // Populate the curves |
1101 | for (i=0; i < PRELINEARIZATION_POINTS; i++) { |
1102 | |
1103 | v = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1)); |
1104 | |
1105 | // Feed input with a gray ramp |
1106 | for (t=0; t < OriginalLut ->InputChannels; t++) |
1107 | In[t] = v; |
1108 | |
1109 | // Evaluate the gray value |
1110 | cmsPipelineEvalFloat(ContextID, In, Out, OriginalLut); |
1111 | |
1112 | // Store result in curve |
1113 | for (t=0; t < OriginalLut ->InputChannels; t++) |
1114 | Trans[t] ->Table16[i] = _cmsQuickSaturateWord(Out[t] * 65535.0); |
1115 | } |
1116 | |
1117 | // Slope-limit the obtained curves |
1118 | for (t = 0; t < OriginalLut ->InputChannels; t++) |
1119 | SlopeLimiting(ContextID, Trans[t]); |
1120 | |
1121 | // Check for validity |
1122 | lIsSuitable = TRUE; |
1123 | lIsLinear = TRUE; |
1124 | for (t=0; (lIsSuitable && (t < OriginalLut ->InputChannels)); t++) { |
1125 | |
1126 | // Exclude if already linear |
1127 | if (!cmsIsToneCurveLinear(ContextID, Trans[t])) |
1128 | lIsLinear = FALSE; |
1129 | |
1130 | // Exclude if non-monotonic |
1131 | if (!cmsIsToneCurveMonotonic(ContextID, Trans[t])) |
1132 | lIsSuitable = FALSE; |
1133 | |
1134 | if (IsDegenerated(Trans[t])) |
1135 | lIsSuitable = FALSE; |
1136 | } |
1137 | |
1138 | // If it is not suitable, just quit |
1139 | if (!lIsSuitable) goto Error; |
1140 | |
1141 | // Invert curves if possible |
1142 | for (t = 0; t < OriginalLut ->InputChannels; t++) { |
1143 | TransReverse[t] = cmsReverseToneCurveEx(ContextID, PRELINEARIZATION_POINTS, Trans[t]); |
1144 | if (TransReverse[t] == NULL) goto Error; |
1145 | } |
1146 | |
1147 | // Now inset the reversed curves at the begin of transform |
1148 | LutPlusCurves = cmsPipelineDup(ContextID, OriginalLut); |
1149 | if (LutPlusCurves == NULL) goto Error; |
1150 | |
1151 | if (!cmsPipelineInsertStage(ContextID, LutPlusCurves, cmsAT_BEGIN, cmsStageAllocToneCurves(ContextID, OriginalLut ->InputChannels, TransReverse))) |
1152 | goto Error; |
1153 | |
1154 | // Create the result LUT |
1155 | OptimizedLUT = cmsPipelineAlloc(ContextID, OriginalLut ->InputChannels, OriginalLut ->OutputChannels); |
1156 | if (OptimizedLUT == NULL) goto Error; |
1157 | |
1158 | OptimizedPrelinMpe = cmsStageAllocToneCurves(ContextID, OriginalLut ->InputChannels, Trans); |
1159 | |
1160 | // Create and insert the curves at the beginning |
1161 | if (!cmsPipelineInsertStage(ContextID, OptimizedLUT, cmsAT_BEGIN, OptimizedPrelinMpe)) |
1162 | goto Error; |
1163 | |
1164 | // Allocate the CLUT for result |
1165 | OptimizedCLUTmpe = cmsStageAllocCLut16bit(ContextID, nGridPoints, OriginalLut ->InputChannels, OriginalLut ->OutputChannels, NULL); |
1166 | |
1167 | // Add the CLUT to the destination LUT |
1168 | if (!cmsPipelineInsertStage(ContextID, OptimizedLUT, cmsAT_END, OptimizedCLUTmpe)) |
1169 | goto Error; |
1170 | |
1171 | // Resample the LUT |
1172 | if (!cmsStageSampleCLut16bit(ContextID, OptimizedCLUTmpe, XFormSampler16, (void*) LutPlusCurves, 0)) goto Error; |
1173 | |
1174 | // Free resources |
1175 | for (t = 0; t < OriginalLut ->InputChannels; t++) { |
1176 | |
1177 | if (Trans[t]) cmsFreeToneCurve(ContextID, Trans[t]); |
1178 | if (TransReverse[t]) cmsFreeToneCurve(ContextID, TransReverse[t]); |
1179 | } |
1180 | |
1181 | cmsPipelineFree(ContextID, LutPlusCurves); |
1182 | |
1183 | |
1184 | OptimizedPrelinCurves = _cmsStageGetPtrToCurveSet(OptimizedPrelinMpe); |
1185 | OptimizedPrelinCLUT = (_cmsStageCLutData*) OptimizedCLUTmpe ->Data; |
1186 | |
1187 | // Set the evaluator if 8-bit |
1188 | if (_cmsFormatterIs8bit(*InputFormat)) { |
1189 | |
1190 | Prelin8Data* p8 = PrelinOpt8alloc(ContextID, |
1191 | OptimizedPrelinCLUT ->Params, |
1192 | OptimizedPrelinCurves); |
1193 | if (p8 == NULL) return FALSE; |
1194 | |
1195 | _cmsPipelineSetOptimizationParameters(ContextID, OptimizedLUT, PrelinEval8, (void*) p8, Prelin8free, Prelin8dup); |
1196 | |
1197 | } |
1198 | else |
1199 | { |
1200 | Prelin16Data* p16 = PrelinOpt16alloc(ContextID, |
1201 | OptimizedPrelinCLUT ->Params, |
1202 | 3, OptimizedPrelinCurves, 3, NULL); |
1203 | if (p16 == NULL) return FALSE; |
1204 | |
1205 | _cmsPipelineSetOptimizationParameters(ContextID, OptimizedLUT, PrelinEval16, (void*) p16, PrelinOpt16free, Prelin16dup); |
1206 | |
1207 | } |
1208 | |
1209 | // Don't fix white on absolute colorimetric |
1210 | if (Intent == INTENT_ABSOLUTE_COLORIMETRIC) |
1211 | *dwFlags |= cmsFLAGS_NOWHITEONWHITEFIXUP; |
1212 | |
1213 | if (!(*dwFlags & cmsFLAGS_NOWHITEONWHITEFIXUP)) { |
1214 | |
1215 | if (!FixWhiteMisalignment(ContextID, OptimizedLUT, ColorSpace, OutputColorSpace)) { |
1216 | |
1217 | return FALSE; |
1218 | } |
1219 | } |
1220 | |
1221 | // And return the obtained LUT |
1222 | |
1223 | cmsPipelineFree(ContextID, OriginalLut); |
1224 | *Lut = OptimizedLUT; |
1225 | return TRUE; |
1226 | |
1227 | Error: |
1228 | |
1229 | for (t = 0; t < OriginalLut ->InputChannels; t++) { |
1230 | |
1231 | if (Trans[t]) cmsFreeToneCurve(ContextID, Trans[t]); |
1232 | if (TransReverse[t]) cmsFreeToneCurve(ContextID, TransReverse[t]); |
1233 | } |
1234 | |
1235 | if (LutPlusCurves != NULL) cmsPipelineFree(ContextID, LutPlusCurves); |
1236 | if (OptimizedLUT != NULL) cmsPipelineFree(ContextID, OptimizedLUT); |
1237 | |
1238 | return FALSE; |
1239 | |
1240 | cmsUNUSED_PARAMETER(Intent); |
1241 | cmsUNUSED_PARAMETER(lIsLinear); |
1242 | } |
1243 | |
1244 | |
1245 | // Curves optimizer ------------------------------------------------------------------------------------------------------------------ |
1246 | |
1247 | static |
1248 | void CurvesFree(cmsContext ContextID, void* ptr) |
1249 | { |
1250 | Curves16Data* Data = (Curves16Data*) ptr; |
1251 | cmsUInt32Number i; |
1252 | |
1253 | for (i=0; i < Data -> nCurves; i++) { |
1254 | |
1255 | _cmsFree(ContextID, Data ->Curves[i]); |
1256 | } |
1257 | |
1258 | _cmsFree(ContextID, Data ->Curves); |
1259 | _cmsFree(ContextID, ptr); |
1260 | } |
1261 | |
1262 | static |
1263 | void* CurvesDup(cmsContext ContextID, const void* ptr) |
1264 | { |
1265 | Curves16Data* Data = (Curves16Data*)_cmsDupMem(ContextID, ptr, sizeof(Curves16Data)); |
1266 | cmsUInt32Number i; |
1267 | |
1268 | if (Data == NULL) return NULL; |
1269 | |
1270 | Data->Curves = (cmsUInt16Number**) _cmsDupMem(ContextID, Data->Curves, Data->nCurves * sizeof(cmsUInt16Number*)); |
1271 | |
1272 | for (i=0; i < Data -> nCurves; i++) { |
1273 | Data->Curves[i] = (cmsUInt16Number*) _cmsDupMem(ContextID, Data->Curves[i], Data->nElements * sizeof(cmsUInt16Number)); |
1274 | } |
1275 | |
1276 | return (void*) Data; |
1277 | } |
1278 | |
1279 | // Precomputes tables for 8-bit on input devicelink. |
1280 | static |
1281 | Curves16Data* CurvesAlloc(cmsContext ContextID, cmsUInt32Number nCurves, cmsUInt32Number nElements, cmsToneCurve** G) |
1282 | { |
1283 | cmsUInt32Number i, j; |
1284 | Curves16Data* c16; |
1285 | |
1286 | c16 = (Curves16Data*)_cmsMallocZero(ContextID, sizeof(Curves16Data)); |
1287 | if (c16 == NULL) return NULL; |
1288 | |
1289 | c16 ->nCurves = nCurves; |
1290 | c16 ->nElements = nElements; |
1291 | |
1292 | c16->Curves = (cmsUInt16Number**) _cmsCalloc(ContextID, nCurves, sizeof(cmsUInt16Number*)); |
1293 | if (c16->Curves == NULL) { |
1294 | _cmsFree(ContextID, c16); |
1295 | return NULL; |
1296 | } |
1297 | |
1298 | for (i=0; i < nCurves; i++) { |
1299 | |
1300 | c16->Curves[i] = (cmsUInt16Number*) _cmsCalloc(ContextID, nElements, sizeof(cmsUInt16Number)); |
1301 | |
1302 | if (c16->Curves[i] == NULL) { |
1303 | |
1304 | for (j=0; j < i; j++) { |
1305 | _cmsFree(ContextID, c16->Curves[j]); |
1306 | } |
1307 | _cmsFree(ContextID, c16->Curves); |
1308 | _cmsFree(ContextID, c16); |
1309 | return NULL; |
1310 | } |
1311 | |
1312 | if (nElements == 256U) { |
1313 | |
1314 | for (j=0; j < nElements; j++) { |
1315 | |
1316 | c16 ->Curves[i][j] = cmsEvalToneCurve16(ContextID, G[i], FROM_8_TO_16(j)); |
1317 | } |
1318 | } |
1319 | else { |
1320 | |
1321 | for (j=0; j < nElements; j++) { |
1322 | c16 ->Curves[i][j] = cmsEvalToneCurve16(ContextID, G[i], (cmsUInt16Number) j); |
1323 | } |
1324 | } |
1325 | } |
1326 | |
1327 | return c16; |
1328 | } |
1329 | |
1330 | static |
1331 | void FastEvaluateCurves8(cmsContext ContextID, register const cmsUInt16Number In[], |
1332 | register cmsUInt16Number Out[], |
1333 | register const void* D) |
1334 | { |
1335 | Curves16Data* Data = (Curves16Data*) D; |
1336 | int x; |
1337 | cmsUInt32Number i; |
1338 | cmsUNUSED_PARAMETER(ContextID); |
1339 | |
1340 | for (i=0; i < Data ->nCurves; i++) { |
1341 | |
1342 | x = (In[i] >> 8); |
1343 | Out[i] = Data -> Curves[i][x]; |
1344 | } |
1345 | } |
1346 | |
1347 | |
1348 | static |
1349 | void FastEvaluateCurves16(cmsContext ContextID, register const cmsUInt16Number In[], |
1350 | register cmsUInt16Number Out[], |
1351 | register const void* D) |
1352 | { |
1353 | Curves16Data* Data = (Curves16Data*) D; |
1354 | cmsUInt32Number i; |
1355 | cmsUNUSED_PARAMETER(ContextID); |
1356 | |
1357 | for (i=0; i < Data ->nCurves; i++) { |
1358 | Out[i] = Data -> Curves[i][In[i]]; |
1359 | } |
1360 | } |
1361 | |
1362 | |
1363 | static |
1364 | void FastIdentity16(cmsContext ContextID, register const cmsUInt16Number In[], |
1365 | register cmsUInt16Number Out[], |
1366 | register const void* D) |
1367 | { |
1368 | cmsPipeline* Lut = (cmsPipeline*) D; |
1369 | cmsUInt32Number i; |
1370 | cmsUNUSED_PARAMETER(ContextID); |
1371 | |
1372 | for (i=0; i < Lut ->InputChannels; i++) { |
1373 | Out[i] = In[i]; |
1374 | } |
1375 | } |
1376 | |
1377 | |
1378 | // If the target LUT holds only curves, the optimization procedure is to join all those |
1379 | // curves together. That only works on curves and does not work on matrices. |
1380 | static |
1381 | cmsBool OptimizeByJoiningCurves(cmsContext ContextID, cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) |
1382 | { |
1383 | cmsToneCurve** GammaTables = NULL; |
1384 | cmsFloat32Number InFloat[cmsMAXCHANNELS], OutFloat[cmsMAXCHANNELS]; |
1385 | cmsUInt32Number i, j; |
1386 | cmsPipeline* Src = *Lut; |
1387 | cmsPipeline* Dest = NULL; |
1388 | cmsStage* mpe; |
1389 | cmsStage* ObtainedCurves = NULL; |
1390 | |
1391 | |
1392 | // This is a lossy optimization! does not apply in floating-point cases |
1393 | if (_cmsFormatterIsFloat(*InputFormat) || _cmsFormatterIsFloat(*OutputFormat)) return FALSE; |
1394 | |
1395 | // Only curves in this LUT? |
1396 | for (mpe = cmsPipelineGetPtrToFirstStage(ContextID, Src); |
1397 | mpe != NULL; |
1398 | mpe = cmsStageNext(ContextID, mpe)) { |
1399 | if (cmsStageType(ContextID, mpe) != cmsSigCurveSetElemType) return FALSE; |
1400 | } |
1401 | |
1402 | // Allocate an empty LUT |
1403 | Dest = cmsPipelineAlloc(ContextID, Src ->InputChannels, Src ->OutputChannels); |
1404 | if (Dest == NULL) return FALSE; |
1405 | |
1406 | // Create target curves |
1407 | GammaTables = (cmsToneCurve**) _cmsCalloc(ContextID, Src ->InputChannels, sizeof(cmsToneCurve*)); |
1408 | if (GammaTables == NULL) goto Error; |
1409 | |
1410 | for (i=0; i < Src ->InputChannels; i++) { |
1411 | GammaTables[i] = cmsBuildTabulatedToneCurve16(ContextID, PRELINEARIZATION_POINTS, NULL); |
1412 | if (GammaTables[i] == NULL) goto Error; |
1413 | } |
1414 | |
1415 | // Compute 16 bit result by using floating point |
1416 | for (i=0; i < PRELINEARIZATION_POINTS; i++) { |
1417 | |
1418 | for (j=0; j < Src ->InputChannels; j++) |
1419 | InFloat[j] = (cmsFloat32Number) ((cmsFloat64Number) i / (PRELINEARIZATION_POINTS - 1)); |
1420 | |
1421 | cmsPipelineEvalFloat(ContextID, InFloat, OutFloat, Src); |
1422 | |
1423 | for (j=0; j < Src ->InputChannels; j++) |
1424 | GammaTables[j] -> Table16[i] = _cmsQuickSaturateWord(OutFloat[j] * 65535.0); |
1425 | } |
1426 | |
1427 | ObtainedCurves = cmsStageAllocToneCurves(ContextID, Src ->InputChannels, GammaTables); |
1428 | if (ObtainedCurves == NULL) goto Error; |
1429 | |
1430 | for (i=0; i < Src ->InputChannels; i++) { |
1431 | cmsFreeToneCurve(ContextID, GammaTables[i]); |
1432 | GammaTables[i] = NULL; |
1433 | } |
1434 | |
1435 | if (GammaTables != NULL) { |
1436 | _cmsFree(ContextID, GammaTables); |
1437 | GammaTables = NULL; |
1438 | } |
1439 | |
1440 | // Maybe the curves are linear at the end |
1441 | if (!AllCurvesAreLinear(ContextID, ObtainedCurves)) { |
1442 | _cmsStageToneCurvesData* Data; |
1443 | |
1444 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_BEGIN, ObtainedCurves)) |
1445 | goto Error; |
1446 | Data = (_cmsStageToneCurvesData*) cmsStageData(ContextID, ObtainedCurves); |
1447 | ObtainedCurves = NULL; |
1448 | |
1449 | // If the curves are to be applied in 8 bits, we can save memory |
1450 | if (_cmsFormatterIs8bit(*InputFormat)) { |
1451 | Curves16Data* c16 = CurvesAlloc(ContextID, Data ->nCurves, 256, Data ->TheCurves); |
1452 | |
1453 | if (c16 == NULL) goto Error; |
1454 | *dwFlags |= cmsFLAGS_NOCACHE; |
1455 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, FastEvaluateCurves8, c16, CurvesFree, CurvesDup); |
1456 | |
1457 | } |
1458 | else { |
1459 | Curves16Data* c16 = CurvesAlloc(ContextID, Data ->nCurves, 65536, Data ->TheCurves); |
1460 | |
1461 | if (c16 == NULL) goto Error; |
1462 | *dwFlags |= cmsFLAGS_NOCACHE; |
1463 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, FastEvaluateCurves16, c16, CurvesFree, CurvesDup); |
1464 | } |
1465 | } |
1466 | else { |
1467 | |
1468 | // LUT optimizes to nothing. Set the identity LUT |
1469 | cmsStageFree(ContextID, ObtainedCurves); |
1470 | ObtainedCurves = NULL; |
1471 | |
1472 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_BEGIN, cmsStageAllocIdentity(ContextID, Src ->InputChannels))) |
1473 | goto Error; |
1474 | |
1475 | *dwFlags |= cmsFLAGS_NOCACHE; |
1476 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, FastIdentity16, (void*) Dest, NULL, NULL); |
1477 | } |
1478 | |
1479 | // We are done. |
1480 | cmsPipelineFree(ContextID, Src); |
1481 | *Lut = Dest; |
1482 | return TRUE; |
1483 | |
1484 | Error: |
1485 | |
1486 | if (ObtainedCurves != NULL) cmsStageFree(ContextID, ObtainedCurves); |
1487 | if (GammaTables != NULL) { |
1488 | for (i=0; i < Src ->InputChannels; i++) { |
1489 | if (GammaTables[i] != NULL) cmsFreeToneCurve(ContextID, GammaTables[i]); |
1490 | } |
1491 | |
1492 | _cmsFree(ContextID, GammaTables); |
1493 | } |
1494 | |
1495 | if (Dest != NULL) cmsPipelineFree(ContextID, Dest); |
1496 | return FALSE; |
1497 | |
1498 | cmsUNUSED_PARAMETER(Intent); |
1499 | cmsUNUSED_PARAMETER(InputFormat); |
1500 | cmsUNUSED_PARAMETER(OutputFormat); |
1501 | cmsUNUSED_PARAMETER(dwFlags); |
1502 | } |
1503 | |
1504 | // ------------------------------------------------------------------------------------------------------------------------------------- |
1505 | // LUT is Shaper - Matrix - Matrix - Shaper, which is very frequent when combining two matrix-shaper profiles |
1506 | |
1507 | |
1508 | static |
1509 | void FreeMatShaper(cmsContext ContextID, void* Data) |
1510 | { |
1511 | if (Data != NULL) _cmsFree(ContextID, Data); |
1512 | } |
1513 | |
1514 | static |
1515 | void* DupMatShaper(cmsContext ContextID, const void* Data) |
1516 | { |
1517 | return _cmsDupMem(ContextID, Data, sizeof(MatShaper8Data)); |
1518 | } |
1519 | |
1520 | |
1521 | // A fast matrix-shaper evaluator for 8 bits. This is a bit ticky since I'm using 1.14 signed fixed point |
1522 | // to accomplish some performance. Actually it takes 256x3 16 bits tables and 16385 x 3 tables of 8 bits, |
1523 | // in total about 50K, and the performance boost is huge! |
1524 | static |
1525 | void MatShaperEval16(cmsContext ContextID, register const cmsUInt16Number In[], |
1526 | register cmsUInt16Number Out[], |
1527 | register const void* D) |
1528 | { |
1529 | MatShaper8Data* p = (MatShaper8Data*) D; |
1530 | cmsS1Fixed14Number l1, l2, l3, r, g, b; |
1531 | cmsUInt32Number ri, gi, bi; |
1532 | cmsUNUSED_PARAMETER(ContextID); |
1533 | |
1534 | // In this case (and only in this case!) we can use this simplification since |
1535 | // In[] is assured to come from a 8 bit number. (a << 8 | a) |
1536 | ri = In[0] & 0xFFU; |
1537 | gi = In[1] & 0xFFU; |
1538 | bi = In[2] & 0xFFU; |
1539 | |
1540 | // Across first shaper, which also converts to 1.14 fixed point |
1541 | r = p->Shaper1R[ri]; |
1542 | g = p->Shaper1G[gi]; |
1543 | b = p->Shaper1B[bi]; |
1544 | |
1545 | // Evaluate the matrix in 1.14 fixed point |
1546 | l1 = (p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b + p->Off[0] + 0x2000) >> 14; |
1547 | l2 = (p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b + p->Off[1] + 0x2000) >> 14; |
1548 | l3 = (p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b + p->Off[2] + 0x2000) >> 14; |
1549 | |
1550 | // Now we have to clip to 0..1.0 range |
1551 | ri = (l1 < 0) ? 0 : ((l1 > 16384) ? 16384U : (cmsUInt32Number) l1); |
1552 | gi = (l2 < 0) ? 0 : ((l2 > 16384) ? 16384U : (cmsUInt32Number) l2); |
1553 | bi = (l3 < 0) ? 0 : ((l3 > 16384) ? 16384U : (cmsUInt32Number) l3); |
1554 | |
1555 | // And across second shaper, |
1556 | Out[0] = p->Shaper2R[ri]; |
1557 | Out[1] = p->Shaper2G[gi]; |
1558 | Out[2] = p->Shaper2B[bi]; |
1559 | |
1560 | } |
1561 | |
1562 | // This table converts from 8 bits to 1.14 after applying the curve |
1563 | static |
1564 | void FillFirstShaper(cmsContext ContextID, cmsS1Fixed14Number* Table, cmsToneCurve* Curve) |
1565 | { |
1566 | int i; |
1567 | cmsFloat32Number R, y; |
1568 | |
1569 | for (i=0; i < 256; i++) { |
1570 | |
1571 | R = (cmsFloat32Number) (i / 255.0); |
1572 | y = cmsEvalToneCurveFloat(ContextID, Curve, R); |
1573 | |
1574 | if (y < 131072.0) |
1575 | Table[i] = DOUBLE_TO_1FIXED14(y); |
1576 | else |
1577 | Table[i] = 0x7fffffff; |
1578 | } |
1579 | } |
1580 | |
1581 | // This table converts form 1.14 (being 0x4000 the last entry) to 8 bits after applying the curve |
1582 | static |
1583 | void FillSecondShaper(cmsContext ContextID, cmsUInt16Number* Table, cmsToneCurve* Curve, cmsBool Is8BitsOutput) |
1584 | { |
1585 | int i; |
1586 | cmsFloat32Number R, Val; |
1587 | |
1588 | for (i=0; i < 16385; i++) { |
1589 | |
1590 | R = (cmsFloat32Number) (i / 16384.0); |
1591 | Val = cmsEvalToneCurveFloat(ContextID, Curve, R); // Val comes 0..1.0 |
1592 | |
1593 | if (Val < 0) |
1594 | Val = 0; |
1595 | |
1596 | if (Val > 1.0) |
1597 | Val = 1.0; |
1598 | |
1599 | if (Is8BitsOutput) { |
1600 | |
1601 | // If 8 bits output, we can optimize further by computing the / 257 part. |
1602 | // first we compute the resulting byte and then we store the byte times |
1603 | // 257. This quantization allows to round very quick by doing a >> 8, but |
1604 | // since the low byte is always equal to msb, we can do a & 0xff and this works! |
1605 | cmsUInt16Number w = _cmsQuickSaturateWord(Val * 65535.0); |
1606 | cmsUInt8Number b = FROM_16_TO_8(w); |
1607 | |
1608 | Table[i] = FROM_8_TO_16(b); |
1609 | } |
1610 | else Table[i] = _cmsQuickSaturateWord(Val * 65535.0); |
1611 | } |
1612 | } |
1613 | |
1614 | // Compute the matrix-shaper structure |
1615 | static |
1616 | cmsBool SetMatShaper(cmsContext ContextID, cmsPipeline* Dest, cmsToneCurve* Curve1[3], cmsMAT3* Mat, cmsVEC3* Off, cmsToneCurve* Curve2[3], cmsUInt32Number* OutputFormat) |
1617 | { |
1618 | MatShaper8Data* p; |
1619 | int i, j; |
1620 | cmsBool Is8Bits = _cmsFormatterIs8bit(*OutputFormat); |
1621 | |
1622 | // Allocate a big chuck of memory to store precomputed tables |
1623 | p = (MatShaper8Data*) _cmsMalloc(ContextID, sizeof(MatShaper8Data)); |
1624 | if (p == NULL) return FALSE; |
1625 | |
1626 | // Precompute tables |
1627 | FillFirstShaper(ContextID, p ->Shaper1R, Curve1[0]); |
1628 | FillFirstShaper(ContextID, p ->Shaper1G, Curve1[1]); |
1629 | FillFirstShaper(ContextID, p ->Shaper1B, Curve1[2]); |
1630 | |
1631 | FillSecondShaper(ContextID, p ->Shaper2R, Curve2[0], Is8Bits); |
1632 | FillSecondShaper(ContextID, p ->Shaper2G, Curve2[1], Is8Bits); |
1633 | FillSecondShaper(ContextID, p ->Shaper2B, Curve2[2], Is8Bits); |
1634 | |
1635 | // Convert matrix to nFixed14. Note that those values may take more than 16 bits |
1636 | for (i=0; i < 3; i++) { |
1637 | for (j=0; j < 3; j++) { |
1638 | p ->Mat[i][j] = DOUBLE_TO_1FIXED14(Mat->v[i].n[j]); |
1639 | } |
1640 | } |
1641 | |
1642 | for (i=0; i < 3; i++) { |
1643 | |
1644 | if (Off == NULL) { |
1645 | p ->Off[i] = 0; |
1646 | } |
1647 | else { |
1648 | p ->Off[i] = DOUBLE_TO_1FIXED14(Off->n[i]); |
1649 | } |
1650 | } |
1651 | |
1652 | // Mark as optimized for faster formatter |
1653 | if (Is8Bits) |
1654 | *OutputFormat |= OPTIMIZED_SH(1); |
1655 | |
1656 | // Fill function pointers |
1657 | _cmsPipelineSetOptimizationParameters(ContextID, Dest, MatShaperEval16, (void*) p, FreeMatShaper, DupMatShaper); |
1658 | return TRUE; |
1659 | } |
1660 | |
1661 | // 8 bits on input allows matrix-shaper boot up to 25 Mpixels per second on RGB. That's fast! |
1662 | static |
1663 | cmsBool OptimizeMatrixShaper(cmsContext ContextID, cmsPipeline** Lut, cmsUInt32Number Intent, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) |
1664 | { |
1665 | cmsStage* Curve1, *Curve2; |
1666 | cmsStage* Matrix1, *Matrix2; |
1667 | cmsMAT3 res; |
1668 | cmsBool IdentityMat; |
1669 | cmsPipeline* Dest, *Src; |
1670 | cmsFloat64Number* Offset; |
1671 | |
1672 | // Only works on RGB to RGB |
1673 | if (T_CHANNELS(*InputFormat) != 3 || T_CHANNELS(*OutputFormat) != 3) return FALSE; |
1674 | |
1675 | // Only works on 8 bit input |
1676 | if (!_cmsFormatterIs8bit(*InputFormat)) return FALSE; |
1677 | |
1678 | // Seems suitable, proceed |
1679 | Src = *Lut; |
1680 | |
1681 | // Check for: |
1682 | // |
1683 | // shaper-matrix-matrix-shaper |
1684 | // shaper-matrix-shaper |
1685 | // |
1686 | // Both of those constructs are possible (first because abs. colorimetric). |
1687 | // additionally, In the first case, the input matrix offset should be zero. |
1688 | |
1689 | IdentityMat = FALSE; |
1690 | if (cmsPipelineCheckAndRetreiveStages(ContextID, Src, 4, |
1691 | cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, |
1692 | &Curve1, &Matrix1, &Matrix2, &Curve2)) { |
1693 | |
1694 | // Get both matrices |
1695 | _cmsStageMatrixData* Data1 = (_cmsStageMatrixData*)cmsStageData(ContextID, Matrix1); |
1696 | _cmsStageMatrixData* Data2 = (_cmsStageMatrixData*)cmsStageData(ContextID, Matrix2); |
1697 | |
1698 | // Input offset should be zero |
1699 | if (Data1->Offset != NULL) return FALSE; |
1700 | |
1701 | // Multiply both matrices to get the result |
1702 | _cmsMAT3per(ContextID, &res, (cmsMAT3*)Data2->Double, (cmsMAT3*)Data1->Double); |
1703 | |
1704 | // Only 2nd matrix has offset, or it is zero |
1705 | Offset = Data2->Offset; |
1706 | |
1707 | // Now the result is in res + Data2 -> Offset. Maybe is a plain identity? |
1708 | if (_cmsMAT3isIdentity(ContextID, &res) && Offset == NULL) { |
1709 | |
1710 | // We can get rid of full matrix |
1711 | IdentityMat = TRUE; |
1712 | } |
1713 | |
1714 | } |
1715 | else { |
1716 | |
1717 | if (cmsPipelineCheckAndRetreiveStages(ContextID, Src, 3, |
1718 | cmsSigCurveSetElemType, cmsSigMatrixElemType, cmsSigCurveSetElemType, |
1719 | &Curve1, &Matrix1, &Curve2)) { |
1720 | |
1721 | _cmsStageMatrixData* Data = (_cmsStageMatrixData*)cmsStageData(ContextID, Matrix1); |
1722 | |
1723 | // Copy the matrix to our result |
1724 | memcpy(&res, Data->Double, sizeof(res)); |
1725 | |
1726 | // Preserve the Odffset (may be NULL as a zero offset) |
1727 | Offset = Data->Offset; |
1728 | |
1729 | if (_cmsMAT3isIdentity(ContextID, &res) && Offset == NULL) { |
1730 | |
1731 | // We can get rid of full matrix |
1732 | IdentityMat = TRUE; |
1733 | } |
1734 | } |
1735 | else |
1736 | return FALSE; // Not optimizeable this time |
1737 | |
1738 | } |
1739 | |
1740 | // Allocate an empty LUT |
1741 | Dest = cmsPipelineAlloc(ContextID, Src ->InputChannels, Src ->OutputChannels); |
1742 | if (!Dest) return FALSE; |
1743 | |
1744 | // Assamble the new LUT |
1745 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_BEGIN, cmsStageDup(ContextID, Curve1))) |
1746 | goto Error; |
1747 | |
1748 | if (!IdentityMat) { |
1749 | |
1750 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END, cmsStageAllocMatrix(ContextID, 3, 3, (const cmsFloat64Number*)&res, Offset))) |
1751 | goto Error; |
1752 | } |
1753 | |
1754 | if (!cmsPipelineInsertStage(ContextID, Dest, cmsAT_END, cmsStageDup(ContextID, Curve2))) |
1755 | goto Error; |
1756 | |
1757 | // If identity on matrix, we can further optimize the curves, so call the join curves routine |
1758 | if (IdentityMat) { |
1759 | |
1760 | OptimizeByJoiningCurves(ContextID, &Dest, Intent, InputFormat, OutputFormat, dwFlags); |
1761 | } |
1762 | else { |
1763 | _cmsStageToneCurvesData* mpeC1 = (_cmsStageToneCurvesData*) cmsStageData(ContextID, Curve1); |
1764 | _cmsStageToneCurvesData* mpeC2 = (_cmsStageToneCurvesData*) cmsStageData(ContextID, Curve2); |
1765 | |
1766 | // In this particular optimization, cache does not help as it takes more time to deal with |
1767 | // the cache that with the pixel handling |
1768 | *dwFlags |= cmsFLAGS_NOCACHE; |
1769 | |
1770 | // Setup the optimizarion routines |
1771 | SetMatShaper(ContextID, Dest, mpeC1 ->TheCurves, &res, (cmsVEC3*) Offset, mpeC2->TheCurves, OutputFormat); |
1772 | } |
1773 | |
1774 | cmsPipelineFree(ContextID, Src); |
1775 | *Lut = Dest; |
1776 | return TRUE; |
1777 | Error: |
1778 | // Leave Src unchanged |
1779 | cmsPipelineFree(ContextID, Dest); |
1780 | return FALSE; |
1781 | } |
1782 | |
1783 | |
1784 | // ------------------------------------------------------------------------------------------------------------------------------------- |
1785 | // Optimization plug-ins |
1786 | |
1787 | // List of optimizations |
1788 | typedef struct _cmsOptimizationCollection_st { |
1789 | |
1790 | _cmsOPToptimizeFn OptimizePtr; |
1791 | |
1792 | struct _cmsOptimizationCollection_st *Next; |
1793 | |
1794 | } _cmsOptimizationCollection; |
1795 | |
1796 | |
1797 | // The built-in list. We currently implement 4 types of optimizations. Joining of curves, matrix-shaper, linearization and resampling |
1798 | static _cmsOptimizationCollection DefaultOptimization[] = { |
1799 | |
1800 | { OptimizeByJoiningCurves, &DefaultOptimization[1] }, |
1801 | { OptimizeMatrixShaper, &DefaultOptimization[2] }, |
1802 | { OptimizeByComputingLinearization, &DefaultOptimization[3] }, |
1803 | { OptimizeByResampling, NULL } |
1804 | }; |
1805 | |
1806 | // The linked list head |
1807 | _cmsOptimizationPluginChunkType _cmsOptimizationPluginChunk = { NULL }; |
1808 | |
1809 | |
1810 | // Duplicates the zone of memory used by the plug-in in the new context |
1811 | static |
1812 | void DupPluginOptimizationList(struct _cmsContext_struct* ctx, |
1813 | const struct _cmsContext_struct* src) |
1814 | { |
1815 | _cmsOptimizationPluginChunkType newHead = { NULL }; |
1816 | _cmsOptimizationCollection* entry; |
1817 | _cmsOptimizationCollection* Anterior = NULL; |
1818 | _cmsOptimizationPluginChunkType* head = (_cmsOptimizationPluginChunkType*) src->chunks[OptimizationPlugin]; |
1819 | |
1820 | _cmsAssert(ctx != NULL); |
1821 | _cmsAssert(head != NULL); |
1822 | |
1823 | // Walk the list copying all nodes |
1824 | for (entry = head->OptimizationCollection; |
1825 | entry != NULL; |
1826 | entry = entry ->Next) { |
1827 | |
1828 | _cmsOptimizationCollection *newEntry = ( _cmsOptimizationCollection *) _cmsSubAllocDup(ctx ->MemPool, entry, sizeof(_cmsOptimizationCollection)); |
1829 | |
1830 | if (newEntry == NULL) |
1831 | return; |
1832 | |
1833 | // We want to keep the linked list order, so this is a little bit tricky |
1834 | newEntry -> Next = NULL; |
1835 | if (Anterior) |
1836 | Anterior -> Next = newEntry; |
1837 | |
1838 | Anterior = newEntry; |
1839 | |
1840 | if (newHead.OptimizationCollection == NULL) |
1841 | newHead.OptimizationCollection = newEntry; |
1842 | } |
1843 | |
1844 | ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx->MemPool, &newHead, sizeof(_cmsOptimizationPluginChunkType)); |
1845 | } |
1846 | |
1847 | void _cmsAllocOptimizationPluginChunk(struct _cmsContext_struct* ctx, |
1848 | const struct _cmsContext_struct* src) |
1849 | { |
1850 | if (src != NULL) { |
1851 | |
1852 | // Copy all linked list |
1853 | DupPluginOptimizationList(ctx, src); |
1854 | } |
1855 | else { |
1856 | static _cmsOptimizationPluginChunkType OptimizationPluginChunkType = { NULL }; |
1857 | ctx ->chunks[OptimizationPlugin] = _cmsSubAllocDup(ctx ->MemPool, &OptimizationPluginChunkType, sizeof(_cmsOptimizationPluginChunkType)); |
1858 | } |
1859 | } |
1860 | |
1861 | |
1862 | // Register new ways to optimize |
1863 | cmsBool _cmsRegisterOptimizationPlugin(cmsContext ContextID, cmsPluginBase* Data) |
1864 | { |
1865 | cmsPluginOptimization* Plugin = (cmsPluginOptimization*) Data; |
1866 | _cmsOptimizationPluginChunkType* ctx = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin); |
1867 | _cmsOptimizationCollection* fl; |
1868 | |
1869 | if (Data == NULL) { |
1870 | |
1871 | ctx->OptimizationCollection = NULL; |
1872 | return TRUE; |
1873 | } |
1874 | |
1875 | // Optimizer callback is required |
1876 | if (Plugin ->OptimizePtr == NULL) return FALSE; |
1877 | |
1878 | fl = (_cmsOptimizationCollection*) _cmsPluginMalloc(ContextID, sizeof(_cmsOptimizationCollection)); |
1879 | if (fl == NULL) return FALSE; |
1880 | |
1881 | // Copy the parameters |
1882 | fl ->OptimizePtr = Plugin ->OptimizePtr; |
1883 | |
1884 | // Keep linked list |
1885 | fl ->Next = ctx->OptimizationCollection; |
1886 | |
1887 | // Set the head |
1888 | ctx ->OptimizationCollection = fl; |
1889 | |
1890 | // All is ok |
1891 | return TRUE; |
1892 | } |
1893 | |
1894 | // The entry point for LUT optimization |
1895 | cmsBool _cmsOptimizePipeline(cmsContext ContextID, |
1896 | cmsPipeline** PtrLut, |
1897 | cmsUInt32Number Intent, |
1898 | cmsUInt32Number* InputFormat, |
1899 | cmsUInt32Number* OutputFormat, |
1900 | cmsUInt32Number* dwFlags) |
1901 | { |
1902 | _cmsOptimizationPluginChunkType* ctx = ( _cmsOptimizationPluginChunkType*) _cmsContextGetClientChunk(ContextID, OptimizationPlugin); |
1903 | _cmsOptimizationCollection* Opts; |
1904 | cmsBool AnySuccess = FALSE; |
1905 | |
1906 | // A CLUT is being asked, so force this specific optimization |
1907 | if (*dwFlags & cmsFLAGS_FORCE_CLUT) { |
1908 | |
1909 | PreOptimize(ContextID, *PtrLut); |
1910 | return OptimizeByResampling(ContextID, PtrLut, Intent, InputFormat, OutputFormat, dwFlags); |
1911 | } |
1912 | |
1913 | // Anything to optimize? |
1914 | if ((*PtrLut) ->Elements == NULL) { |
1915 | _cmsPipelineSetOptimizationParameters(ContextID, *PtrLut, FastIdentity16, (void*) *PtrLut, NULL, NULL); |
1916 | return TRUE; |
1917 | } |
1918 | |
1919 | // Try to get rid of identities and trivial conversions. |
1920 | AnySuccess = PreOptimize(ContextID, *PtrLut); |
1921 | |
1922 | // After removal do we end with an identity? |
1923 | if ((*PtrLut) ->Elements == NULL) { |
1924 | _cmsPipelineSetOptimizationParameters(ContextID, *PtrLut, FastIdentity16, (void*) *PtrLut, NULL, NULL); |
1925 | return TRUE; |
1926 | } |
1927 | |
1928 | // Do not optimize, keep all precision |
1929 | if (*dwFlags & cmsFLAGS_NOOPTIMIZE) |
1930 | return FALSE; |
1931 | |
1932 | // Try plug-in optimizations |
1933 | for (Opts = ctx->OptimizationCollection; |
1934 | Opts != NULL; |
1935 | Opts = Opts ->Next) { |
1936 | |
1937 | // If one schema succeeded, we are done |
1938 | if (Opts ->OptimizePtr(ContextID, PtrLut, Intent, InputFormat, OutputFormat, dwFlags)) { |
1939 | |
1940 | return TRUE; // Optimized! |
1941 | } |
1942 | } |
1943 | |
1944 | // Try built-in optimizations |
1945 | for (Opts = DefaultOptimization; |
1946 | Opts != NULL; |
1947 | Opts = Opts ->Next) { |
1948 | |
1949 | if (Opts ->OptimizePtr(ContextID, PtrLut, Intent, InputFormat, OutputFormat, dwFlags)) { |
1950 | |
1951 | return TRUE; |
1952 | } |
1953 | } |
1954 | |
1955 | // Only simple optimizations succeeded |
1956 | return AnySuccess; |
1957 | } |
1958 | |
1959 | cmsBool _cmsLutIsIdentity(cmsPipeline *PtrLut) |
1960 | { |
1961 | return !PtrLut || PtrLut->Eval16Fn == FastIdentity16; |
1962 | } |
1963 | |