1/*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "skcms.h"
9#include "skcms_internal.h"
10#include <assert.h>
11#include <float.h>
12#include <limits.h>
13#include <stdlib.h>
14#include <string.h>
15
16#if defined(__ARM_NEON)
17 #include <arm_neon.h>
18#elif defined(__SSE__)
19 #include <immintrin.h>
20
21 #if defined(__clang__)
22 // That #include <immintrin.h> is usually enough, but Clang's headers
23 // "helpfully" skip including the whole kitchen sink when _MSC_VER is
24 // defined, because lots of programs on Windows would include that and
25 // it'd be a lot slower. But we want all those headers included so we
26 // can use their features after runtime checks later.
27 #include <smmintrin.h>
28 #include <avxintrin.h>
29 #include <avx2intrin.h>
30 #include <avx512fintrin.h>
31 #include <avx512dqintrin.h>
32 #endif
33#endif
34
35// sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
36// We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
37//
38// Please do not use sizeof() directly, and size_t only when required.
39// (We have no way of enforcing these requests...)
40#define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
41
42// Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
43#define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
44
45static const union {
46 uint32_t bits;
47 float f;
48} inf_ = { 0x7f800000 };
49#define INFINITY_ inf_.f
50
51#if defined(__clang__) || defined(__GNUC__)
52 #define small_memcpy __builtin_memcpy
53#else
54 #define small_memcpy memcpy
55#endif
56
57static float log2f_(float x) {
58 // The first approximation of log2(x) is its exponent 'e', minus 127.
59 int32_t bits;
60 small_memcpy(&bits, &x, sizeof(bits));
61
62 float e = (float)bits * (1.0f / (1<<23));
63
64 // If we use the mantissa too we can refine the error signficantly.
65 int32_t m_bits = (bits & 0x007fffff) | 0x3f000000;
66 float m;
67 small_memcpy(&m, &m_bits, sizeof(m));
68
69 return (e - 124.225514990f
70 - 1.498030302f*m
71 - 1.725879990f/(0.3520887068f + m));
72}
73static float logf_(float x) {
74 const float ln2 = 0.69314718f;
75 return ln2*log2f_(x);
76}
77
78static float exp2f_(float x) {
79 float fract = x - floorf_(x);
80
81 float fbits = (1.0f * (1<<23)) * (x + 121.274057500f
82 - 1.490129070f*fract
83 + 27.728023300f/(4.84252568f - fract));
84
85 // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
86 // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
87 // INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
88 if (fbits >= (float)INT_MAX) {
89 return INFINITY_;
90 } else if (fbits < (float)INT_MIN) {
91 return -INFINITY_;
92 }
93
94 int32_t bits = (int32_t)fbits;
95 small_memcpy(&x, &bits, sizeof(x));
96 return x;
97}
98
99// Not static, as it's used by some test tools.
100float powf_(float x, float y) {
101 assert (x >= 0);
102 return (x == 0) || (x == 1) ? x
103 : exp2f_(log2f_(x) * y);
104}
105
106static float expf_(float x) {
107 const float log2_e = 1.4426950408889634074f;
108 return exp2f_(log2_e * x);
109}
110
111static float fmaxf_(float x, float y) { return x > y ? x : y; }
112static float fminf_(float x, float y) { return x < y ? x : y; }
113
114static bool isfinitef_(float x) { return 0 == x*0; }
115
116static float minus_1_ulp(float x) {
117 int32_t bits;
118 memcpy(&bits, &x, sizeof(bits));
119 bits = bits - 1;
120 memcpy(&x, &bits, sizeof(bits));
121 return x;
122}
123
124// Most transfer functions we work with are sRGBish.
125// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
126// and repurpose the other fields to hold the parameters of the HDR functions.
127enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
128struct TF_PQish { float A,B,C,D,E,F; };
129struct TF_HLGish { float R,G,a,b,c; };
130
131static float TFKind_marker(TFKind kind) {
132 // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
133 return -(float)kind;
134}
135
136static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
137 , TF_HLGish* hlg = nullptr) {
138 if (tf.g < 0 && (int)tf.g == tf.g) {
139 // TODO: sanity checks for PQ/HLG like we do for sRGBish.
140 switch ((int)tf.g) {
141 case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish;
142 case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish;
143 case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish;
144 }
145 return Bad;
146 }
147
148 // Basic sanity checks for sRGBish transfer functions.
149 if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
150 // a,c,d,g should be non-negative to make any sense.
151 && tf.a >= 0
152 && tf.c >= 0
153 && tf.d >= 0
154 && tf.g >= 0
155 // Raising a negative value to a fractional tf->g produces complex numbers.
156 && tf.a * tf.d + tf.b >= 0) {
157 return sRGBish;
158 }
159
160 return Bad;
161}
162
163bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
164 float A, float B, float C,
165 float D, float E, float F) {
166 *tf = { TFKind_marker(PQish), A,B,C,D,E,F };
167 assert(classify(*tf) == PQish);
168 return true;
169}
170
171bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
172 float R, float G,
173 float a, float b, float c) {
174 *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 };
175 assert(classify(*tf) == HLGish);
176 return true;
177}
178
179float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
180 float sign = x < 0 ? -1.0f : 1.0f;
181 x *= sign;
182
183 TF_PQish pq;
184 TF_HLGish hlg;
185 switch (classify(*tf, &pq, &hlg)) {
186 case Bad: break;
187
188 case HLGish: return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G)
189 : expf_((x-hlg.c)*hlg.a) + hlg.b);
190
191 // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
192 case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G)
193 : hlg.a * logf_(x - hlg.b) + hlg.c);
194
195
196 case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
197 : powf_(tf->a * x + tf->b, tf->g) + tf->e);
198
199 case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0)
200 / (pq.D + pq.E * powf_(x, pq.C)),
201 pq.F);
202 }
203 return 0;
204}
205
206
207static float eval_curve(const skcms_Curve* curve, float x) {
208 if (curve->table_entries == 0) {
209 return skcms_TransferFunction_eval(&curve->parametric, x);
210 }
211
212 float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1);
213 int lo = (int) ix ,
214 hi = (int)(float)minus_1_ulp(ix + 1.0f);
215 float t = ix - (float)lo;
216
217 float l, h;
218 if (curve->table_8) {
219 l = curve->table_8[lo] * (1/255.0f);
220 h = curve->table_8[hi] * (1/255.0f);
221 } else {
222 uint16_t be_l, be_h;
223 memcpy(&be_l, curve->table_16 + 2*lo, 2);
224 memcpy(&be_h, curve->table_16 + 2*hi, 2);
225 uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff;
226 uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff;
227 l = le_l * (1/65535.0f);
228 h = le_h * (1/65535.0f);
229 }
230 return l + (h-l)*t;
231}
232
233float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
234 uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256;
235 const float dx = 1.0f / (N - 1);
236 float err = 0;
237 for (uint32_t i = 0; i < N; i++) {
238 float x = i * dx,
239 y = eval_curve(curve, x);
240 err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
241 }
242 return err;
243}
244
245bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
246 return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f);
247}
248
249// Additional ICC signature values that are only used internally
250enum {
251 // File signature
252 skcms_Signature_acsp = 0x61637370,
253
254 // Tag signatures
255 skcms_Signature_rTRC = 0x72545243,
256 skcms_Signature_gTRC = 0x67545243,
257 skcms_Signature_bTRC = 0x62545243,
258 skcms_Signature_kTRC = 0x6B545243,
259
260 skcms_Signature_rXYZ = 0x7258595A,
261 skcms_Signature_gXYZ = 0x6758595A,
262 skcms_Signature_bXYZ = 0x6258595A,
263
264 skcms_Signature_A2B0 = 0x41324230,
265 skcms_Signature_A2B1 = 0x41324231,
266 skcms_Signature_mAB = 0x6D414220,
267
268 skcms_Signature_CHAD = 0x63686164,
269 skcms_Signature_WTPT = 0x77747074,
270
271 // Type signatures
272 skcms_Signature_curv = 0x63757276,
273 skcms_Signature_mft1 = 0x6D667431,
274 skcms_Signature_mft2 = 0x6D667432,
275 skcms_Signature_para = 0x70617261,
276 skcms_Signature_sf32 = 0x73663332,
277 // XYZ is also a PCS signature, so it's defined in skcms.h
278 // skcms_Signature_XYZ = 0x58595A20,
279};
280
281static uint16_t read_big_u16(const uint8_t* ptr) {
282 uint16_t be;
283 memcpy(&be, ptr, sizeof(be));
284#if defined(_MSC_VER)
285 return _byteswap_ushort(be);
286#else
287 return __builtin_bswap16(be);
288#endif
289}
290
291static uint32_t read_big_u32(const uint8_t* ptr) {
292 uint32_t be;
293 memcpy(&be, ptr, sizeof(be));
294#if defined(_MSC_VER)
295 return _byteswap_ulong(be);
296#else
297 return __builtin_bswap32(be);
298#endif
299}
300
301static int32_t read_big_i32(const uint8_t* ptr) {
302 return (int32_t)read_big_u32(ptr);
303}
304
305static float read_big_fixed(const uint8_t* ptr) {
306 return read_big_i32(ptr) * (1.0f / 65536.0f);
307}
308
309// Maps to an in-memory profile so that fields line up to the locations specified
310// in ICC.1:2010, section 7.2
311typedef struct {
312 uint8_t size [ 4];
313 uint8_t cmm_type [ 4];
314 uint8_t version [ 4];
315 uint8_t profile_class [ 4];
316 uint8_t data_color_space [ 4];
317 uint8_t pcs [ 4];
318 uint8_t creation_date_time [12];
319 uint8_t signature [ 4];
320 uint8_t platform [ 4];
321 uint8_t flags [ 4];
322 uint8_t device_manufacturer [ 4];
323 uint8_t device_model [ 4];
324 uint8_t device_attributes [ 8];
325 uint8_t rendering_intent [ 4];
326 uint8_t illuminant_X [ 4];
327 uint8_t illuminant_Y [ 4];
328 uint8_t illuminant_Z [ 4];
329 uint8_t creator [ 4];
330 uint8_t profile_id [16];
331 uint8_t reserved [28];
332 uint8_t tag_count [ 4]; // Technically not part of header, but required
333} header_Layout;
334
335typedef struct {
336 uint8_t signature [4];
337 uint8_t offset [4];
338 uint8_t size [4];
339} tag_Layout;
340
341static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
342 return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
343}
344
345// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
346// use of the type is for the CHAD tag that stores exactly nine values.
347typedef struct {
348 uint8_t type [ 4];
349 uint8_t reserved [ 4];
350 uint8_t values [36];
351} sf32_Layout;
352
353bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
354 skcms_ICCTag tag;
355 if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
356 return false;
357 }
358
359 if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) {
360 return false;
361 }
362
363 const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
364 const uint8_t* values = sf32Tag->values;
365 for (int r = 0; r < 3; ++r)
366 for (int c = 0; c < 3; ++c, values += 4) {
367 m->vals[r][c] = read_big_fixed(values);
368 }
369 return true;
370}
371
372// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
373// the type are for tags/data that store exactly one triple.
374typedef struct {
375 uint8_t type [4];
376 uint8_t reserved [4];
377 uint8_t X [4];
378 uint8_t Y [4];
379 uint8_t Z [4];
380} XYZ_Layout;
381
382static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
383 if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) {
384 return false;
385 }
386
387 const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
388
389 *x = read_big_fixed(xyzTag->X);
390 *y = read_big_fixed(xyzTag->Y);
391 *z = read_big_fixed(xyzTag->Z);
392 return true;
393}
394
395bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) {
396 skcms_ICCTag tag;
397 return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
398 read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]);
399}
400
401static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
402 const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
403 return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) &&
404 read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) &&
405 read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]);
406}
407
408typedef struct {
409 uint8_t type [4];
410 uint8_t reserved_a [4];
411 uint8_t function_type [2];
412 uint8_t reserved_b [2];
413 uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
414} para_Layout;
415
416static bool read_curve_para(const uint8_t* buf, uint32_t size,
417 skcms_Curve* curve, uint32_t* curve_size) {
418 if (size < SAFE_FIXED_SIZE(para_Layout)) {
419 return false;
420 }
421
422 const para_Layout* paraTag = (const para_Layout*)buf;
423
424 enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 };
425 uint16_t function_type = read_big_u16(paraTag->function_type);
426 if (function_type > kGABCDEF) {
427 return false;
428 }
429
430 static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 };
431 if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
432 return false;
433 }
434
435 if (curve_size) {
436 *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
437 }
438
439 curve->table_entries = 0;
440 curve->parametric.a = 1.0f;
441 curve->parametric.b = 0.0f;
442 curve->parametric.c = 0.0f;
443 curve->parametric.d = 0.0f;
444 curve->parametric.e = 0.0f;
445 curve->parametric.f = 0.0f;
446 curve->parametric.g = read_big_fixed(paraTag->variable);
447
448 switch (function_type) {
449 case kGAB:
450 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
451 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
452 if (curve->parametric.a == 0) {
453 return false;
454 }
455 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
456 break;
457 case kGABC:
458 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
459 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
460 curve->parametric.e = read_big_fixed(paraTag->variable + 12);
461 if (curve->parametric.a == 0) {
462 return false;
463 }
464 curve->parametric.d = -curve->parametric.b / curve->parametric.a;
465 curve->parametric.f = curve->parametric.e;
466 break;
467 case kGABCD:
468 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
469 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
470 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
471 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
472 break;
473 case kGABCDEF:
474 curve->parametric.a = read_big_fixed(paraTag->variable + 4);
475 curve->parametric.b = read_big_fixed(paraTag->variable + 8);
476 curve->parametric.c = read_big_fixed(paraTag->variable + 12);
477 curve->parametric.d = read_big_fixed(paraTag->variable + 16);
478 curve->parametric.e = read_big_fixed(paraTag->variable + 20);
479 curve->parametric.f = read_big_fixed(paraTag->variable + 24);
480 break;
481 }
482 return classify(curve->parametric) == sRGBish;
483}
484
485typedef struct {
486 uint8_t type [4];
487 uint8_t reserved [4];
488 uint8_t value_count [4];
489 uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1
490} curv_Layout;
491
492static bool read_curve_curv(const uint8_t* buf, uint32_t size,
493 skcms_Curve* curve, uint32_t* curve_size) {
494 if (size < SAFE_FIXED_SIZE(curv_Layout)) {
495 return false;
496 }
497
498 const curv_Layout* curvTag = (const curv_Layout*)buf;
499
500 uint32_t value_count = read_big_u32(curvTag->value_count);
501 if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
502 return false;
503 }
504
505 if (curve_size) {
506 *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t);
507 }
508
509 if (value_count < 2) {
510 curve->table_entries = 0;
511 curve->parametric.a = 1.0f;
512 curve->parametric.b = 0.0f;
513 curve->parametric.c = 0.0f;
514 curve->parametric.d = 0.0f;
515 curve->parametric.e = 0.0f;
516 curve->parametric.f = 0.0f;
517 if (value_count == 0) {
518 // Empty tables are a shorthand for an identity curve
519 curve->parametric.g = 1.0f;
520 } else {
521 // Single entry tables are a shorthand for simple gamma
522 curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f);
523 }
524 } else {
525 curve->table_8 = nullptr;
526 curve->table_16 = curvTag->variable;
527 curve->table_entries = value_count;
528 }
529
530 return true;
531}
532
533// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
534// If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size).
535static bool read_curve(const uint8_t* buf, uint32_t size,
536 skcms_Curve* curve, uint32_t* curve_size) {
537 if (!buf || size < 4 || !curve) {
538 return false;
539 }
540
541 uint32_t type = read_big_u32(buf);
542 if (type == skcms_Signature_para) {
543 return read_curve_para(buf, size, curve, curve_size);
544 } else if (type == skcms_Signature_curv) {
545 return read_curve_curv(buf, size, curve, curve_size);
546 }
547
548 return false;
549}
550
551// mft1 and mft2 share a large chunk of data
552typedef struct {
553 uint8_t type [ 4];
554 uint8_t reserved_a [ 4];
555 uint8_t input_channels [ 1];
556 uint8_t output_channels [ 1];
557 uint8_t grid_points [ 1];
558 uint8_t reserved_b [ 1];
559 uint8_t matrix [36];
560} mft_CommonLayout;
561
562typedef struct {
563 mft_CommonLayout common [1];
564
565 uint8_t variable [1/*variable*/];
566} mft1_Layout;
567
568typedef struct {
569 mft_CommonLayout common [1];
570
571 uint8_t input_table_entries [2];
572 uint8_t output_table_entries [2];
573 uint8_t variable [1/*variable*/];
574} mft2_Layout;
575
576static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
577 // MFT matrices are applied before the first set of curves, but must be identity unless the
578 // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
579 // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
580 // field/flag.
581 a2b->matrix_channels = 0;
582
583 a2b->input_channels = mftTag->input_channels[0];
584 a2b->output_channels = mftTag->output_channels[0];
585
586 // We require exactly three (ie XYZ/Lab/RGB) output channels
587 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
588 return false;
589 }
590 // We require at least one, and no more than four (ie CMYK) input channels
591 if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
592 return false;
593 }
594
595 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
596 a2b->grid_points[i] = mftTag->grid_points[0];
597 }
598 // The grid only makes sense with at least two points along each axis
599 if (a2b->grid_points[0] < 2) {
600 return false;
601 }
602
603 return true;
604}
605
606static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
607 uint32_t input_table_entries, uint32_t output_table_entries,
608 skcms_A2B* a2b) {
609 // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow
610 uint32_t byte_len_per_input_table = input_table_entries * byte_width;
611 uint32_t byte_len_per_output_table = output_table_entries * byte_width;
612
613 // [input|output]_channels are <= 4, so still no overflow
614 uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
615 uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
616
617 uint64_t grid_size = a2b->output_channels * byte_width;
618 for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) {
619 grid_size *= a2b->grid_points[axis];
620 }
621
622 if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
623 return false;
624 }
625
626 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
627 a2b->input_curves[i].table_entries = input_table_entries;
628 if (byte_width == 1) {
629 a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
630 a2b->input_curves[i].table_16 = nullptr;
631 } else {
632 a2b->input_curves[i].table_8 = nullptr;
633 a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
634 }
635 }
636
637 if (byte_width == 1) {
638 a2b->grid_8 = table_base + byte_len_all_input_tables;
639 a2b->grid_16 = nullptr;
640 } else {
641 a2b->grid_8 = nullptr;
642 a2b->grid_16 = table_base + byte_len_all_input_tables;
643 }
644
645 const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
646 for (uint32_t i = 0; i < a2b->output_channels; ++i) {
647 a2b->output_curves[i].table_entries = output_table_entries;
648 if (byte_width == 1) {
649 a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
650 a2b->output_curves[i].table_16 = nullptr;
651 } else {
652 a2b->output_curves[i].table_8 = nullptr;
653 a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
654 }
655 }
656
657 return true;
658}
659
660static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
661 if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
662 return false;
663 }
664
665 const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
666 if (!read_mft_common(mftTag->common, a2b)) {
667 return false;
668 }
669
670 uint32_t input_table_entries = 256;
671 uint32_t output_table_entries = 256;
672
673 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1,
674 input_table_entries, output_table_entries, a2b);
675}
676
677static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
678 if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
679 return false;
680 }
681
682 const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
683 if (!read_mft_common(mftTag->common, a2b)) {
684 return false;
685 }
686
687 uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
688 uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
689
690 // ICC spec mandates that 2 <= table_entries <= 4096
691 if (input_table_entries < 2 || input_table_entries > 4096 ||
692 output_table_entries < 2 || output_table_entries > 4096) {
693 return false;
694 }
695
696 return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2,
697 input_table_entries, output_table_entries, a2b);
698}
699
700static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
701 uint32_t num_curves, skcms_Curve* curves) {
702 for (uint32_t i = 0; i < num_curves; ++i) {
703 if (curve_offset > size) {
704 return false;
705 }
706
707 uint32_t curve_bytes;
708 if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
709 return false;
710 }
711
712 if (curve_bytes > UINT32_MAX - 3) {
713 return false;
714 }
715 curve_bytes = (curve_bytes + 3) & ~3U;
716
717 uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
718 curve_offset = (uint32_t)new_offset_64;
719 if (new_offset_64 != curve_offset) {
720 return false;
721 }
722 }
723
724 return true;
725}
726
727typedef struct {
728 uint8_t type [ 4];
729 uint8_t reserved_a [ 4];
730 uint8_t input_channels [ 1];
731 uint8_t output_channels [ 1];
732 uint8_t reserved_b [ 2];
733 uint8_t b_curve_offset [ 4];
734 uint8_t matrix_offset [ 4];
735 uint8_t m_curve_offset [ 4];
736 uint8_t clut_offset [ 4];
737 uint8_t a_curve_offset [ 4];
738} mAB_Layout;
739
740typedef struct {
741 uint8_t grid_points [16];
742 uint8_t grid_byte_width [ 1];
743 uint8_t reserved [ 3];
744 uint8_t variable [1/*variable*/];
745} mABCLUT_Layout;
746
747static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
748 if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
749 return false;
750 }
751
752 const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
753
754 a2b->input_channels = mABTag->input_channels[0];
755 a2b->output_channels = mABTag->output_channels[0];
756
757 // We require exactly three (ie XYZ/Lab/RGB) output channels
758 if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
759 return false;
760 }
761 // We require no more than four (ie CMYK) input channels
762 if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
763 return false;
764 }
765
766 uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
767 uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
768 uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
769 uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
770 uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
771
772 // "B" curves must be present
773 if (0 == b_curve_offset) {
774 return false;
775 }
776
777 if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
778 a2b->output_curves)) {
779 return false;
780 }
781
782 // "M" curves and Matrix must be used together
783 if (0 != m_curve_offset) {
784 if (0 == matrix_offset) {
785 return false;
786 }
787 a2b->matrix_channels = a2b->output_channels;
788 if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
789 a2b->matrix_curves)) {
790 return false;
791 }
792
793 // Read matrix, which is stored as a row-major 3x3, followed by the fourth column
794 if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) {
795 return false;
796 }
797 float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f;
798 const uint8_t* mtx_buf = tag->buf + matrix_offset;
799 a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0);
800 a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4);
801 a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8);
802 a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12);
803 a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16);
804 a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20);
805 a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24);
806 a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28);
807 a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32);
808 a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36);
809 a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40);
810 a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44);
811 } else {
812 if (0 != matrix_offset) {
813 return false;
814 }
815 a2b->matrix_channels = 0;
816 }
817
818 // "A" curves and CLUT must be used together
819 if (0 != a_curve_offset) {
820 if (0 == clut_offset) {
821 return false;
822 }
823 if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
824 a2b->input_curves)) {
825 return false;
826 }
827
828 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
829 return false;
830 }
831 const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
832
833 if (clut->grid_byte_width[0] == 1) {
834 a2b->grid_8 = clut->variable;
835 a2b->grid_16 = nullptr;
836 } else if (clut->grid_byte_width[0] == 2) {
837 a2b->grid_8 = nullptr;
838 a2b->grid_16 = clut->variable;
839 } else {
840 return false;
841 }
842
843 uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0];
844 for (uint32_t i = 0; i < a2b->input_channels; ++i) {
845 a2b->grid_points[i] = clut->grid_points[i];
846 // The grid only makes sense with at least two points along each axis
847 if (a2b->grid_points[i] < 2) {
848 return false;
849 }
850 grid_size *= a2b->grid_points[i];
851 }
852 if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
853 return false;
854 }
855 } else {
856 if (0 != clut_offset) {
857 return false;
858 }
859
860 // If there is no CLUT, the number of input and output channels must match
861 if (a2b->input_channels != a2b->output_channels) {
862 return false;
863 }
864
865 // Zero out the number of input channels to signal that we're skipping this stage
866 a2b->input_channels = 0;
867 }
868
869 return true;
870}
871
872// If you pass f, we'll fit a possibly-non-zero value for *f.
873// If you pass nullptr, we'll assume you want *f to be treated as zero.
874static int fit_linear(const skcms_Curve* curve, int N, float tol,
875 float* c, float* d, float* f = nullptr) {
876 assert(N > 1);
877 // We iteratively fit the first points to the TF's linear piece.
878 // We want the cx + f line to pass through the first and last points we fit exactly.
879 //
880 // As we walk along the points we find the minimum and maximum slope of the line before the
881 // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
882 // emtpy, when we definitely can't add any more points.
883 //
884 // Some points' error intervals may intersect the running interval but not lie fully
885 // within it. So we keep track of the last point we saw that is a valid end point candidate,
886 // and once the search is done, back up to build the line through *that* point.
887 const float dx = 1.0f / (N - 1);
888
889 int lin_points = 1;
890
891 float f_zero = 0.0f;
892 if (f) {
893 *f = eval_curve(curve, 0);
894 } else {
895 f = &f_zero;
896 }
897
898
899 float slope_min = -INFINITY_;
900 float slope_max = +INFINITY_;
901 for (int i = 1; i < N; ++i) {
902 float x = i * dx;
903 float y = eval_curve(curve, x);
904
905 float slope_max_i = (y + tol - *f) / x,
906 slope_min_i = (y - tol - *f) / x;
907 if (slope_max_i < slope_min || slope_max < slope_min_i) {
908 // Slope intervals would no longer overlap.
909 break;
910 }
911 slope_max = fminf_(slope_max, slope_max_i);
912 slope_min = fmaxf_(slope_min, slope_min_i);
913
914 float cur_slope = (y - *f) / x;
915 if (slope_min <= cur_slope && cur_slope <= slope_max) {
916 lin_points = i + 1;
917 *c = cur_slope;
918 }
919 }
920
921 // Set D to the last point that met our tolerance.
922 *d = (lin_points - 1) * dx;
923 return lin_points;
924}
925
926static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
927 bool ok = false;
928 if (tag->type == skcms_Signature_mft1) {
929 ok = read_tag_mft1(tag, a2b);
930 } else if (tag->type == skcms_Signature_mft2) {
931 ok = read_tag_mft2(tag, a2b);
932 } else if (tag->type == skcms_Signature_mAB) {
933 ok = read_tag_mab(tag, a2b, pcs_is_xyz);
934 }
935 if (!ok) {
936 return false;
937 }
938
939 // Detect and canonicalize identity tables.
940 skcms_Curve* curves[] = {
941 a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr,
942 a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr,
943 a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr,
944 a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr,
945 a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr,
946 a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr,
947 a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr,
948 a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr,
949 a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr,
950 a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr,
951 };
952
953 for (int i = 0; i < ARRAY_COUNT(curves); i++) {
954 skcms_Curve* curve = curves[i];
955
956 if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
957 int N = (int)curve->table_entries;
958
959 float c = 0.0f, d = 0.0f, f = 0.0f;
960 if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f)
961 && c == 1.0f
962 && f == 0.0f) {
963 curve->table_entries = 0;
964 curve->table_8 = nullptr;
965 curve->table_16 = nullptr;
966 curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0};
967 }
968 }
969 }
970
971 return true;
972}
973
974void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
975 if (!profile || !profile->buffer || !tag) { return; }
976 if (idx > profile->tag_count) { return; }
977 const tag_Layout* tags = get_tag_table(profile);
978 tag->signature = read_big_u32(tags[idx].signature);
979 tag->size = read_big_u32(tags[idx].size);
980 tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
981 tag->type = read_big_u32(tag->buf);
982}
983
984bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
985 if (!profile || !profile->buffer || !tag) { return false; }
986 const tag_Layout* tags = get_tag_table(profile);
987 for (uint32_t i = 0; i < profile->tag_count; ++i) {
988 if (read_big_u32(tags[i].signature) == sig) {
989 tag->signature = sig;
990 tag->size = read_big_u32(tags[i].size);
991 tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
992 tag->type = read_big_u32(tag->buf);
993 return true;
994 }
995 }
996 return false;
997}
998
999static bool usable_as_src(const skcms_ICCProfile* profile) {
1000 return profile->has_A2B
1001 || (profile->has_trc && profile->has_toXYZD50);
1002}
1003
1004bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1005 assert(SAFE_SIZEOF(header_Layout) == 132);
1006
1007 if (!profile) {
1008 return false;
1009 }
1010 memset(profile, 0, SAFE_SIZEOF(*profile));
1011
1012 if (len < SAFE_SIZEOF(header_Layout)) {
1013 return false;
1014 }
1015
1016 // Byte-swap all header fields
1017 const header_Layout* header = (const header_Layout*)buf;
1018 profile->buffer = (const uint8_t*)buf;
1019 profile->size = read_big_u32(header->size);
1020 uint32_t version = read_big_u32(header->version);
1021 profile->data_color_space = read_big_u32(header->data_color_space);
1022 profile->pcs = read_big_u32(header->pcs);
1023 uint32_t signature = read_big_u32(header->signature);
1024 float illuminant_X = read_big_fixed(header->illuminant_X);
1025 float illuminant_Y = read_big_fixed(header->illuminant_Y);
1026 float illuminant_Z = read_big_fixed(header->illuminant_Z);
1027 profile->tag_count = read_big_u32(header->tag_count);
1028
1029 // Validate signature, size (smaller than buffer, large enough to hold tag table),
1030 // and major version
1031 uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1032 if (signature != skcms_Signature_acsp ||
1033 profile->size > len ||
1034 profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size ||
1035 (version >> 24) > 4) {
1036 return false;
1037 }
1038
1039 // Validate that illuminant is D50 white
1040 if (fabsf_(illuminant_X - 0.9642f) > 0.0100f ||
1041 fabsf_(illuminant_Y - 1.0000f) > 0.0100f ||
1042 fabsf_(illuminant_Z - 0.8249f) > 0.0100f) {
1043 return false;
1044 }
1045
1046 // Validate that all tag entries have sane offset + size
1047 const tag_Layout* tags = get_tag_table(profile);
1048 for (uint32_t i = 0; i < profile->tag_count; ++i) {
1049 uint32_t tag_offset = read_big_u32(tags[i].offset);
1050 uint32_t tag_size = read_big_u32(tags[i].size);
1051 uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1052 if (tag_size < 4 || tag_end > profile->size) {
1053 return false;
1054 }
1055 }
1056
1057 if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1058 return false;
1059 }
1060
1061 bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1062
1063 // Pre-parse commonly used tags.
1064 skcms_ICCTag kTRC;
1065 if (profile->data_color_space == skcms_Signature_Gray &&
1066 skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1067 if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) {
1068 // Malformed tag
1069 return false;
1070 }
1071 profile->trc[1] = profile->trc[0];
1072 profile->trc[2] = profile->trc[0];
1073 profile->has_trc = true;
1074
1075 if (pcs_is_xyz) {
1076 profile->toXYZD50.vals[0][0] = illuminant_X;
1077 profile->toXYZD50.vals[1][1] = illuminant_Y;
1078 profile->toXYZD50.vals[2][2] = illuminant_Z;
1079 profile->has_toXYZD50 = true;
1080 }
1081 } else {
1082 skcms_ICCTag rTRC, gTRC, bTRC;
1083 if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1084 skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1085 skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1086 if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) ||
1087 !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) ||
1088 !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) {
1089 // Malformed TRC tags
1090 return false;
1091 }
1092 profile->has_trc = true;
1093 }
1094
1095 skcms_ICCTag rXYZ, gXYZ, bXYZ;
1096 if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1097 skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1098 skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1099 if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1100 // Malformed XYZ tags
1101 return false;
1102 }
1103 profile->has_toXYZD50 = true;
1104 }
1105 }
1106
1107 skcms_ICCTag a2b_tag;
1108
1109 // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1110 // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1111 // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1112 // and all our known users are thinking exclusively in terms of relative colormetric.
1113 const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1114 for (int i = 0; i < ARRAY_COUNT(sigs); i++) {
1115 if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1116 if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1117 // Malformed A2B tag
1118 return false;
1119 }
1120 profile->has_A2B = true;
1121 break;
1122 }
1123 }
1124
1125 return usable_as_src(profile);
1126}
1127
1128
1129const skcms_ICCProfile* skcms_sRGB_profile() {
1130 static const skcms_ICCProfile sRGB_profile = {
1131 nullptr, // buffer, moot here
1132
1133 0, // size, moot here
1134 skcms_Signature_RGB, // data_color_space
1135 skcms_Signature_XYZ, // pcs
1136 0, // tag count, moot here
1137
1138 // We choose to represent sRGB with its canonical transfer function,
1139 // and with its canonical XYZD50 gamut matrix.
1140 true, // has_trc, followed by the 3 trc curves
1141 {
1142 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1143 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1144 {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}},
1145 },
1146
1147 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1148 {{
1149 { 0.436065674f, 0.385147095f, 0.143066406f },
1150 { 0.222488403f, 0.716873169f, 0.060607910f },
1151 { 0.013916016f, 0.097076416f, 0.714096069f },
1152 }},
1153
1154 false, // has_A2B, followed by a2b itself which we don't care about.
1155 {
1156 0,
1157 {
1158 {{0, {0,0, 0,0,0,0,0}}},
1159 {{0, {0,0, 0,0,0,0,0}}},
1160 {{0, {0,0, 0,0,0,0,0}}},
1161 {{0, {0,0, 0,0,0,0,0}}},
1162 },
1163 {0,0,0,0},
1164 nullptr,
1165 nullptr,
1166
1167 0,
1168 {
1169 {{0, {0,0, 0,0,0,0,0}}},
1170 {{0, {0,0, 0,0,0,0,0}}},
1171 {{0, {0,0, 0,0,0,0,0}}},
1172 },
1173 {{
1174 { 0,0,0,0 },
1175 { 0,0,0,0 },
1176 { 0,0,0,0 },
1177 }},
1178
1179 0,
1180 {
1181 {{0, {0,0, 0,0,0,0,0}}},
1182 {{0, {0,0, 0,0,0,0,0}}},
1183 {{0, {0,0, 0,0,0,0,0}}},
1184 },
1185 },
1186 };
1187 return &sRGB_profile;
1188}
1189
1190const skcms_ICCProfile* skcms_XYZD50_profile() {
1191 // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1192 static const skcms_ICCProfile XYZD50_profile = {
1193 nullptr, // buffer, moot here
1194
1195 0, // size, moot here
1196 skcms_Signature_RGB, // data_color_space
1197 skcms_Signature_XYZ, // pcs
1198 0, // tag count, moot here
1199
1200 true, // has_trc, followed by the 3 trc curves
1201 {
1202 {{0, {1,1, 0,0,0,0,0}}},
1203 {{0, {1,1, 0,0,0,0,0}}},
1204 {{0, {1,1, 0,0,0,0,0}}},
1205 },
1206
1207 true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1208 {{
1209 { 1,0,0 },
1210 { 0,1,0 },
1211 { 0,0,1 },
1212 }},
1213
1214 false, // has_A2B, followed by a2b itself which we don't care about.
1215 {
1216 0,
1217 {
1218 {{0, {0,0, 0,0,0,0,0}}},
1219 {{0, {0,0, 0,0,0,0,0}}},
1220 {{0, {0,0, 0,0,0,0,0}}},
1221 {{0, {0,0, 0,0,0,0,0}}},
1222 },
1223 {0,0,0,0},
1224 nullptr,
1225 nullptr,
1226
1227 0,
1228 {
1229 {{0, {0,0, 0,0,0,0,0}}},
1230 {{0, {0,0, 0,0,0,0,0}}},
1231 {{0, {0,0, 0,0,0,0,0}}},
1232 },
1233 {{
1234 { 0,0,0,0 },
1235 { 0,0,0,0 },
1236 { 0,0,0,0 },
1237 }},
1238
1239 0,
1240 {
1241 {{0, {0,0, 0,0,0,0,0}}},
1242 {{0, {0,0, 0,0,0,0,0}}},
1243 {{0, {0,0, 0,0,0,0,0}}},
1244 },
1245 },
1246 };
1247
1248 return &XYZD50_profile;
1249}
1250
1251const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1252 return &skcms_sRGB_profile()->trc[0].parametric;
1253}
1254
1255const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1256 static const skcms_TransferFunction sRGB_inv =
1257 {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f};
1258 return &sRGB_inv;
1259}
1260
1261const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1262 static const skcms_TransferFunction identity = {1,1,0,0,0,0,0};
1263 return &identity;
1264}
1265
1266const uint8_t skcms_252_random_bytes[] = {
1267 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215,
1268 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30,
1269 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191,
1270 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57,
1271 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211,
1272 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164,
1273 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225,
1274 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214,
1275 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232,
1276 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54,
1277 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63,
1278 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193,
1279 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133,
1280 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4,
1281 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88,
1282 112, 36, 224, 136, 202, 76, 94, 98, 175, 213
1283};
1284
1285bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1286 // Test for exactly equal profiles first.
1287 if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1288 return true;
1289 }
1290
1291 // For now this is the essentially the same strategy we use in test_only.c
1292 // for our skcms_Transform() smoke tests:
1293 // 1) transform A to XYZD50
1294 // 2) transform B to XYZD50
1295 // 3) return true if they're similar enough
1296 // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1297
1298 // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1299 // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1300
1301 if (A->data_color_space != B->data_color_space) {
1302 return false;
1303 }
1304
1305 // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1306 // TODO: working with RGBA_8888 either way is probably fastest.
1307 skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1308 size_t npixels = 84;
1309 if (A->data_color_space == skcms_Signature_CMYK) {
1310 fmt = skcms_PixelFormat_RGBA_8888;
1311 npixels = 63;
1312 }
1313
1314 // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1315 // use pre-canned results and skip that skcms_Transform() call?
1316 uint8_t dstA[252],
1317 dstB[252];
1318 if (!skcms_Transform(
1319 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1320 dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1321 npixels)) {
1322 return false;
1323 }
1324 if (!skcms_Transform(
1325 skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1326 dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1327 npixels)) {
1328 return false;
1329 }
1330
1331 // TODO: make sure this final check has reasonable codegen.
1332 for (size_t i = 0; i < 252; i++) {
1333 if (abs((int)dstA[i] - (int)dstB[i]) > 1) {
1334 return false;
1335 }
1336 }
1337 return true;
1338}
1339
1340bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1341 const skcms_TransferFunction* inv_tf) {
1342 if (!profile || !profile->has_trc) {
1343 return false;
1344 }
1345
1346 return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) &&
1347 skcms_AreApproximateInverses(&profile->trc[1], inv_tf) &&
1348 skcms_AreApproximateInverses(&profile->trc[2], inv_tf);
1349}
1350
1351static bool is_zero_to_one(float x) {
1352 return 0 <= x && x <= 1;
1353}
1354
1355typedef struct { float vals[3]; } skcms_Vector3;
1356
1357static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1358 skcms_Vector3 dst = {{0,0,0}};
1359 for (int row = 0; row < 3; ++row) {
1360 dst.vals[row] = m->vals[row][0] * v->vals[0]
1361 + m->vals[row][1] * v->vals[1]
1362 + m->vals[row][2] * v->vals[2];
1363 }
1364 return dst;
1365}
1366
1367bool skcms_AdaptToXYZD50(float wx, float wy,
1368 skcms_Matrix3x3* toXYZD50) {
1369 if (!is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1370 !toXYZD50) {
1371 return false;
1372 }
1373
1374 // Assumes that Y is 1.0f.
1375 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1376
1377 // Now convert toXYZ matrix to toXYZD50.
1378 skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } };
1379
1380 // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1381 // the matrices below. The Bradford method is used by Adobe and is widely considered
1382 // to be the best.
1383 skcms_Matrix3x3 xyz_to_lms = {{
1384 { 0.8951f, 0.2664f, -0.1614f },
1385 { -0.7502f, 1.7135f, 0.0367f },
1386 { 0.0389f, -0.0685f, 1.0296f },
1387 }};
1388 skcms_Matrix3x3 lms_to_xyz = {{
1389 { 0.9869929f, -0.1470543f, 0.1599627f },
1390 { 0.4323053f, 0.5183603f, 0.0492912f },
1391 { -0.0085287f, 0.0400428f, 0.9684867f },
1392 }};
1393
1394 skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1395 skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1396
1397 *toXYZD50 = {{
1398 { dstCone.vals[0] / srcCone.vals[0], 0, 0 },
1399 { 0, dstCone.vals[1] / srcCone.vals[1], 0 },
1400 { 0, 0, dstCone.vals[2] / srcCone.vals[2] },
1401 }};
1402 *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1403 *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1404
1405 return true;
1406}
1407
1408bool skcms_PrimariesToXYZD50(float rx, float ry,
1409 float gx, float gy,
1410 float bx, float by,
1411 float wx, float wy,
1412 skcms_Matrix3x3* toXYZD50) {
1413 if (!is_zero_to_one(rx) || !is_zero_to_one(ry) ||
1414 !is_zero_to_one(gx) || !is_zero_to_one(gy) ||
1415 !is_zero_to_one(bx) || !is_zero_to_one(by) ||
1416 !is_zero_to_one(wx) || !is_zero_to_one(wy) ||
1417 !toXYZD50) {
1418 return false;
1419 }
1420
1421 // First, we need to convert xy values (primaries) to XYZ.
1422 skcms_Matrix3x3 primaries = {{
1423 { rx, gx, bx },
1424 { ry, gy, by },
1425 { 1 - rx - ry, 1 - gx - gy, 1 - bx - by },
1426 }};
1427 skcms_Matrix3x3 primaries_inv;
1428 if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1429 return false;
1430 }
1431
1432 // Assumes that Y is 1.0f.
1433 skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } };
1434 skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1435
1436 skcms_Matrix3x3 toXYZ = {{
1437 { XYZ.vals[0], 0, 0 },
1438 { 0, XYZ.vals[1], 0 },
1439 { 0, 0, XYZ.vals[2] },
1440 }};
1441 toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1442
1443 skcms_Matrix3x3 DXtoD50;
1444 if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1445 return false;
1446 }
1447
1448 *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1449 return true;
1450}
1451
1452
1453bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1454 double a00 = src->vals[0][0],
1455 a01 = src->vals[1][0],
1456 a02 = src->vals[2][0],
1457 a10 = src->vals[0][1],
1458 a11 = src->vals[1][1],
1459 a12 = src->vals[2][1],
1460 a20 = src->vals[0][2],
1461 a21 = src->vals[1][2],
1462 a22 = src->vals[2][2];
1463
1464 double b0 = a00*a11 - a01*a10,
1465 b1 = a00*a12 - a02*a10,
1466 b2 = a01*a12 - a02*a11,
1467 b3 = a20,
1468 b4 = a21,
1469 b5 = a22;
1470
1471 double determinant = b0*b5
1472 - b1*b4
1473 + b2*b3;
1474
1475 if (determinant == 0) {
1476 return false;
1477 }
1478
1479 double invdet = 1.0 / determinant;
1480 if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) {
1481 return false;
1482 }
1483
1484 b0 *= invdet;
1485 b1 *= invdet;
1486 b2 *= invdet;
1487 b3 *= invdet;
1488 b4 *= invdet;
1489 b5 *= invdet;
1490
1491 dst->vals[0][0] = (float)( a11*b5 - a12*b4 );
1492 dst->vals[1][0] = (float)( a02*b4 - a01*b5 );
1493 dst->vals[2][0] = (float)( + b2 );
1494 dst->vals[0][1] = (float)( a12*b3 - a10*b5 );
1495 dst->vals[1][1] = (float)( a00*b5 - a02*b3 );
1496 dst->vals[2][1] = (float)( - b1 );
1497 dst->vals[0][2] = (float)( a10*b4 - a11*b3 );
1498 dst->vals[1][2] = (float)( a01*b3 - a00*b4 );
1499 dst->vals[2][2] = (float)( + b0 );
1500
1501 for (int r = 0; r < 3; ++r)
1502 for (int c = 0; c < 3; ++c) {
1503 if (!isfinitef_(dst->vals[r][c])) {
1504 return false;
1505 }
1506 }
1507 return true;
1508}
1509
1510skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1511 skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } };
1512 for (int r = 0; r < 3; r++)
1513 for (int c = 0; c < 3; c++) {
1514 m.vals[r][c] = A->vals[r][0] * B->vals[0][c]
1515 + A->vals[r][1] * B->vals[1][c]
1516 + A->vals[r][2] * B->vals[2][c];
1517 }
1518 return m;
1519}
1520
1521#if defined(__clang__)
1522 [[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1523#endif
1524bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1525 TF_PQish pq;
1526 TF_HLGish hlg;
1527 switch (classify(*src, &pq, &hlg)) {
1528 case Bad: return false;
1529 case sRGBish: break; // handled below
1530
1531 case PQish:
1532 *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F
1533 , pq.B, -pq.E, 1.0f/pq.C};
1534 return true;
1535
1536 case HLGish:
1537 *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G
1538 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1539 return true;
1540
1541 case HLGinvish:
1542 *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G
1543 , 1.0f/hlg.a, hlg.b, hlg.c, 0 };
1544 return true;
1545 }
1546
1547 assert (classify(*src) == sRGBish);
1548
1549 // We're inverting this function, solving for x in terms of y.
1550 // y = (cx + f) x < d
1551 // (ax + b)^g + e x ≥ d
1552 // The inverse of this function can be expressed in the same piecewise form.
1553 skcms_TransferFunction inv = {0,0,0,0,0,0,0};
1554
1555 // We'll start by finding the new threshold inv.d.
1556 // In principle we should be able to find that by solving for y at x=d from either side.
1557 // (If those two d values aren't the same, it's a discontinuous transfer function.)
1558 float d_l = src->c * src->d + src->f,
1559 d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1560 if (fabsf_(d_l - d_r) > 1/512.0f) {
1561 return false;
1562 }
1563 inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1564
1565 // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1566 if (inv.d > 0) {
1567 // Inverting the linear section is pretty straightfoward:
1568 // y = cx + f
1569 // y - f = cx
1570 // (1/c)y - f/c = x
1571 inv.c = 1.0f/src->c;
1572 inv.f = -src->f/src->c;
1573 }
1574
1575 // The interesting part is inverting the nonlinear section:
1576 // y = (ax + b)^g + e.
1577 // y - e = (ax + b)^g
1578 // (y - e)^1/g = ax + b
1579 // (y - e)^1/g - b = ax
1580 // (1/a)(y - e)^1/g - b/a = x
1581 //
1582 // To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1583 // let k = (1/a)^g
1584 // (1/a)( y - e)^1/g - b/a = x
1585 // (ky - ke)^1/g - b/a = x
1586
1587 float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1588 inv.g = 1.0f / src->g;
1589 inv.a = k;
1590 inv.b = -k * src->e;
1591 inv.e = -src->b / src->a;
1592
1593 // We need to enforce the same constraints here that we do when fitting a curve,
1594 // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1595 // of the source function if we're here.
1596
1597 // Just like when fitting the curve, there's really no way to rescue a < 0.
1598 if (inv.a < 0) {
1599 return false;
1600 }
1601 // On the other hand we can rescue an ad+b that's gone slightly negative here.
1602 if (inv.a * inv.d + inv.b < 0) {
1603 inv.b = -inv.a * inv.d;
1604 }
1605
1606 // That should usually make classify(inv) == sRGBish true, but there are a couple situations
1607 // where we might still fail here, like non-finite parameter values.
1608 if (classify(inv) != sRGBish) {
1609 return false;
1610 }
1611
1612 assert (inv.a >= 0);
1613 assert (inv.a * inv.d + inv.b >= 0);
1614
1615 // Now in principle we're done.
1616 // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1617 // e or f of the inverse, depending on which segment contains src(1.0f).
1618 float s = skcms_TransferFunction_eval(src, 1.0f);
1619 if (!isfinitef_(s)) {
1620 return false;
1621 }
1622
1623 float sign = s < 0 ? -1.0f : 1.0f;
1624 s *= sign;
1625 if (s < inv.d) {
1626 inv.f = 1.0f - sign * inv.c * s;
1627 } else {
1628 inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g);
1629 }
1630
1631 *dst = inv;
1632 return classify(*dst) == sRGBish;
1633}
1634
1635// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1636
1637// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1638//
1639// tf(x) = cx + f x < d
1640// tf(x) = (ax + b)^g + e x ≥ d
1641//
1642// When fitting, we add the additional constraint that both pieces meet at d:
1643//
1644// cd + f = (ad + b)^g + e
1645//
1646// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1647//
1648// tf(x) = cx + f x < d
1649// tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1650//
1651// Our overall strategy is then:
1652// For a couple tolerances,
1653// - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1654// - invert c,d,f
1655// - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1656// (and by constraint, inverted e) to the inverse of the table.
1657// Return the parameters with least maximum error.
1658//
1659// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1660// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1661//
1662// let y = Table(x)
1663// r(x) = x - f_inv(y)
1664//
1665// ∂r/∂g = ln(ay + b)*(ay + b)^g
1666// - ln(ad + b)*(ad + b)^g
1667// ∂r/∂a = yg(ay + b)^(g-1)
1668// - dg(ad + b)^(g-1)
1669// ∂r/∂b = g(ay + b)^(g-1)
1670// - g(ad + b)^(g-1)
1671
1672// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1673// and fill out the gradient of the residual into dfdP.
1674static float rg_nonlinear(float x,
1675 const skcms_Curve* curve,
1676 const skcms_TransferFunction* tf,
1677 float dfdP[3]) {
1678 const float y = eval_curve(curve, x);
1679
1680 const float g = tf->g, a = tf->a, b = tf->b,
1681 c = tf->c, d = tf->d, f = tf->f;
1682
1683 const float Y = fmaxf_(a*y + b, 0.0f),
1684 D = a*d + b;
1685 assert (D >= 0);
1686
1687 // The gradient.
1688 dfdP[0] = logf_(Y)*powf_(Y, g)
1689 - logf_(D)*powf_(D, g);
1690 dfdP[1] = y*g*powf_(Y, g-1)
1691 - d*g*powf_(D, g-1);
1692 dfdP[2] = g*powf_(Y, g-1)
1693 - g*powf_(D, g-1);
1694
1695 // The residual.
1696 const float f_inv = powf_(Y, g)
1697 - powf_(D, g)
1698 + c*d + f;
1699 return x - f_inv;
1700}
1701
1702static bool gauss_newton_step(const skcms_Curve* curve,
1703 skcms_TransferFunction* tf,
1704 float x0, float dx, int N) {
1705 // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1706 //
1707 // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1708 //
1709 // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1710 // where r(P) is the residual vector
1711 // and Jf is the Jacobian matrix of f(), ∂r/∂P.
1712 //
1713 // Let's review the shape of each of these expressions:
1714 // r(P) is [N x 1], a column vector with one entry per value of x tested
1715 // Jf is [N x 3], a matrix with an entry for each (x,P) pair
1716 // Jf^T is [3 x N], the transpose of Jf
1717 //
1718 // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix,
1719 // and so is its inverse (Jf^T Jf)^-1
1720 // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P
1721 //
1722 // Our implementation strategy to get to the final ∆P is
1723 // 1) evaluate Jf^T Jf, call that lhs
1724 // 2) evaluate Jf^T r(P), call that rhs
1725 // 3) invert lhs
1726 // 4) multiply inverse lhs by rhs
1727 //
1728 // This is a friendly implementation strategy because we don't have to have any
1729 // buffers that scale with N, and equally nice don't have to perform any matrix
1730 // operations that are variable size.
1731 //
1732 // Other implementation strategies could trade this off, e.g. evaluating the
1733 // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1734 // the residuals. That would probably require implementing singular value
1735 // decomposition, and would create a [3 x N] matrix to be multiplied by the
1736 // [N x 1] residual vector, but on the upside I think that'd eliminate the
1737 // possibility of this gauss_newton_step() function ever failing.
1738
1739 // 0) start off with lhs and rhs safely zeroed.
1740 skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }};
1741 skcms_Vector3 rhs = { {0,0,0} };
1742
1743 // 1,2) evaluate lhs and evaluate rhs
1744 // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1745 // so we'll have to update lhs and rhs at the same time.
1746 for (int i = 0; i < N; i++) {
1747 float x = x0 + i*dx;
1748
1749 float dfdP[3] = {0,0,0};
1750 float resid = rg_nonlinear(x,curve,tf, dfdP);
1751
1752 for (int r = 0; r < 3; r++) {
1753 for (int c = 0; c < 3; c++) {
1754 lhs.vals[r][c] += dfdP[r] * dfdP[c];
1755 }
1756 rhs.vals[r] += dfdP[r] * resid;
1757 }
1758 }
1759
1760 // If any of the 3 P parameters are unused, this matrix will be singular.
1761 // Detect those cases and fix them up to indentity instead, so we can invert.
1762 for (int k = 0; k < 3; k++) {
1763 if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 &&
1764 lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) {
1765 lhs.vals[k][k] = 1;
1766 }
1767 }
1768
1769 // 3) invert lhs
1770 skcms_Matrix3x3 lhs_inv;
1771 if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1772 return false;
1773 }
1774
1775 // 4) multiply inverse lhs by rhs
1776 skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1777 tf->g += dP.vals[0];
1778 tf->a += dP.vals[1];
1779 tf->b += dP.vals[2];
1780 return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1781}
1782
1783static float max_roundtrip_error_checked(const skcms_Curve* curve,
1784 const skcms_TransferFunction* tf_inv) {
1785 skcms_TransferFunction tf;
1786 if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) {
1787 return INFINITY_;
1788 }
1789
1790 skcms_TransferFunction tf_inv_again;
1791 if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1792 return INFINITY_;
1793 }
1794
1795 return skcms_MaxRoundtripError(curve, &tf_inv_again);
1796}
1797
1798// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
1799static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1800 // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1801 auto fixup_tf = [tf]() {
1802 // a must be non-negative. That ensures the function is monotonically increasing.
1803 // We don't really know how to fix up a if it goes negative.
1804 if (tf->a < 0) {
1805 return false;
1806 }
1807 // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1808 // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1809 if (tf->a * tf->d + tf->b < 0) {
1810 tf->b = -tf->a * tf->d;
1811 }
1812 assert (tf->a >= 0 &&
1813 tf->a * tf->d + tf->b >= 0);
1814
1815 // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1816 // parameter so we can guarantee this.
1817 tf->e = tf->c*tf->d + tf->f
1818 - powf_(tf->a*tf->d + tf->b, tf->g);
1819
1820 return true;
1821 };
1822
1823 if (!fixup_tf()) {
1824 return false;
1825 }
1826
1827 // No matter where we start, dx should always represent N even steps from 0 to 1.
1828 const float dx = 1.0f / (N-1);
1829
1830 skcms_TransferFunction best_tf = *tf;
1831 float best_max_error = INFINITY_;
1832
1833 // Need this or several curves get worse... *sigh*
1834 float init_error = max_roundtrip_error_checked(curve, tf);
1835 if (init_error < best_max_error) {
1836 best_max_error = init_error;
1837 best_tf = *tf;
1838 }
1839
1840 // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1841 for (int j = 0; j < 8; j++) {
1842 if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) {
1843 *tf = best_tf;
1844 return isfinitef_(best_max_error);
1845 }
1846
1847 float max_error = max_roundtrip_error_checked(curve, tf);
1848 if (max_error < best_max_error) {
1849 best_max_error = max_error;
1850 best_tf = *tf;
1851 }
1852 }
1853
1854 *tf = best_tf;
1855 return isfinitef_(best_max_error);
1856}
1857
1858bool skcms_ApproximateCurve(const skcms_Curve* curve,
1859 skcms_TransferFunction* approx,
1860 float* max_error) {
1861 if (!curve || !approx || !max_error) {
1862 return false;
1863 }
1864
1865 if (curve->table_entries == 0) {
1866 // No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1867 return false;
1868 }
1869
1870 if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) {
1871 // We need at least two points, and must put some reasonable cap on the maximum number.
1872 return false;
1873 }
1874
1875 int N = (int)curve->table_entries;
1876 const float dx = 1.0f / (N - 1);
1877
1878 *max_error = INFINITY_;
1879 const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f };
1880 for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) {
1881 skcms_TransferFunction tf,
1882 tf_inv;
1883
1884 // It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1885 tf.f = 0.0f;
1886 int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1887
1888 if (L == N) {
1889 // If the entire data set was linear, move the coefficients to the nonlinear portion
1890 // with G == 1. This lets use a canonical representation with d == 0.
1891 tf.g = 1;
1892 tf.a = tf.c;
1893 tf.b = tf.f;
1894 tf.c = tf.d = tf.e = tf.f = 0;
1895 } else if (L == N - 1) {
1896 // Degenerate case with only two points in the nonlinear segment. Solve directly.
1897 tf.g = 1;
1898 tf.a = (eval_curve(curve, (N-1)*dx) -
1899 eval_curve(curve, (N-2)*dx))
1900 / dx;
1901 tf.b = eval_curve(curve, (N-2)*dx)
1902 - tf.a * (N-2)*dx;
1903 tf.e = 0;
1904 } else {
1905 // Start by guessing a gamma-only curve through the midpoint.
1906 int mid = (L + N) / 2;
1907 float mid_x = mid / (N - 1.0f);
1908 float mid_y = eval_curve(curve, mid_x);
1909 tf.g = log2f_(mid_y) / log2f_(mid_x);
1910 tf.a = 1;
1911 tf.b = 0;
1912 tf.e = tf.c*tf.d + tf.f
1913 - powf_(tf.a*tf.d + tf.b, tf.g);
1914
1915
1916 if (!skcms_TransferFunction_invert(&tf, &tf_inv) ||
1917 !fit_nonlinear(curve, L,N, &tf_inv)) {
1918 continue;
1919 }
1920
1921 // We fit tf_inv, so calculate tf to keep in sync.
1922 // fit_nonlinear() should guarantee invertibility.
1923 if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1924 assert(false);
1925 continue;
1926 }
1927 }
1928
1929 // We'd better have a sane, sRGB-ish TF by now.
1930 // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1931 // anything else is just some accident of math and the way we pun tf.g as a type flag.
1932 // fit_nonlinear() should guarantee this, but the special cases may fail this test.
1933 if (sRGBish != classify(tf)) {
1934 continue;
1935 }
1936
1937 // We find our error by roundtripping the table through tf_inv.
1938 //
1939 // (The most likely use case for this approximation is to be inverted and
1940 // used as the transfer function for a destination color space.)
1941 //
1942 // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1943 // invertible, so re-verify that here (and use the new inverse for testing).
1944 // fit_nonlinear() should guarantee this, but the special cases that don't use
1945 // it may fail this test.
1946 if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1947 continue;
1948 }
1949
1950 float err = skcms_MaxRoundtripError(curve, &tf_inv);
1951 if (*max_error > err) {
1952 *max_error = err;
1953 *approx = tf;
1954 }
1955 }
1956 return isfinitef_(*max_error);
1957}
1958
1959// ~~~~ Impl. of skcms_Transform() ~~~~
1960
1961typedef enum {
1962 Op_load_a8,
1963 Op_load_g8,
1964 Op_load_8888_palette8,
1965 Op_load_4444,
1966 Op_load_565,
1967 Op_load_888,
1968 Op_load_8888,
1969 Op_load_1010102,
1970 Op_load_161616LE,
1971 Op_load_16161616LE,
1972 Op_load_161616BE,
1973 Op_load_16161616BE,
1974 Op_load_hhh,
1975 Op_load_hhhh,
1976 Op_load_fff,
1977 Op_load_ffff,
1978
1979 Op_swap_rb,
1980 Op_clamp,
1981 Op_invert,
1982 Op_force_opaque,
1983 Op_premul,
1984 Op_unpremul,
1985 Op_matrix_3x3,
1986 Op_matrix_3x4,
1987 Op_lab_to_xyz,
1988
1989 Op_tf_r,
1990 Op_tf_g,
1991 Op_tf_b,
1992 Op_tf_a,
1993
1994 Op_pq_r,
1995 Op_pq_g,
1996 Op_pq_b,
1997 Op_pq_a,
1998
1999 Op_hlg_r,
2000 Op_hlg_g,
2001 Op_hlg_b,
2002 Op_hlg_a,
2003
2004 Op_hlginv_r,
2005 Op_hlginv_g,
2006 Op_hlginv_b,
2007 Op_hlginv_a,
2008
2009 Op_table_r,
2010 Op_table_g,
2011 Op_table_b,
2012 Op_table_a,
2013
2014 Op_clut,
2015
2016 Op_store_a8,
2017 Op_store_g8,
2018 Op_store_4444,
2019 Op_store_565,
2020 Op_store_888,
2021 Op_store_8888,
2022 Op_store_1010102,
2023 Op_store_161616LE,
2024 Op_store_16161616LE,
2025 Op_store_161616BE,
2026 Op_store_16161616BE,
2027 Op_store_hhh,
2028 Op_store_hhhh,
2029 Op_store_fff,
2030 Op_store_ffff,
2031} Op;
2032
2033#if defined(__clang__)
2034 template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2035#elif defined(__GNUC__)
2036 // For some reason GCC accepts this nonsense, but not the more straightforward version,
2037 // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T))));
2038 template <int N, typename T>
2039 struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2040
2041 template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2042#endif
2043
2044// First, instantiate our default exec_ops() implementation using the default compiliation target.
2045
2046namespace baseline {
2047#if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \
2048 || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2049 #define N 1
2050 template <typename T> using V = T;
2051 using Color = float;
2052#elif defined(__AVX512F__)
2053 #define N 16
2054 template <typename T> using V = Vec<N,T>;
2055 using Color = float;
2056#elif defined(__AVX__)
2057 #define N 8
2058 template <typename T> using V = Vec<N,T>;
2059 using Color = float;
2060#elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2061 #define N 8
2062 template <typename T> using V = Vec<N,T>;
2063 using Color = _Float16;
2064#else
2065 #define N 4
2066 template <typename T> using V = Vec<N,T>;
2067 using Color = float;
2068#endif
2069
2070 #include "src/Transform_inl.h"
2071 #undef N
2072}
2073
2074// Now, instantiate any other versions of run_program() we may want for runtime detection.
2075#if !defined(SKCMS_PORTABLE) && \
2076 !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2077 (( defined(__clang__) && __clang_major__ >= 5) || \
2078 (!defined(__clang__) && defined(__GNUC__))) \
2079 && defined(__x86_64__)
2080
2081 #if !defined(__AVX2__)
2082 #if defined(__clang__)
2083 #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2084 #elif defined(__GNUC__)
2085 #pragma GCC push_options
2086 #pragma GCC target("avx2,f16c")
2087 #endif
2088
2089 namespace hsw {
2090 #define USING_AVX
2091 #define USING_AVX_F16C
2092 #define USING_AVX2
2093 #define N 8
2094 template <typename T> using V = Vec<N,T>;
2095 using Color = float;
2096
2097 #include "src/Transform_inl.h"
2098
2099 // src/Transform_inl.h will undefine USING_* for us.
2100 #undef N
2101 }
2102
2103 #if defined(__clang__)
2104 #pragma clang attribute pop
2105 #elif defined(__GNUC__)
2106 #pragma GCC pop_options
2107 #endif
2108
2109 #define TEST_FOR_HSW
2110 #endif
2111
2112 #if !defined(__AVX512F__)
2113 #if defined(__clang__)
2114 #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2115 #elif defined(__GNUC__)
2116 #pragma GCC push_options
2117 #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2118 #endif
2119
2120 namespace skx {
2121 #define USING_AVX512F
2122 #define N 16
2123 template <typename T> using V = Vec<N,T>;
2124 using Color = float;
2125
2126 #include "src/Transform_inl.h"
2127
2128 // src/Transform_inl.h will undefine USING_* for us.
2129 #undef N
2130 }
2131
2132 #if defined(__clang__)
2133 #pragma clang attribute pop
2134 #elif defined(__GNUC__)
2135 #pragma GCC pop_options
2136 #endif
2137
2138 #define TEST_FOR_SKX
2139 #endif
2140
2141 #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX)
2142 enum class CpuType { None, HSW, SKX };
2143 static CpuType cpu_type() {
2144 static const CpuType type = []{
2145 // See http://www.sandpile.org/x86/cpuid.htm
2146
2147 // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2148 uint32_t eax, ebx, ecx, edx;
2149 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2150 : "0"(1), "2"(0));
2151 if ((edx & (1u<<25)) && // SSE
2152 (edx & (1u<<26)) && // SSE2
2153 (ecx & (1u<< 0)) && // SSE3
2154 (ecx & (1u<< 9)) && // SSSE3
2155 (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even)
2156 (ecx & (1u<<19)) && // SSE4.1
2157 (ecx & (1u<<20)) && // SSE4.2
2158 (ecx & (1u<<26)) && // XSAVE
2159 (ecx & (1u<<27)) && // OSXSAVE
2160 (ecx & (1u<<28)) && // AVX
2161 (ecx & (1u<<29))) { // F16C
2162
2163 // Call cpuid(7) to check for AVX2 and AVX-512 bits.
2164 __asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2165 : "0"(7), "2"(0));
2166 // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2167 uint32_t xcr0, dont_need_edx;
2168 __asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(0));
2169
2170 if ((xcr0 & (1u<<1)) && // XMM register state saved?
2171 (xcr0 & (1u<<2)) && // YMM register state saved?
2172 (ebx & (1u<<5))) { // AVX2
2173 // At this point we're at least HSW. Continue checking for SKX.
2174 if ((xcr0 & (1u<< 5)) && // Opmasks state saved?
2175 (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved?
2176 (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved?
2177 (ebx & (1u<<16)) && // AVX512F
2178 (ebx & (1u<<17)) && // AVX512DQ
2179 (ebx & (1u<<28)) && // AVX512CD
2180 (ebx & (1u<<30)) && // AVX512BW
2181 (ebx & (1u<<31))) { // AVX512VL
2182 return CpuType::SKX;
2183 }
2184 return CpuType::HSW;
2185 }
2186 }
2187 return CpuType::None;
2188 }();
2189 return type;
2190 }
2191 #endif
2192
2193#endif
2194
2195typedef struct {
2196 Op op;
2197 const void* arg;
2198} OpAndArg;
2199
2200static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2201 static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2202 { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2203 { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2204 { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2205 { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2206 };
2207 const auto& op = ops[channel];
2208
2209 if (curve->table_entries == 0) {
2210 const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr };
2211
2212 const skcms_TransferFunction& tf = curve->parametric;
2213
2214 if (tf.g == 1 && tf.a == 1 &&
2215 tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) {
2216 return noop;
2217 }
2218
2219 switch (classify(tf)) {
2220 case Bad: return noop;
2221 case sRGBish: return OpAndArg{op.sRGBish, &tf};
2222 case PQish: return OpAndArg{op.PQish, &tf};
2223 case HLGish: return OpAndArg{op.HLGish, &tf};
2224 case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2225 }
2226 }
2227 return OpAndArg{op.table, curve};
2228}
2229
2230static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2231 switch (fmt >> 1) { // ignore rgb/bgr
2232 case skcms_PixelFormat_A_8 >> 1: return 1;
2233 case skcms_PixelFormat_G_8 >> 1: return 1;
2234 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1;
2235 case skcms_PixelFormat_ABGR_4444 >> 1: return 2;
2236 case skcms_PixelFormat_RGB_565 >> 1: return 2;
2237 case skcms_PixelFormat_RGB_888 >> 1: return 3;
2238 case skcms_PixelFormat_RGBA_8888 >> 1: return 4;
2239 case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4;
2240 case skcms_PixelFormat_RGBA_1010102 >> 1: return 4;
2241 case skcms_PixelFormat_RGB_161616LE >> 1: return 6;
2242 case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8;
2243 case skcms_PixelFormat_RGB_161616BE >> 1: return 6;
2244 case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8;
2245 case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6;
2246 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8;
2247 case skcms_PixelFormat_RGB_hhh >> 1: return 6;
2248 case skcms_PixelFormat_RGBA_hhhh >> 1: return 8;
2249 case skcms_PixelFormat_RGB_fff >> 1: return 12;
2250 case skcms_PixelFormat_RGBA_ffff >> 1: return 16;
2251 }
2252 assert(false);
2253 return 0;
2254}
2255
2256static bool prep_for_destination(const skcms_ICCProfile* profile,
2257 skcms_Matrix3x3* fromXYZD50,
2258 skcms_TransferFunction* invR,
2259 skcms_TransferFunction* invG,
2260 skcms_TransferFunction* invB) {
2261 // We only support destinations with parametric transfer functions
2262 // and with gamuts that can be transformed from XYZD50.
2263 return profile->has_trc
2264 && profile->has_toXYZD50
2265 && profile->trc[0].table_entries == 0
2266 && profile->trc[1].table_entries == 0
2267 && profile->trc[2].table_entries == 0
2268 && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR)
2269 && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG)
2270 && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB)
2271 && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2272}
2273
2274bool skcms_Transform(const void* src,
2275 skcms_PixelFormat srcFmt,
2276 skcms_AlphaFormat srcAlpha,
2277 const skcms_ICCProfile* srcProfile,
2278 void* dst,
2279 skcms_PixelFormat dstFmt,
2280 skcms_AlphaFormat dstAlpha,
2281 const skcms_ICCProfile* dstProfile,
2282 size_t npixels) {
2283 return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2284 dst, dstFmt, dstAlpha, dstProfile,
2285 npixels, nullptr);
2286}
2287
2288bool skcms_TransformWithPalette(const void* src,
2289 skcms_PixelFormat srcFmt,
2290 skcms_AlphaFormat srcAlpha,
2291 const skcms_ICCProfile* srcProfile,
2292 void* dst,
2293 skcms_PixelFormat dstFmt,
2294 skcms_AlphaFormat dstAlpha,
2295 const skcms_ICCProfile* dstProfile,
2296 size_t nz,
2297 const void* palette) {
2298 const size_t dst_bpp = bytes_per_pixel(dstFmt),
2299 src_bpp = bytes_per_pixel(srcFmt);
2300 // Let's just refuse if the request is absurdly big.
2301 if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) {
2302 return false;
2303 }
2304 int n = (int)nz;
2305
2306 // Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2307 if (!srcProfile) {
2308 srcProfile = skcms_sRGB_profile();
2309 }
2310 if (!dstProfile) {
2311 dstProfile = skcms_sRGB_profile();
2312 }
2313
2314 // We can't transform in place unless the PixelFormats are the same size.
2315 if (dst == src && dst_bpp != src_bpp) {
2316 return false;
2317 }
2318 // TODO: more careful alias rejection (like, dst == src + 1)?
2319
2320 if (needs_palette(srcFmt) && !palette) {
2321 return false;
2322 }
2323
2324 Op program [32];
2325 const void* arguments[32];
2326
2327 Op* ops = program;
2328 const void** args = arguments;
2329
2330 // These are always parametric curves of some sort.
2331 skcms_Curve dst_curves[3];
2332 dst_curves[0].table_entries =
2333 dst_curves[1].table_entries =
2334 dst_curves[2].table_entries = 0;
2335
2336 skcms_Matrix3x3 from_xyz;
2337
2338 switch (srcFmt >> 1) {
2339 default: return false;
2340 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break;
2341 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break;
2342 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break;
2343 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break;
2344 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break;
2345 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break;
2346 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break;
2347 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break;
2348 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break;
2349 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break;
2350 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break;
2351 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break;
2352 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break;
2353 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break;
2354 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break;
2355 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break;
2356 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break;
2357
2358 case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8;
2359 *args++ = palette;
2360 break;
2361 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2362 *ops++ = Op_load_8888;
2363 *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction();
2364 *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction();
2365 *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction();
2366 break;
2367 }
2368 if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm ||
2369 srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2370 *ops++ = Op_clamp;
2371 }
2372 if (srcFmt & 1) {
2373 *ops++ = Op_swap_rb;
2374 }
2375 skcms_ICCProfile gray_dst_profile;
2376 if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) {
2377 // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2378 // luminance (Y) by the destination transfer function.
2379 gray_dst_profile = *dstProfile;
2380 skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2381 dstProfile = &gray_dst_profile;
2382 }
2383
2384 if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2385 // Photoshop creates CMYK images as inverse CMYK.
2386 // These happen to be the only ones we've _ever_ seen.
2387 *ops++ = Op_invert;
2388 // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2389 srcAlpha = skcms_AlphaFormat_Unpremul;
2390 }
2391
2392 if (srcAlpha == skcms_AlphaFormat_Opaque) {
2393 *ops++ = Op_force_opaque;
2394 } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2395 *ops++ = Op_unpremul;
2396 }
2397
2398 if (dstProfile != srcProfile) {
2399
2400 if (!prep_for_destination(dstProfile,
2401 &from_xyz,
2402 &dst_curves[0].parametric,
2403 &dst_curves[1].parametric,
2404 &dst_curves[2].parametric)) {
2405 return false;
2406 }
2407
2408 if (srcProfile->has_A2B) {
2409 if (srcProfile->A2B.input_channels) {
2410 for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) {
2411 OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2412 if (oa.arg) {
2413 *ops++ = oa.op;
2414 *args++ = oa.arg;
2415 }
2416 }
2417 *ops++ = Op_clamp;
2418 *ops++ = Op_clut;
2419 *args++ = &srcProfile->A2B;
2420 }
2421
2422 if (srcProfile->A2B.matrix_channels == 3) {
2423 for (int i = 0; i < 3; i++) {
2424 OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2425 if (oa.arg) {
2426 *ops++ = oa.op;
2427 *args++ = oa.arg;
2428 }
2429 }
2430
2431 static const skcms_Matrix3x4 I = {{
2432 {1,0,0,0},
2433 {0,1,0,0},
2434 {0,0,1,0},
2435 }};
2436 if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2437 *ops++ = Op_matrix_3x4;
2438 *args++ = &srcProfile->A2B.matrix;
2439 }
2440 }
2441
2442 if (srcProfile->A2B.output_channels == 3) {
2443 for (int i = 0; i < 3; i++) {
2444 OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2445 if (oa.arg) {
2446 *ops++ = oa.op;
2447 *args++ = oa.arg;
2448 }
2449 }
2450 }
2451
2452 if (srcProfile->pcs == skcms_Signature_Lab) {
2453 *ops++ = Op_lab_to_xyz;
2454 }
2455
2456 } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2457 for (int i = 0; i < 3; i++) {
2458 OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2459 if (oa.arg) {
2460 *ops++ = oa.op;
2461 *args++ = oa.arg;
2462 }
2463 }
2464 } else {
2465 return false;
2466 }
2467
2468 // A2B sources should already be in XYZD50 at this point.
2469 // Others still need to be transformed using their toXYZD50 matrix.
2470 // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2471 // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2472 assert (srcProfile->has_A2B || srcProfile->has_toXYZD50);
2473 static const skcms_Matrix3x3 I = {{
2474 { 1.0f, 0.0f, 0.0f },
2475 { 0.0f, 1.0f, 0.0f },
2476 { 0.0f, 0.0f, 1.0f },
2477 }};
2478 const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2479
2480 // There's a chance the source and destination gamuts are identical,
2481 // in which case we can skip the gamut transform.
2482 if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2483 // Concat the entire gamut transform into from_xyz,
2484 // now slightly misnamed but it's a handy spot to stash the result.
2485 from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2486 *ops++ = Op_matrix_3x3;
2487 *args++ = &from_xyz;
2488 }
2489
2490 // Encode back to dst RGB using its parametric transfer functions.
2491 for (int i = 0; i < 3; i++) {
2492 OpAndArg oa = select_curve_op(dst_curves+i, i);
2493 if (oa.arg) {
2494 assert (oa.op != Op_table_r &&
2495 oa.op != Op_table_g &&
2496 oa.op != Op_table_b &&
2497 oa.op != Op_table_a);
2498 *ops++ = oa.op;
2499 *args++ = oa.arg;
2500 }
2501 }
2502 }
2503
2504 // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2505 // not just to values that fit in [0,1].
2506 //
2507 // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2508 // but would be carrying r > 1, which is really unexpected for downstream consumers.
2509 if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2510 *ops++ = Op_clamp;
2511 }
2512 if (dstAlpha == skcms_AlphaFormat_Opaque) {
2513 *ops++ = Op_force_opaque;
2514 } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2515 *ops++ = Op_premul;
2516 }
2517 if (dstFmt & 1) {
2518 *ops++ = Op_swap_rb;
2519 }
2520 switch (dstFmt >> 1) {
2521 default: return false;
2522 case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break;
2523 case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break;
2524 case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break;
2525 case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break;
2526 case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break;
2527 case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break;
2528 case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break;
2529 case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break;
2530 case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break;
2531 case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break;
2532 case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break;
2533 case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break;
2534 case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break;
2535 case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break;
2536 case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break;
2537 case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break;
2538 case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break;
2539
2540 case skcms_PixelFormat_RGBA_8888_sRGB >> 1:
2541 *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction();
2542 *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction();
2543 *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction();
2544 *ops++ = Op_store_8888;
2545 break;
2546 }
2547
2548 auto run = baseline::run_program;
2549#if defined(TEST_FOR_HSW)
2550 switch (cpu_type()) {
2551 case CpuType::None: break;
2552 case CpuType::HSW: run = hsw::run_program; break;
2553 case CpuType::SKX: run = hsw::run_program; break;
2554 }
2555#endif
2556#if defined(TEST_FOR_SKX)
2557 switch (cpu_type()) {
2558 case CpuType::None: break;
2559 case CpuType::HSW: break;
2560 case CpuType::SKX: run = skx::run_program; break;
2561 }
2562#endif
2563 run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp);
2564 return true;
2565}
2566
2567static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2568#if defined(NDEBUG)
2569 (void)profile;
2570#else
2571 skcms_Matrix3x3 fromXYZD50;
2572 skcms_TransferFunction invR, invG, invB;
2573 assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2574#endif
2575}
2576
2577bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2578 skcms_Matrix3x3 fromXYZD50;
2579 if (!profile->has_trc || !profile->has_toXYZD50
2580 || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2581 return false;
2582 }
2583
2584 skcms_TransferFunction tf[3];
2585 for (int i = 0; i < 3; i++) {
2586 skcms_TransferFunction inv;
2587 if (profile->trc[i].table_entries == 0
2588 && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2589 tf[i] = profile->trc[i].parametric;
2590 continue;
2591 }
2592
2593 float max_error;
2594 // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2595 if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2596 return false;
2597 }
2598 }
2599
2600 for (int i = 0; i < 3; ++i) {
2601 profile->trc[i].table_entries = 0;
2602 profile->trc[i].parametric = tf[i];
2603 }
2604
2605 assert_usable_as_destination(profile);
2606 return true;
2607}
2608
2609bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2610 // Operate on a copy of profile, so we can choose the best TF for the original curves
2611 skcms_ICCProfile result = *profile;
2612 if (!skcms_MakeUsableAsDestination(&result)) {
2613 return false;
2614 }
2615
2616 int best_tf = 0;
2617 float min_max_error = INFINITY_;
2618 for (int i = 0; i < 3; i++) {
2619 skcms_TransferFunction inv;
2620 if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2621 return false;
2622 }
2623
2624 float err = 0;
2625 for (int j = 0; j < 3; ++j) {
2626 err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2627 }
2628 if (min_max_error > err) {
2629 min_max_error = err;
2630 best_tf = i;
2631 }
2632 }
2633
2634 for (int i = 0; i < 3; i++) {
2635 result.trc[i].parametric = result.trc[best_tf].parametric;
2636 }
2637
2638 *profile = result;
2639 assert_usable_as_destination(profile);
2640 return true;
2641}
2642