1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "skcms.h" |
9 | #include "skcms_internal.h" |
10 | #include <assert.h> |
11 | #include <float.h> |
12 | #include <limits.h> |
13 | #include <stdlib.h> |
14 | #include <string.h> |
15 | |
16 | #if defined(__ARM_NEON) |
17 | #include <arm_neon.h> |
18 | #elif defined(__SSE__) |
19 | #include <immintrin.h> |
20 | |
21 | #if defined(__clang__) |
22 | // That #include <immintrin.h> is usually enough, but Clang's headers |
23 | // "helpfully" skip including the whole kitchen sink when _MSC_VER is |
24 | // defined, because lots of programs on Windows would include that and |
25 | // it'd be a lot slower. But we want all those headers included so we |
26 | // can use their features after runtime checks later. |
27 | #include <smmintrin.h> |
28 | #include <avxintrin.h> |
29 | #include <avx2intrin.h> |
30 | #include <avx512fintrin.h> |
31 | #include <avx512dqintrin.h> |
32 | #endif |
33 | #endif |
34 | |
35 | // sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others. |
36 | // We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit. |
37 | // |
38 | // Please do not use sizeof() directly, and size_t only when required. |
39 | // (We have no way of enforcing these requests...) |
40 | #define SAFE_SIZEOF(x) ((uint64_t)sizeof(x)) |
41 | |
42 | // Same sort of thing for _Layout structs with a variable sized array at the end (named "variable"). |
43 | #define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable)) |
44 | |
45 | static const union { |
46 | uint32_t bits; |
47 | float f; |
48 | } inf_ = { 0x7f800000 }; |
49 | #define INFINITY_ inf_.f |
50 | |
51 | #if defined(__clang__) || defined(__GNUC__) |
52 | #define small_memcpy __builtin_memcpy |
53 | #else |
54 | #define small_memcpy memcpy |
55 | #endif |
56 | |
57 | static float log2f_(float x) { |
58 | // The first approximation of log2(x) is its exponent 'e', minus 127. |
59 | int32_t bits; |
60 | small_memcpy(&bits, &x, sizeof(bits)); |
61 | |
62 | float e = (float)bits * (1.0f / (1<<23)); |
63 | |
64 | // If we use the mantissa too we can refine the error signficantly. |
65 | int32_t m_bits = (bits & 0x007fffff) | 0x3f000000; |
66 | float m; |
67 | small_memcpy(&m, &m_bits, sizeof(m)); |
68 | |
69 | return (e - 124.225514990f |
70 | - 1.498030302f*m |
71 | - 1.725879990f/(0.3520887068f + m)); |
72 | } |
73 | static float logf_(float x) { |
74 | const float ln2 = 0.69314718f; |
75 | return ln2*log2f_(x); |
76 | } |
77 | |
78 | static float exp2f_(float x) { |
79 | float fract = x - floorf_(x); |
80 | |
81 | float fbits = (1.0f * (1<<23)) * (x + 121.274057500f |
82 | - 1.490129070f*fract |
83 | + 27.728023300f/(4.84252568f - fract)); |
84 | |
85 | // Before we cast fbits to int32_t, check for out of range values to pacify UBSAN. |
86 | // INT_MAX is not exactly representable as a float, so exclude it as effectively infinite. |
87 | // INT_MIN is a power of 2 and exactly representable as a float, so it's fine. |
88 | if (fbits >= (float)INT_MAX) { |
89 | return INFINITY_; |
90 | } else if (fbits < (float)INT_MIN) { |
91 | return -INFINITY_; |
92 | } |
93 | |
94 | int32_t bits = (int32_t)fbits; |
95 | small_memcpy(&x, &bits, sizeof(x)); |
96 | return x; |
97 | } |
98 | |
99 | // Not static, as it's used by some test tools. |
100 | float powf_(float x, float y) { |
101 | assert (x >= 0); |
102 | return (x == 0) || (x == 1) ? x |
103 | : exp2f_(log2f_(x) * y); |
104 | } |
105 | |
106 | static float expf_(float x) { |
107 | const float log2_e = 1.4426950408889634074f; |
108 | return exp2f_(log2_e * x); |
109 | } |
110 | |
111 | static float fmaxf_(float x, float y) { return x > y ? x : y; } |
112 | static float fminf_(float x, float y) { return x < y ? x : y; } |
113 | |
114 | static bool isfinitef_(float x) { return 0 == x*0; } |
115 | |
116 | static float minus_1_ulp(float x) { |
117 | int32_t bits; |
118 | memcpy(&bits, &x, sizeof(bits)); |
119 | bits = bits - 1; |
120 | memcpy(&x, &bits, sizeof(bits)); |
121 | return x; |
122 | } |
123 | |
124 | // Most transfer functions we work with are sRGBish. |
125 | // For exotic HDR transfer functions, we encode them using a tf.g that makes no sense, |
126 | // and repurpose the other fields to hold the parameters of the HDR functions. |
127 | enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish }; |
128 | struct TF_PQish { float A,B,C,D,E,F; }; |
129 | struct TF_HLGish { float R,G,a,b,c; }; |
130 | |
131 | static float TFKind_marker(TFKind kind) { |
132 | // We'd use different NaNs, but those aren't guaranteed to be preserved by WASM. |
133 | return -(float)kind; |
134 | } |
135 | |
136 | static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr |
137 | , TF_HLGish* hlg = nullptr) { |
138 | if (tf.g < 0 && (int)tf.g == tf.g) { |
139 | // TODO: sanity checks for PQ/HLG like we do for sRGBish. |
140 | switch ((int)tf.g) { |
141 | case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(*pq )); } return PQish; |
142 | case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGish; |
143 | case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(*hlg)); } return HLGinvish; |
144 | } |
145 | return Bad; |
146 | } |
147 | |
148 | // Basic sanity checks for sRGBish transfer functions. |
149 | if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g) |
150 | // a,c,d,g should be non-negative to make any sense. |
151 | && tf.a >= 0 |
152 | && tf.c >= 0 |
153 | && tf.d >= 0 |
154 | && tf.g >= 0 |
155 | // Raising a negative value to a fractional tf->g produces complex numbers. |
156 | && tf.a * tf.d + tf.b >= 0) { |
157 | return sRGBish; |
158 | } |
159 | |
160 | return Bad; |
161 | } |
162 | |
163 | bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf, |
164 | float A, float B, float C, |
165 | float D, float E, float F) { |
166 | *tf = { TFKind_marker(PQish), A,B,C,D,E,F }; |
167 | assert(classify(*tf) == PQish); |
168 | return true; |
169 | } |
170 | |
171 | bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf, |
172 | float R, float G, |
173 | float a, float b, float c) { |
174 | *tf = { TFKind_marker(HLGish), R,G, a,b,c, 0 }; |
175 | assert(classify(*tf) == HLGish); |
176 | return true; |
177 | } |
178 | |
179 | float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) { |
180 | float sign = x < 0 ? -1.0f : 1.0f; |
181 | x *= sign; |
182 | |
183 | TF_PQish pq; |
184 | TF_HLGish hlg; |
185 | switch (classify(*tf, &pq, &hlg)) { |
186 | case Bad: break; |
187 | |
188 | case HLGish: return sign * (x*hlg.R <= 1 ? powf_(x*hlg.R, hlg.G) |
189 | : expf_((x-hlg.c)*hlg.a) + hlg.b); |
190 | |
191 | // skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast. |
192 | case HLGinvish: return sign * (x <= 1 ? hlg.R * powf_(x, hlg.G) |
193 | : hlg.a * logf_(x - hlg.b) + hlg.c); |
194 | |
195 | |
196 | case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f |
197 | : powf_(tf->a * x + tf->b, tf->g) + tf->e); |
198 | |
199 | case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), 0) |
200 | / (pq.D + pq.E * powf_(x, pq.C)), |
201 | pq.F); |
202 | } |
203 | return 0; |
204 | } |
205 | |
206 | |
207 | static float eval_curve(const skcms_Curve* curve, float x) { |
208 | if (curve->table_entries == 0) { |
209 | return skcms_TransferFunction_eval(&curve->parametric, x); |
210 | } |
211 | |
212 | float ix = fmaxf_(0, fminf_(x, 1)) * (curve->table_entries - 1); |
213 | int lo = (int) ix , |
214 | hi = (int)(float)minus_1_ulp(ix + 1.0f); |
215 | float t = ix - (float)lo; |
216 | |
217 | float l, h; |
218 | if (curve->table_8) { |
219 | l = curve->table_8[lo] * (1/255.0f); |
220 | h = curve->table_8[hi] * (1/255.0f); |
221 | } else { |
222 | uint16_t be_l, be_h; |
223 | memcpy(&be_l, curve->table_16 + 2*lo, 2); |
224 | memcpy(&be_h, curve->table_16 + 2*hi, 2); |
225 | uint16_t le_l = ((be_l << 8) | (be_l >> 8)) & 0xffff; |
226 | uint16_t le_h = ((be_h << 8) | (be_h >> 8)) & 0xffff; |
227 | l = le_l * (1/65535.0f); |
228 | h = le_h * (1/65535.0f); |
229 | } |
230 | return l + (h-l)*t; |
231 | } |
232 | |
233 | float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) { |
234 | uint32_t N = curve->table_entries > 256 ? curve->table_entries : 256; |
235 | const float dx = 1.0f / (N - 1); |
236 | float err = 0; |
237 | for (uint32_t i = 0; i < N; i++) { |
238 | float x = i * dx, |
239 | y = eval_curve(curve, x); |
240 | err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y))); |
241 | } |
242 | return err; |
243 | } |
244 | |
245 | bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) { |
246 | return skcms_MaxRoundtripError(curve, inv_tf) < (1/512.0f); |
247 | } |
248 | |
249 | // Additional ICC signature values that are only used internally |
250 | enum { |
251 | // File signature |
252 | skcms_Signature_acsp = 0x61637370, |
253 | |
254 | // Tag signatures |
255 | skcms_Signature_rTRC = 0x72545243, |
256 | skcms_Signature_gTRC = 0x67545243, |
257 | skcms_Signature_bTRC = 0x62545243, |
258 | skcms_Signature_kTRC = 0x6B545243, |
259 | |
260 | skcms_Signature_rXYZ = 0x7258595A, |
261 | skcms_Signature_gXYZ = 0x6758595A, |
262 | skcms_Signature_bXYZ = 0x6258595A, |
263 | |
264 | skcms_Signature_A2B0 = 0x41324230, |
265 | skcms_Signature_A2B1 = 0x41324231, |
266 | skcms_Signature_mAB = 0x6D414220, |
267 | |
268 | skcms_Signature_CHAD = 0x63686164, |
269 | skcms_Signature_WTPT = 0x77747074, |
270 | |
271 | // Type signatures |
272 | skcms_Signature_curv = 0x63757276, |
273 | skcms_Signature_mft1 = 0x6D667431, |
274 | skcms_Signature_mft2 = 0x6D667432, |
275 | skcms_Signature_para = 0x70617261, |
276 | skcms_Signature_sf32 = 0x73663332, |
277 | // XYZ is also a PCS signature, so it's defined in skcms.h |
278 | // skcms_Signature_XYZ = 0x58595A20, |
279 | }; |
280 | |
281 | static uint16_t read_big_u16(const uint8_t* ptr) { |
282 | uint16_t be; |
283 | memcpy(&be, ptr, sizeof(be)); |
284 | #if defined(_MSC_VER) |
285 | return _byteswap_ushort(be); |
286 | #else |
287 | return __builtin_bswap16(be); |
288 | #endif |
289 | } |
290 | |
291 | static uint32_t read_big_u32(const uint8_t* ptr) { |
292 | uint32_t be; |
293 | memcpy(&be, ptr, sizeof(be)); |
294 | #if defined(_MSC_VER) |
295 | return _byteswap_ulong(be); |
296 | #else |
297 | return __builtin_bswap32(be); |
298 | #endif |
299 | } |
300 | |
301 | static int32_t read_big_i32(const uint8_t* ptr) { |
302 | return (int32_t)read_big_u32(ptr); |
303 | } |
304 | |
305 | static float read_big_fixed(const uint8_t* ptr) { |
306 | return read_big_i32(ptr) * (1.0f / 65536.0f); |
307 | } |
308 | |
309 | // Maps to an in-memory profile so that fields line up to the locations specified |
310 | // in ICC.1:2010, section 7.2 |
311 | typedef struct { |
312 | uint8_t size [ 4]; |
313 | uint8_t cmm_type [ 4]; |
314 | uint8_t version [ 4]; |
315 | uint8_t profile_class [ 4]; |
316 | uint8_t data_color_space [ 4]; |
317 | uint8_t pcs [ 4]; |
318 | uint8_t creation_date_time [12]; |
319 | uint8_t signature [ 4]; |
320 | uint8_t platform [ 4]; |
321 | uint8_t flags [ 4]; |
322 | uint8_t device_manufacturer [ 4]; |
323 | uint8_t device_model [ 4]; |
324 | uint8_t device_attributes [ 8]; |
325 | uint8_t rendering_intent [ 4]; |
326 | uint8_t illuminant_X [ 4]; |
327 | uint8_t illuminant_Y [ 4]; |
328 | uint8_t illuminant_Z [ 4]; |
329 | uint8_t creator [ 4]; |
330 | uint8_t profile_id [16]; |
331 | uint8_t reserved [28]; |
332 | uint8_t tag_count [ 4]; // Technically not part of header, but required |
333 | } ; |
334 | |
335 | typedef struct { |
336 | uint8_t signature [4]; |
337 | uint8_t offset [4]; |
338 | uint8_t size [4]; |
339 | } tag_Layout; |
340 | |
341 | static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) { |
342 | return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout)); |
343 | } |
344 | |
345 | // s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid |
346 | // use of the type is for the CHAD tag that stores exactly nine values. |
347 | typedef struct { |
348 | uint8_t type [ 4]; |
349 | uint8_t reserved [ 4]; |
350 | uint8_t values [36]; |
351 | } sf32_Layout; |
352 | |
353 | bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) { |
354 | skcms_ICCTag tag; |
355 | if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) { |
356 | return false; |
357 | } |
358 | |
359 | if (tag.type != skcms_Signature_sf32 || tag.size < SAFE_SIZEOF(sf32_Layout)) { |
360 | return false; |
361 | } |
362 | |
363 | const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf; |
364 | const uint8_t* values = sf32Tag->values; |
365 | for (int r = 0; r < 3; ++r) |
366 | for (int c = 0; c < 3; ++c, values += 4) { |
367 | m->vals[r][c] = read_big_fixed(values); |
368 | } |
369 | return true; |
370 | } |
371 | |
372 | // XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of |
373 | // the type are for tags/data that store exactly one triple. |
374 | typedef struct { |
375 | uint8_t type [4]; |
376 | uint8_t reserved [4]; |
377 | uint8_t X [4]; |
378 | uint8_t Y [4]; |
379 | uint8_t Z [4]; |
380 | } XYZ_Layout; |
381 | |
382 | static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) { |
383 | if (tag->type != skcms_Signature_XYZ || tag->size < SAFE_SIZEOF(XYZ_Layout)) { |
384 | return false; |
385 | } |
386 | |
387 | const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf; |
388 | |
389 | *x = read_big_fixed(xyzTag->X); |
390 | *y = read_big_fixed(xyzTag->Y); |
391 | *z = read_big_fixed(xyzTag->Z); |
392 | return true; |
393 | } |
394 | |
395 | bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[3]) { |
396 | skcms_ICCTag tag; |
397 | return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) && |
398 | read_tag_xyz(&tag, &xyz[0], &xyz[1], &xyz[2]); |
399 | } |
400 | |
401 | static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ, |
402 | const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) { |
403 | return read_tag_xyz(rXYZ, &toXYZ->vals[0][0], &toXYZ->vals[1][0], &toXYZ->vals[2][0]) && |
404 | read_tag_xyz(gXYZ, &toXYZ->vals[0][1], &toXYZ->vals[1][1], &toXYZ->vals[2][1]) && |
405 | read_tag_xyz(bXYZ, &toXYZ->vals[0][2], &toXYZ->vals[1][2], &toXYZ->vals[2][2]); |
406 | } |
407 | |
408 | typedef struct { |
409 | uint8_t type [4]; |
410 | uint8_t reserved_a [4]; |
411 | uint8_t function_type [2]; |
412 | uint8_t reserved_b [2]; |
413 | uint8_t variable [1/*variable*/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type |
414 | } para_Layout; |
415 | |
416 | static bool read_curve_para(const uint8_t* buf, uint32_t size, |
417 | skcms_Curve* curve, uint32_t* curve_size) { |
418 | if (size < SAFE_FIXED_SIZE(para_Layout)) { |
419 | return false; |
420 | } |
421 | |
422 | const para_Layout* paraTag = (const para_Layout*)buf; |
423 | |
424 | enum { kG = 0, kGAB = 1, kGABC = 2, kGABCD = 3, kGABCDEF = 4 }; |
425 | uint16_t function_type = read_big_u16(paraTag->function_type); |
426 | if (function_type > kGABCDEF) { |
427 | return false; |
428 | } |
429 | |
430 | static const uint32_t curve_bytes[] = { 4, 12, 16, 20, 28 }; |
431 | if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) { |
432 | return false; |
433 | } |
434 | |
435 | if (curve_size) { |
436 | *curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]; |
437 | } |
438 | |
439 | curve->table_entries = 0; |
440 | curve->parametric.a = 1.0f; |
441 | curve->parametric.b = 0.0f; |
442 | curve->parametric.c = 0.0f; |
443 | curve->parametric.d = 0.0f; |
444 | curve->parametric.e = 0.0f; |
445 | curve->parametric.f = 0.0f; |
446 | curve->parametric.g = read_big_fixed(paraTag->variable); |
447 | |
448 | switch (function_type) { |
449 | case kGAB: |
450 | curve->parametric.a = read_big_fixed(paraTag->variable + 4); |
451 | curve->parametric.b = read_big_fixed(paraTag->variable + 8); |
452 | if (curve->parametric.a == 0) { |
453 | return false; |
454 | } |
455 | curve->parametric.d = -curve->parametric.b / curve->parametric.a; |
456 | break; |
457 | case kGABC: |
458 | curve->parametric.a = read_big_fixed(paraTag->variable + 4); |
459 | curve->parametric.b = read_big_fixed(paraTag->variable + 8); |
460 | curve->parametric.e = read_big_fixed(paraTag->variable + 12); |
461 | if (curve->parametric.a == 0) { |
462 | return false; |
463 | } |
464 | curve->parametric.d = -curve->parametric.b / curve->parametric.a; |
465 | curve->parametric.f = curve->parametric.e; |
466 | break; |
467 | case kGABCD: |
468 | curve->parametric.a = read_big_fixed(paraTag->variable + 4); |
469 | curve->parametric.b = read_big_fixed(paraTag->variable + 8); |
470 | curve->parametric.c = read_big_fixed(paraTag->variable + 12); |
471 | curve->parametric.d = read_big_fixed(paraTag->variable + 16); |
472 | break; |
473 | case kGABCDEF: |
474 | curve->parametric.a = read_big_fixed(paraTag->variable + 4); |
475 | curve->parametric.b = read_big_fixed(paraTag->variable + 8); |
476 | curve->parametric.c = read_big_fixed(paraTag->variable + 12); |
477 | curve->parametric.d = read_big_fixed(paraTag->variable + 16); |
478 | curve->parametric.e = read_big_fixed(paraTag->variable + 20); |
479 | curve->parametric.f = read_big_fixed(paraTag->variable + 24); |
480 | break; |
481 | } |
482 | return classify(curve->parametric) == sRGBish; |
483 | } |
484 | |
485 | typedef struct { |
486 | uint8_t type [4]; |
487 | uint8_t reserved [4]; |
488 | uint8_t value_count [4]; |
489 | uint8_t variable [1/*variable*/]; // value_count, 8.8 if 1, uint16 (n*65535) if > 1 |
490 | } curv_Layout; |
491 | |
492 | static bool read_curve_curv(const uint8_t* buf, uint32_t size, |
493 | skcms_Curve* curve, uint32_t* curve_size) { |
494 | if (size < SAFE_FIXED_SIZE(curv_Layout)) { |
495 | return false; |
496 | } |
497 | |
498 | const curv_Layout* curvTag = (const curv_Layout*)buf; |
499 | |
500 | uint32_t value_count = read_big_u32(curvTag->value_count); |
501 | if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) { |
502 | return false; |
503 | } |
504 | |
505 | if (curve_size) { |
506 | *curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t); |
507 | } |
508 | |
509 | if (value_count < 2) { |
510 | curve->table_entries = 0; |
511 | curve->parametric.a = 1.0f; |
512 | curve->parametric.b = 0.0f; |
513 | curve->parametric.c = 0.0f; |
514 | curve->parametric.d = 0.0f; |
515 | curve->parametric.e = 0.0f; |
516 | curve->parametric.f = 0.0f; |
517 | if (value_count == 0) { |
518 | // Empty tables are a shorthand for an identity curve |
519 | curve->parametric.g = 1.0f; |
520 | } else { |
521 | // Single entry tables are a shorthand for simple gamma |
522 | curve->parametric.g = read_big_u16(curvTag->variable) * (1.0f / 256.0f); |
523 | } |
524 | } else { |
525 | curve->table_8 = nullptr; |
526 | curve->table_16 = curvTag->variable; |
527 | curve->table_entries = value_count; |
528 | } |
529 | |
530 | return true; |
531 | } |
532 | |
533 | // Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read. |
534 | // If curve_size is not nullptr, writes the number of bytes used by the curve in (*curve_size). |
535 | static bool read_curve(const uint8_t* buf, uint32_t size, |
536 | skcms_Curve* curve, uint32_t* curve_size) { |
537 | if (!buf || size < 4 || !curve) { |
538 | return false; |
539 | } |
540 | |
541 | uint32_t type = read_big_u32(buf); |
542 | if (type == skcms_Signature_para) { |
543 | return read_curve_para(buf, size, curve, curve_size); |
544 | } else if (type == skcms_Signature_curv) { |
545 | return read_curve_curv(buf, size, curve, curve_size); |
546 | } |
547 | |
548 | return false; |
549 | } |
550 | |
551 | // mft1 and mft2 share a large chunk of data |
552 | typedef struct { |
553 | uint8_t type [ 4]; |
554 | uint8_t reserved_a [ 4]; |
555 | uint8_t input_channels [ 1]; |
556 | uint8_t output_channels [ 1]; |
557 | uint8_t grid_points [ 1]; |
558 | uint8_t reserved_b [ 1]; |
559 | uint8_t matrix [36]; |
560 | } mft_CommonLayout; |
561 | |
562 | typedef struct { |
563 | mft_CommonLayout common [1]; |
564 | |
565 | uint8_t variable [1/*variable*/]; |
566 | } mft1_Layout; |
567 | |
568 | typedef struct { |
569 | mft_CommonLayout common [1]; |
570 | |
571 | uint8_t input_table_entries [2]; |
572 | uint8_t output_table_entries [2]; |
573 | uint8_t variable [1/*variable*/]; |
574 | } mft2_Layout; |
575 | |
576 | static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) { |
577 | // MFT matrices are applied before the first set of curves, but must be identity unless the |
578 | // input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the |
579 | // matrix in skcms_A2B is applied later in the pipe, so supporting this would require another |
580 | // field/flag. |
581 | a2b->matrix_channels = 0; |
582 | |
583 | a2b->input_channels = mftTag->input_channels[0]; |
584 | a2b->output_channels = mftTag->output_channels[0]; |
585 | |
586 | // We require exactly three (ie XYZ/Lab/RGB) output channels |
587 | if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { |
588 | return false; |
589 | } |
590 | // We require at least one, and no more than four (ie CMYK) input channels |
591 | if (a2b->input_channels < 1 || a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { |
592 | return false; |
593 | } |
594 | |
595 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
596 | a2b->grid_points[i] = mftTag->grid_points[0]; |
597 | } |
598 | // The grid only makes sense with at least two points along each axis |
599 | if (a2b->grid_points[0] < 2) { |
600 | return false; |
601 | } |
602 | |
603 | return true; |
604 | } |
605 | |
606 | static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width, |
607 | uint32_t input_table_entries, uint32_t output_table_entries, |
608 | skcms_A2B* a2b) { |
609 | // byte_width is 1 or 2, [input|output]_table_entries are in [2, 4096], so no overflow |
610 | uint32_t byte_len_per_input_table = input_table_entries * byte_width; |
611 | uint32_t byte_len_per_output_table = output_table_entries * byte_width; |
612 | |
613 | // [input|output]_channels are <= 4, so still no overflow |
614 | uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table; |
615 | uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table; |
616 | |
617 | uint64_t grid_size = a2b->output_channels * byte_width; |
618 | for (uint32_t axis = 0; axis < a2b->input_channels; ++axis) { |
619 | grid_size *= a2b->grid_points[axis]; |
620 | } |
621 | |
622 | if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) { |
623 | return false; |
624 | } |
625 | |
626 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
627 | a2b->input_curves[i].table_entries = input_table_entries; |
628 | if (byte_width == 1) { |
629 | a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table; |
630 | a2b->input_curves[i].table_16 = nullptr; |
631 | } else { |
632 | a2b->input_curves[i].table_8 = nullptr; |
633 | a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table; |
634 | } |
635 | } |
636 | |
637 | if (byte_width == 1) { |
638 | a2b->grid_8 = table_base + byte_len_all_input_tables; |
639 | a2b->grid_16 = nullptr; |
640 | } else { |
641 | a2b->grid_8 = nullptr; |
642 | a2b->grid_16 = table_base + byte_len_all_input_tables; |
643 | } |
644 | |
645 | const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size; |
646 | for (uint32_t i = 0; i < a2b->output_channels; ++i) { |
647 | a2b->output_curves[i].table_entries = output_table_entries; |
648 | if (byte_width == 1) { |
649 | a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table; |
650 | a2b->output_curves[i].table_16 = nullptr; |
651 | } else { |
652 | a2b->output_curves[i].table_8 = nullptr; |
653 | a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table; |
654 | } |
655 | } |
656 | |
657 | return true; |
658 | } |
659 | |
660 | static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) { |
661 | if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) { |
662 | return false; |
663 | } |
664 | |
665 | const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf; |
666 | if (!read_mft_common(mftTag->common, a2b)) { |
667 | return false; |
668 | } |
669 | |
670 | uint32_t input_table_entries = 256; |
671 | uint32_t output_table_entries = 256; |
672 | |
673 | return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), 1, |
674 | input_table_entries, output_table_entries, a2b); |
675 | } |
676 | |
677 | static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) { |
678 | if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) { |
679 | return false; |
680 | } |
681 | |
682 | const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf; |
683 | if (!read_mft_common(mftTag->common, a2b)) { |
684 | return false; |
685 | } |
686 | |
687 | uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries); |
688 | uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries); |
689 | |
690 | // ICC spec mandates that 2 <= table_entries <= 4096 |
691 | if (input_table_entries < 2 || input_table_entries > 4096 || |
692 | output_table_entries < 2 || output_table_entries > 4096) { |
693 | return false; |
694 | } |
695 | |
696 | return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), 2, |
697 | input_table_entries, output_table_entries, a2b); |
698 | } |
699 | |
700 | static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset, |
701 | uint32_t num_curves, skcms_Curve* curves) { |
702 | for (uint32_t i = 0; i < num_curves; ++i) { |
703 | if (curve_offset > size) { |
704 | return false; |
705 | } |
706 | |
707 | uint32_t curve_bytes; |
708 | if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) { |
709 | return false; |
710 | } |
711 | |
712 | if (curve_bytes > UINT32_MAX - 3) { |
713 | return false; |
714 | } |
715 | curve_bytes = (curve_bytes + 3) & ~3U; |
716 | |
717 | uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes; |
718 | curve_offset = (uint32_t)new_offset_64; |
719 | if (new_offset_64 != curve_offset) { |
720 | return false; |
721 | } |
722 | } |
723 | |
724 | return true; |
725 | } |
726 | |
727 | typedef struct { |
728 | uint8_t type [ 4]; |
729 | uint8_t reserved_a [ 4]; |
730 | uint8_t input_channels [ 1]; |
731 | uint8_t output_channels [ 1]; |
732 | uint8_t reserved_b [ 2]; |
733 | uint8_t b_curve_offset [ 4]; |
734 | uint8_t matrix_offset [ 4]; |
735 | uint8_t m_curve_offset [ 4]; |
736 | uint8_t clut_offset [ 4]; |
737 | uint8_t a_curve_offset [ 4]; |
738 | } mAB_Layout; |
739 | |
740 | typedef struct { |
741 | uint8_t grid_points [16]; |
742 | uint8_t grid_byte_width [ 1]; |
743 | uint8_t reserved [ 3]; |
744 | uint8_t variable [1/*variable*/]; |
745 | } mABCLUT_Layout; |
746 | |
747 | static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) { |
748 | if (tag->size < SAFE_SIZEOF(mAB_Layout)) { |
749 | return false; |
750 | } |
751 | |
752 | const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf; |
753 | |
754 | a2b->input_channels = mABTag->input_channels[0]; |
755 | a2b->output_channels = mABTag->output_channels[0]; |
756 | |
757 | // We require exactly three (ie XYZ/Lab/RGB) output channels |
758 | if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) { |
759 | return false; |
760 | } |
761 | // We require no more than four (ie CMYK) input channels |
762 | if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) { |
763 | return false; |
764 | } |
765 | |
766 | uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset); |
767 | uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset); |
768 | uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset); |
769 | uint32_t clut_offset = read_big_u32(mABTag->clut_offset); |
770 | uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset); |
771 | |
772 | // "B" curves must be present |
773 | if (0 == b_curve_offset) { |
774 | return false; |
775 | } |
776 | |
777 | if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels, |
778 | a2b->output_curves)) { |
779 | return false; |
780 | } |
781 | |
782 | // "M" curves and Matrix must be used together |
783 | if (0 != m_curve_offset) { |
784 | if (0 == matrix_offset) { |
785 | return false; |
786 | } |
787 | a2b->matrix_channels = a2b->output_channels; |
788 | if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels, |
789 | a2b->matrix_curves)) { |
790 | return false; |
791 | } |
792 | |
793 | // Read matrix, which is stored as a row-major 3x3, followed by the fourth column |
794 | if (tag->size < matrix_offset + 12 * SAFE_SIZEOF(uint32_t)) { |
795 | return false; |
796 | } |
797 | float encoding_factor = pcs_is_xyz ? 65535 / 32768.0f : 1.0f; |
798 | const uint8_t* mtx_buf = tag->buf + matrix_offset; |
799 | a2b->matrix.vals[0][0] = encoding_factor * read_big_fixed(mtx_buf + 0); |
800 | a2b->matrix.vals[0][1] = encoding_factor * read_big_fixed(mtx_buf + 4); |
801 | a2b->matrix.vals[0][2] = encoding_factor * read_big_fixed(mtx_buf + 8); |
802 | a2b->matrix.vals[1][0] = encoding_factor * read_big_fixed(mtx_buf + 12); |
803 | a2b->matrix.vals[1][1] = encoding_factor * read_big_fixed(mtx_buf + 16); |
804 | a2b->matrix.vals[1][2] = encoding_factor * read_big_fixed(mtx_buf + 20); |
805 | a2b->matrix.vals[2][0] = encoding_factor * read_big_fixed(mtx_buf + 24); |
806 | a2b->matrix.vals[2][1] = encoding_factor * read_big_fixed(mtx_buf + 28); |
807 | a2b->matrix.vals[2][2] = encoding_factor * read_big_fixed(mtx_buf + 32); |
808 | a2b->matrix.vals[0][3] = encoding_factor * read_big_fixed(mtx_buf + 36); |
809 | a2b->matrix.vals[1][3] = encoding_factor * read_big_fixed(mtx_buf + 40); |
810 | a2b->matrix.vals[2][3] = encoding_factor * read_big_fixed(mtx_buf + 44); |
811 | } else { |
812 | if (0 != matrix_offset) { |
813 | return false; |
814 | } |
815 | a2b->matrix_channels = 0; |
816 | } |
817 | |
818 | // "A" curves and CLUT must be used together |
819 | if (0 != a_curve_offset) { |
820 | if (0 == clut_offset) { |
821 | return false; |
822 | } |
823 | if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels, |
824 | a2b->input_curves)) { |
825 | return false; |
826 | } |
827 | |
828 | if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) { |
829 | return false; |
830 | } |
831 | const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset); |
832 | |
833 | if (clut->grid_byte_width[0] == 1) { |
834 | a2b->grid_8 = clut->variable; |
835 | a2b->grid_16 = nullptr; |
836 | } else if (clut->grid_byte_width[0] == 2) { |
837 | a2b->grid_8 = nullptr; |
838 | a2b->grid_16 = clut->variable; |
839 | } else { |
840 | return false; |
841 | } |
842 | |
843 | uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[0]; |
844 | for (uint32_t i = 0; i < a2b->input_channels; ++i) { |
845 | a2b->grid_points[i] = clut->grid_points[i]; |
846 | // The grid only makes sense with at least two points along each axis |
847 | if (a2b->grid_points[i] < 2) { |
848 | return false; |
849 | } |
850 | grid_size *= a2b->grid_points[i]; |
851 | } |
852 | if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) { |
853 | return false; |
854 | } |
855 | } else { |
856 | if (0 != clut_offset) { |
857 | return false; |
858 | } |
859 | |
860 | // If there is no CLUT, the number of input and output channels must match |
861 | if (a2b->input_channels != a2b->output_channels) { |
862 | return false; |
863 | } |
864 | |
865 | // Zero out the number of input channels to signal that we're skipping this stage |
866 | a2b->input_channels = 0; |
867 | } |
868 | |
869 | return true; |
870 | } |
871 | |
872 | // If you pass f, we'll fit a possibly-non-zero value for *f. |
873 | // If you pass nullptr, we'll assume you want *f to be treated as zero. |
874 | static int fit_linear(const skcms_Curve* curve, int N, float tol, |
875 | float* c, float* d, float* f = nullptr) { |
876 | assert(N > 1); |
877 | // We iteratively fit the first points to the TF's linear piece. |
878 | // We want the cx + f line to pass through the first and last points we fit exactly. |
879 | // |
880 | // As we walk along the points we find the minimum and maximum slope of the line before the |
881 | // error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes |
882 | // emtpy, when we definitely can't add any more points. |
883 | // |
884 | // Some points' error intervals may intersect the running interval but not lie fully |
885 | // within it. So we keep track of the last point we saw that is a valid end point candidate, |
886 | // and once the search is done, back up to build the line through *that* point. |
887 | const float dx = 1.0f / (N - 1); |
888 | |
889 | int lin_points = 1; |
890 | |
891 | float f_zero = 0.0f; |
892 | if (f) { |
893 | *f = eval_curve(curve, 0); |
894 | } else { |
895 | f = &f_zero; |
896 | } |
897 | |
898 | |
899 | float slope_min = -INFINITY_; |
900 | float slope_max = +INFINITY_; |
901 | for (int i = 1; i < N; ++i) { |
902 | float x = i * dx; |
903 | float y = eval_curve(curve, x); |
904 | |
905 | float slope_max_i = (y + tol - *f) / x, |
906 | slope_min_i = (y - tol - *f) / x; |
907 | if (slope_max_i < slope_min || slope_max < slope_min_i) { |
908 | // Slope intervals would no longer overlap. |
909 | break; |
910 | } |
911 | slope_max = fminf_(slope_max, slope_max_i); |
912 | slope_min = fmaxf_(slope_min, slope_min_i); |
913 | |
914 | float cur_slope = (y - *f) / x; |
915 | if (slope_min <= cur_slope && cur_slope <= slope_max) { |
916 | lin_points = i + 1; |
917 | *c = cur_slope; |
918 | } |
919 | } |
920 | |
921 | // Set D to the last point that met our tolerance. |
922 | *d = (lin_points - 1) * dx; |
923 | return lin_points; |
924 | } |
925 | |
926 | static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) { |
927 | bool ok = false; |
928 | if (tag->type == skcms_Signature_mft1) { |
929 | ok = read_tag_mft1(tag, a2b); |
930 | } else if (tag->type == skcms_Signature_mft2) { |
931 | ok = read_tag_mft2(tag, a2b); |
932 | } else if (tag->type == skcms_Signature_mAB) { |
933 | ok = read_tag_mab(tag, a2b, pcs_is_xyz); |
934 | } |
935 | if (!ok) { |
936 | return false; |
937 | } |
938 | |
939 | // Detect and canonicalize identity tables. |
940 | skcms_Curve* curves[] = { |
941 | a2b->input_channels > 0 ? a2b->input_curves + 0 : nullptr, |
942 | a2b->input_channels > 1 ? a2b->input_curves + 1 : nullptr, |
943 | a2b->input_channels > 2 ? a2b->input_curves + 2 : nullptr, |
944 | a2b->input_channels > 3 ? a2b->input_curves + 3 : nullptr, |
945 | a2b->matrix_channels > 0 ? a2b->matrix_curves + 0 : nullptr, |
946 | a2b->matrix_channels > 1 ? a2b->matrix_curves + 1 : nullptr, |
947 | a2b->matrix_channels > 2 ? a2b->matrix_curves + 2 : nullptr, |
948 | a2b->output_channels > 0 ? a2b->output_curves + 0 : nullptr, |
949 | a2b->output_channels > 1 ? a2b->output_curves + 1 : nullptr, |
950 | a2b->output_channels > 2 ? a2b->output_curves + 2 : nullptr, |
951 | }; |
952 | |
953 | for (int i = 0; i < ARRAY_COUNT(curves); i++) { |
954 | skcms_Curve* curve = curves[i]; |
955 | |
956 | if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) { |
957 | int N = (int)curve->table_entries; |
958 | |
959 | float c = 0.0f, d = 0.0f, f = 0.0f; |
960 | if (N == fit_linear(curve, N, 1.0f/(2*N), &c,&d,&f) |
961 | && c == 1.0f |
962 | && f == 0.0f) { |
963 | curve->table_entries = 0; |
964 | curve->table_8 = nullptr; |
965 | curve->table_16 = nullptr; |
966 | curve->parametric = skcms_TransferFunction{1,1,0,0,0,0,0}; |
967 | } |
968 | } |
969 | } |
970 | |
971 | return true; |
972 | } |
973 | |
974 | void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) { |
975 | if (!profile || !profile->buffer || !tag) { return; } |
976 | if (idx > profile->tag_count) { return; } |
977 | const tag_Layout* tags = get_tag_table(profile); |
978 | tag->signature = read_big_u32(tags[idx].signature); |
979 | tag->size = read_big_u32(tags[idx].size); |
980 | tag->buf = read_big_u32(tags[idx].offset) + profile->buffer; |
981 | tag->type = read_big_u32(tag->buf); |
982 | } |
983 | |
984 | bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) { |
985 | if (!profile || !profile->buffer || !tag) { return false; } |
986 | const tag_Layout* tags = get_tag_table(profile); |
987 | for (uint32_t i = 0; i < profile->tag_count; ++i) { |
988 | if (read_big_u32(tags[i].signature) == sig) { |
989 | tag->signature = sig; |
990 | tag->size = read_big_u32(tags[i].size); |
991 | tag->buf = read_big_u32(tags[i].offset) + profile->buffer; |
992 | tag->type = read_big_u32(tag->buf); |
993 | return true; |
994 | } |
995 | } |
996 | return false; |
997 | } |
998 | |
999 | static bool usable_as_src(const skcms_ICCProfile* profile) { |
1000 | return profile->has_A2B |
1001 | || (profile->has_trc && profile->has_toXYZD50); |
1002 | } |
1003 | |
1004 | bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) { |
1005 | assert(SAFE_SIZEOF(header_Layout) == 132); |
1006 | |
1007 | if (!profile) { |
1008 | return false; |
1009 | } |
1010 | memset(profile, 0, SAFE_SIZEOF(*profile)); |
1011 | |
1012 | if (len < SAFE_SIZEOF(header_Layout)) { |
1013 | return false; |
1014 | } |
1015 | |
1016 | // Byte-swap all header fields |
1017 | const header_Layout* = (const header_Layout*)buf; |
1018 | profile->buffer = (const uint8_t*)buf; |
1019 | profile->size = read_big_u32(header->size); |
1020 | uint32_t version = read_big_u32(header->version); |
1021 | profile->data_color_space = read_big_u32(header->data_color_space); |
1022 | profile->pcs = read_big_u32(header->pcs); |
1023 | uint32_t signature = read_big_u32(header->signature); |
1024 | float illuminant_X = read_big_fixed(header->illuminant_X); |
1025 | float illuminant_Y = read_big_fixed(header->illuminant_Y); |
1026 | float illuminant_Z = read_big_fixed(header->illuminant_Z); |
1027 | profile->tag_count = read_big_u32(header->tag_count); |
1028 | |
1029 | // Validate signature, size (smaller than buffer, large enough to hold tag table), |
1030 | // and major version |
1031 | uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout); |
1032 | if (signature != skcms_Signature_acsp || |
1033 | profile->size > len || |
1034 | profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size || |
1035 | (version >> 24) > 4) { |
1036 | return false; |
1037 | } |
1038 | |
1039 | // Validate that illuminant is D50 white |
1040 | if (fabsf_(illuminant_X - 0.9642f) > 0.0100f || |
1041 | fabsf_(illuminant_Y - 1.0000f) > 0.0100f || |
1042 | fabsf_(illuminant_Z - 0.8249f) > 0.0100f) { |
1043 | return false; |
1044 | } |
1045 | |
1046 | // Validate that all tag entries have sane offset + size |
1047 | const tag_Layout* tags = get_tag_table(profile); |
1048 | for (uint32_t i = 0; i < profile->tag_count; ++i) { |
1049 | uint32_t tag_offset = read_big_u32(tags[i].offset); |
1050 | uint32_t tag_size = read_big_u32(tags[i].size); |
1051 | uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size; |
1052 | if (tag_size < 4 || tag_end > profile->size) { |
1053 | return false; |
1054 | } |
1055 | } |
1056 | |
1057 | if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) { |
1058 | return false; |
1059 | } |
1060 | |
1061 | bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ; |
1062 | |
1063 | // Pre-parse commonly used tags. |
1064 | skcms_ICCTag kTRC; |
1065 | if (profile->data_color_space == skcms_Signature_Gray && |
1066 | skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) { |
1067 | if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[0], nullptr)) { |
1068 | // Malformed tag |
1069 | return false; |
1070 | } |
1071 | profile->trc[1] = profile->trc[0]; |
1072 | profile->trc[2] = profile->trc[0]; |
1073 | profile->has_trc = true; |
1074 | |
1075 | if (pcs_is_xyz) { |
1076 | profile->toXYZD50.vals[0][0] = illuminant_X; |
1077 | profile->toXYZD50.vals[1][1] = illuminant_Y; |
1078 | profile->toXYZD50.vals[2][2] = illuminant_Z; |
1079 | profile->has_toXYZD50 = true; |
1080 | } |
1081 | } else { |
1082 | skcms_ICCTag rTRC, gTRC, bTRC; |
1083 | if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) && |
1084 | skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) && |
1085 | skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) { |
1086 | if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[0], nullptr) || |
1087 | !read_curve(gTRC.buf, gTRC.size, &profile->trc[1], nullptr) || |
1088 | !read_curve(bTRC.buf, bTRC.size, &profile->trc[2], nullptr)) { |
1089 | // Malformed TRC tags |
1090 | return false; |
1091 | } |
1092 | profile->has_trc = true; |
1093 | } |
1094 | |
1095 | skcms_ICCTag rXYZ, gXYZ, bXYZ; |
1096 | if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) && |
1097 | skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) && |
1098 | skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) { |
1099 | if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) { |
1100 | // Malformed XYZ tags |
1101 | return false; |
1102 | } |
1103 | profile->has_toXYZD50 = true; |
1104 | } |
1105 | } |
1106 | |
1107 | skcms_ICCTag a2b_tag; |
1108 | |
1109 | // For now, we're preferring A2B0, like Skia does and the ICC spec tells us to. |
1110 | // TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)? |
1111 | // This breaks with the ICC spec, but we think it's a good idea, given that TRC curves |
1112 | // and all our known users are thinking exclusively in terms of relative colormetric. |
1113 | const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 }; |
1114 | for (int i = 0; i < ARRAY_COUNT(sigs); i++) { |
1115 | if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) { |
1116 | if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) { |
1117 | // Malformed A2B tag |
1118 | return false; |
1119 | } |
1120 | profile->has_A2B = true; |
1121 | break; |
1122 | } |
1123 | } |
1124 | |
1125 | return usable_as_src(profile); |
1126 | } |
1127 | |
1128 | |
1129 | const skcms_ICCProfile* skcms_sRGB_profile() { |
1130 | static const skcms_ICCProfile sRGB_profile = { |
1131 | nullptr, // buffer, moot here |
1132 | |
1133 | 0, // size, moot here |
1134 | skcms_Signature_RGB, // data_color_space |
1135 | skcms_Signature_XYZ, // pcs |
1136 | 0, // tag count, moot here |
1137 | |
1138 | // We choose to represent sRGB with its canonical transfer function, |
1139 | // and with its canonical XYZD50 gamut matrix. |
1140 | true, // has_trc, followed by the 3 trc curves |
1141 | { |
1142 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}}, |
1143 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}}, |
1144 | {{0, {2.4f, (float)(1/1.055), (float)(0.055/1.055), (float)(1/12.92), 0.04045f, 0, 0}}}, |
1145 | }, |
1146 | |
1147 | true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix |
1148 | {{ |
1149 | { 0.436065674f, 0.385147095f, 0.143066406f }, |
1150 | { 0.222488403f, 0.716873169f, 0.060607910f }, |
1151 | { 0.013916016f, 0.097076416f, 0.714096069f }, |
1152 | }}, |
1153 | |
1154 | false, // has_A2B, followed by a2b itself which we don't care about. |
1155 | { |
1156 | 0, |
1157 | { |
1158 | {{0, {0,0, 0,0,0,0,0}}}, |
1159 | {{0, {0,0, 0,0,0,0,0}}}, |
1160 | {{0, {0,0, 0,0,0,0,0}}}, |
1161 | {{0, {0,0, 0,0,0,0,0}}}, |
1162 | }, |
1163 | {0,0,0,0}, |
1164 | nullptr, |
1165 | nullptr, |
1166 | |
1167 | 0, |
1168 | { |
1169 | {{0, {0,0, 0,0,0,0,0}}}, |
1170 | {{0, {0,0, 0,0,0,0,0}}}, |
1171 | {{0, {0,0, 0,0,0,0,0}}}, |
1172 | }, |
1173 | {{ |
1174 | { 0,0,0,0 }, |
1175 | { 0,0,0,0 }, |
1176 | { 0,0,0,0 }, |
1177 | }}, |
1178 | |
1179 | 0, |
1180 | { |
1181 | {{0, {0,0, 0,0,0,0,0}}}, |
1182 | {{0, {0,0, 0,0,0,0,0}}}, |
1183 | {{0, {0,0, 0,0,0,0,0}}}, |
1184 | }, |
1185 | }, |
1186 | }; |
1187 | return &sRGB_profile; |
1188 | } |
1189 | |
1190 | const skcms_ICCProfile* skcms_XYZD50_profile() { |
1191 | // Just like sRGB above, but with identity transfer functions and toXYZD50 matrix. |
1192 | static const skcms_ICCProfile XYZD50_profile = { |
1193 | nullptr, // buffer, moot here |
1194 | |
1195 | 0, // size, moot here |
1196 | skcms_Signature_RGB, // data_color_space |
1197 | skcms_Signature_XYZ, // pcs |
1198 | 0, // tag count, moot here |
1199 | |
1200 | true, // has_trc, followed by the 3 trc curves |
1201 | { |
1202 | {{0, {1,1, 0,0,0,0,0}}}, |
1203 | {{0, {1,1, 0,0,0,0,0}}}, |
1204 | {{0, {1,1, 0,0,0,0,0}}}, |
1205 | }, |
1206 | |
1207 | true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix |
1208 | {{ |
1209 | { 1,0,0 }, |
1210 | { 0,1,0 }, |
1211 | { 0,0,1 }, |
1212 | }}, |
1213 | |
1214 | false, // has_A2B, followed by a2b itself which we don't care about. |
1215 | { |
1216 | 0, |
1217 | { |
1218 | {{0, {0,0, 0,0,0,0,0}}}, |
1219 | {{0, {0,0, 0,0,0,0,0}}}, |
1220 | {{0, {0,0, 0,0,0,0,0}}}, |
1221 | {{0, {0,0, 0,0,0,0,0}}}, |
1222 | }, |
1223 | {0,0,0,0}, |
1224 | nullptr, |
1225 | nullptr, |
1226 | |
1227 | 0, |
1228 | { |
1229 | {{0, {0,0, 0,0,0,0,0}}}, |
1230 | {{0, {0,0, 0,0,0,0,0}}}, |
1231 | {{0, {0,0, 0,0,0,0,0}}}, |
1232 | }, |
1233 | {{ |
1234 | { 0,0,0,0 }, |
1235 | { 0,0,0,0 }, |
1236 | { 0,0,0,0 }, |
1237 | }}, |
1238 | |
1239 | 0, |
1240 | { |
1241 | {{0, {0,0, 0,0,0,0,0}}}, |
1242 | {{0, {0,0, 0,0,0,0,0}}}, |
1243 | {{0, {0,0, 0,0,0,0,0}}}, |
1244 | }, |
1245 | }, |
1246 | }; |
1247 | |
1248 | return &XYZD50_profile; |
1249 | } |
1250 | |
1251 | const skcms_TransferFunction* skcms_sRGB_TransferFunction() { |
1252 | return &skcms_sRGB_profile()->trc[0].parametric; |
1253 | } |
1254 | |
1255 | const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() { |
1256 | static const skcms_TransferFunction sRGB_inv = |
1257 | {0.416666657f, 1.137283325f, -0.0f, 12.920000076f, 0.003130805f, -0.054969788f, -0.0f}; |
1258 | return &sRGB_inv; |
1259 | } |
1260 | |
1261 | const skcms_TransferFunction* skcms_Identity_TransferFunction() { |
1262 | static const skcms_TransferFunction identity = {1,1,0,0,0,0,0}; |
1263 | return &identity; |
1264 | } |
1265 | |
1266 | const uint8_t skcms_252_random_bytes[] = { |
1267 | 8, 179, 128, 204, 253, 38, 134, 184, 68, 102, 32, 138, 99, 39, 169, 215, |
1268 | 119, 26, 3, 223, 95, 239, 52, 132, 114, 74, 81, 234, 97, 116, 244, 205, 30, |
1269 | 154, 173, 12, 51, 159, 122, 153, 61, 226, 236, 178, 229, 55, 181, 220, 191, |
1270 | 194, 160, 126, 168, 82, 131, 18, 180, 245, 163, 22, 246, 69, 235, 252, 57, |
1271 | 108, 14, 6, 152, 240, 255, 171, 242, 20, 227, 177, 238, 96, 85, 16, 211, |
1272 | 70, 200, 149, 155, 146, 127, 145, 100, 151, 109, 19, 165, 208, 195, 164, |
1273 | 137, 254, 182, 248, 64, 201, 45, 209, 5, 147, 207, 210, 113, 162, 83, 225, |
1274 | 9, 31, 15, 231, 115, 37, 58, 53, 24, 49, 197, 56, 120, 172, 48, 21, 214, |
1275 | 129, 111, 11, 50, 187, 196, 34, 60, 103, 71, 144, 47, 203, 77, 80, 232, |
1276 | 140, 222, 250, 206, 166, 247, 139, 249, 221, 72, 106, 27, 199, 117, 54, |
1277 | 219, 135, 118, 40, 79, 41, 251, 46, 93, 212, 92, 233, 148, 28, 121, 63, |
1278 | 123, 158, 105, 59, 29, 42, 143, 23, 0, 107, 176, 87, 104, 183, 156, 193, |
1279 | 189, 90, 188, 65, 190, 17, 198, 7, 186, 161, 1, 124, 78, 125, 170, 133, |
1280 | 174, 218, 67, 157, 75, 101, 89, 217, 62, 33, 141, 228, 25, 35, 91, 230, 4, |
1281 | 2, 13, 73, 86, 167, 237, 84, 243, 44, 185, 66, 130, 110, 150, 142, 216, 88, |
1282 | 112, 36, 224, 136, 202, 76, 94, 98, 175, 213 |
1283 | }; |
1284 | |
1285 | bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) { |
1286 | // Test for exactly equal profiles first. |
1287 | if (A == B || 0 == memcmp(A,B, sizeof(skcms_ICCProfile))) { |
1288 | return true; |
1289 | } |
1290 | |
1291 | // For now this is the essentially the same strategy we use in test_only.c |
1292 | // for our skcms_Transform() smoke tests: |
1293 | // 1) transform A to XYZD50 |
1294 | // 2) transform B to XYZD50 |
1295 | // 3) return true if they're similar enough |
1296 | // Our current criterion in 3) is maximum 1 bit error per XYZD50 byte. |
1297 | |
1298 | // skcms_252_random_bytes are 252 of a random shuffle of all possible bytes. |
1299 | // 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing. |
1300 | |
1301 | if (A->data_color_space != B->data_color_space) { |
1302 | return false; |
1303 | } |
1304 | |
1305 | // Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK. |
1306 | // TODO: working with RGBA_8888 either way is probably fastest. |
1307 | skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888; |
1308 | size_t npixels = 84; |
1309 | if (A->data_color_space == skcms_Signature_CMYK) { |
1310 | fmt = skcms_PixelFormat_RGBA_8888; |
1311 | npixels = 63; |
1312 | } |
1313 | |
1314 | // TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile), |
1315 | // use pre-canned results and skip that skcms_Transform() call? |
1316 | uint8_t dstA[252], |
1317 | dstB[252]; |
1318 | if (!skcms_Transform( |
1319 | skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A, |
1320 | dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(), |
1321 | npixels)) { |
1322 | return false; |
1323 | } |
1324 | if (!skcms_Transform( |
1325 | skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B, |
1326 | dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(), |
1327 | npixels)) { |
1328 | return false; |
1329 | } |
1330 | |
1331 | // TODO: make sure this final check has reasonable codegen. |
1332 | for (size_t i = 0; i < 252; i++) { |
1333 | if (abs((int)dstA[i] - (int)dstB[i]) > 1) { |
1334 | return false; |
1335 | } |
1336 | } |
1337 | return true; |
1338 | } |
1339 | |
1340 | bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile, |
1341 | const skcms_TransferFunction* inv_tf) { |
1342 | if (!profile || !profile->has_trc) { |
1343 | return false; |
1344 | } |
1345 | |
1346 | return skcms_AreApproximateInverses(&profile->trc[0], inv_tf) && |
1347 | skcms_AreApproximateInverses(&profile->trc[1], inv_tf) && |
1348 | skcms_AreApproximateInverses(&profile->trc[2], inv_tf); |
1349 | } |
1350 | |
1351 | static bool is_zero_to_one(float x) { |
1352 | return 0 <= x && x <= 1; |
1353 | } |
1354 | |
1355 | typedef struct { float vals[3]; } skcms_Vector3; |
1356 | |
1357 | static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) { |
1358 | skcms_Vector3 dst = {{0,0,0}}; |
1359 | for (int row = 0; row < 3; ++row) { |
1360 | dst.vals[row] = m->vals[row][0] * v->vals[0] |
1361 | + m->vals[row][1] * v->vals[1] |
1362 | + m->vals[row][2] * v->vals[2]; |
1363 | } |
1364 | return dst; |
1365 | } |
1366 | |
1367 | bool skcms_AdaptToXYZD50(float wx, float wy, |
1368 | skcms_Matrix3x3* toXYZD50) { |
1369 | if (!is_zero_to_one(wx) || !is_zero_to_one(wy) || |
1370 | !toXYZD50) { |
1371 | return false; |
1372 | } |
1373 | |
1374 | // Assumes that Y is 1.0f. |
1375 | skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } }; |
1376 | |
1377 | // Now convert toXYZ matrix to toXYZD50. |
1378 | skcms_Vector3 wXYZD50 = { { 0.96422f, 1.0f, 0.82521f } }; |
1379 | |
1380 | // Calculate the chromatic adaptation matrix. We will use the Bradford method, thus |
1381 | // the matrices below. The Bradford method is used by Adobe and is widely considered |
1382 | // to be the best. |
1383 | skcms_Matrix3x3 xyz_to_lms = {{ |
1384 | { 0.8951f, 0.2664f, -0.1614f }, |
1385 | { -0.7502f, 1.7135f, 0.0367f }, |
1386 | { 0.0389f, -0.0685f, 1.0296f }, |
1387 | }}; |
1388 | skcms_Matrix3x3 lms_to_xyz = {{ |
1389 | { 0.9869929f, -0.1470543f, 0.1599627f }, |
1390 | { 0.4323053f, 0.5183603f, 0.0492912f }, |
1391 | { -0.0085287f, 0.0400428f, 0.9684867f }, |
1392 | }}; |
1393 | |
1394 | skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ); |
1395 | skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50); |
1396 | |
1397 | *toXYZD50 = {{ |
1398 | { dstCone.vals[0] / srcCone.vals[0], 0, 0 }, |
1399 | { 0, dstCone.vals[1] / srcCone.vals[1], 0 }, |
1400 | { 0, 0, dstCone.vals[2] / srcCone.vals[2] }, |
1401 | }}; |
1402 | *toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms); |
1403 | *toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50); |
1404 | |
1405 | return true; |
1406 | } |
1407 | |
1408 | bool skcms_PrimariesToXYZD50(float rx, float ry, |
1409 | float gx, float gy, |
1410 | float bx, float by, |
1411 | float wx, float wy, |
1412 | skcms_Matrix3x3* toXYZD50) { |
1413 | if (!is_zero_to_one(rx) || !is_zero_to_one(ry) || |
1414 | !is_zero_to_one(gx) || !is_zero_to_one(gy) || |
1415 | !is_zero_to_one(bx) || !is_zero_to_one(by) || |
1416 | !is_zero_to_one(wx) || !is_zero_to_one(wy) || |
1417 | !toXYZD50) { |
1418 | return false; |
1419 | } |
1420 | |
1421 | // First, we need to convert xy values (primaries) to XYZ. |
1422 | skcms_Matrix3x3 primaries = {{ |
1423 | { rx, gx, bx }, |
1424 | { ry, gy, by }, |
1425 | { 1 - rx - ry, 1 - gx - gy, 1 - bx - by }, |
1426 | }}; |
1427 | skcms_Matrix3x3 primaries_inv; |
1428 | if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) { |
1429 | return false; |
1430 | } |
1431 | |
1432 | // Assumes that Y is 1.0f. |
1433 | skcms_Vector3 wXYZ = { { wx / wy, 1, (1 - wx - wy) / wy } }; |
1434 | skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ); |
1435 | |
1436 | skcms_Matrix3x3 toXYZ = {{ |
1437 | { XYZ.vals[0], 0, 0 }, |
1438 | { 0, XYZ.vals[1], 0 }, |
1439 | { 0, 0, XYZ.vals[2] }, |
1440 | }}; |
1441 | toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ); |
1442 | |
1443 | skcms_Matrix3x3 DXtoD50; |
1444 | if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) { |
1445 | return false; |
1446 | } |
1447 | |
1448 | *toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ); |
1449 | return true; |
1450 | } |
1451 | |
1452 | |
1453 | bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) { |
1454 | double a00 = src->vals[0][0], |
1455 | a01 = src->vals[1][0], |
1456 | a02 = src->vals[2][0], |
1457 | a10 = src->vals[0][1], |
1458 | a11 = src->vals[1][1], |
1459 | a12 = src->vals[2][1], |
1460 | a20 = src->vals[0][2], |
1461 | a21 = src->vals[1][2], |
1462 | a22 = src->vals[2][2]; |
1463 | |
1464 | double b0 = a00*a11 - a01*a10, |
1465 | b1 = a00*a12 - a02*a10, |
1466 | b2 = a01*a12 - a02*a11, |
1467 | b3 = a20, |
1468 | b4 = a21, |
1469 | b5 = a22; |
1470 | |
1471 | double determinant = b0*b5 |
1472 | - b1*b4 |
1473 | + b2*b3; |
1474 | |
1475 | if (determinant == 0) { |
1476 | return false; |
1477 | } |
1478 | |
1479 | double invdet = 1.0 / determinant; |
1480 | if (invdet > +FLT_MAX || invdet < -FLT_MAX || !isfinitef_((float)invdet)) { |
1481 | return false; |
1482 | } |
1483 | |
1484 | b0 *= invdet; |
1485 | b1 *= invdet; |
1486 | b2 *= invdet; |
1487 | b3 *= invdet; |
1488 | b4 *= invdet; |
1489 | b5 *= invdet; |
1490 | |
1491 | dst->vals[0][0] = (float)( a11*b5 - a12*b4 ); |
1492 | dst->vals[1][0] = (float)( a02*b4 - a01*b5 ); |
1493 | dst->vals[2][0] = (float)( + b2 ); |
1494 | dst->vals[0][1] = (float)( a12*b3 - a10*b5 ); |
1495 | dst->vals[1][1] = (float)( a00*b5 - a02*b3 ); |
1496 | dst->vals[2][1] = (float)( - b1 ); |
1497 | dst->vals[0][2] = (float)( a10*b4 - a11*b3 ); |
1498 | dst->vals[1][2] = (float)( a01*b3 - a00*b4 ); |
1499 | dst->vals[2][2] = (float)( + b0 ); |
1500 | |
1501 | for (int r = 0; r < 3; ++r) |
1502 | for (int c = 0; c < 3; ++c) { |
1503 | if (!isfinitef_(dst->vals[r][c])) { |
1504 | return false; |
1505 | } |
1506 | } |
1507 | return true; |
1508 | } |
1509 | |
1510 | skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) { |
1511 | skcms_Matrix3x3 m = { { { 0,0,0 },{ 0,0,0 },{ 0,0,0 } } }; |
1512 | for (int r = 0; r < 3; r++) |
1513 | for (int c = 0; c < 3; c++) { |
1514 | m.vals[r][c] = A->vals[r][0] * B->vals[0][c] |
1515 | + A->vals[r][1] * B->vals[1][c] |
1516 | + A->vals[r][2] * B->vals[2][c]; |
1517 | } |
1518 | return m; |
1519 | } |
1520 | |
1521 | #if defined(__clang__) |
1522 | [[clang::no_sanitize("float-divide-by-zero" )]] // Checked for by classify() on the way out. |
1523 | #endif |
1524 | bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) { |
1525 | TF_PQish pq; |
1526 | TF_HLGish hlg; |
1527 | switch (classify(*src, &pq, &hlg)) { |
1528 | case Bad: return false; |
1529 | case sRGBish: break; // handled below |
1530 | |
1531 | case PQish: |
1532 | *dst = { TFKind_marker(PQish), -pq.A, pq.D, 1.0f/pq.F |
1533 | , pq.B, -pq.E, 1.0f/pq.C}; |
1534 | return true; |
1535 | |
1536 | case HLGish: |
1537 | *dst = { TFKind_marker(HLGinvish), 1.0f/hlg.R, 1.0f/hlg.G |
1538 | , 1.0f/hlg.a, hlg.b, hlg.c, 0 }; |
1539 | return true; |
1540 | |
1541 | case HLGinvish: |
1542 | *dst = { TFKind_marker(HLGish), 1.0f/hlg.R, 1.0f/hlg.G |
1543 | , 1.0f/hlg.a, hlg.b, hlg.c, 0 }; |
1544 | return true; |
1545 | } |
1546 | |
1547 | assert (classify(*src) == sRGBish); |
1548 | |
1549 | // We're inverting this function, solving for x in terms of y. |
1550 | // y = (cx + f) x < d |
1551 | // (ax + b)^g + e x ≥ d |
1552 | // The inverse of this function can be expressed in the same piecewise form. |
1553 | skcms_TransferFunction inv = {0,0,0,0,0,0,0}; |
1554 | |
1555 | // We'll start by finding the new threshold inv.d. |
1556 | // In principle we should be able to find that by solving for y at x=d from either side. |
1557 | // (If those two d values aren't the same, it's a discontinuous transfer function.) |
1558 | float d_l = src->c * src->d + src->f, |
1559 | d_r = powf_(src->a * src->d + src->b, src->g) + src->e; |
1560 | if (fabsf_(d_l - d_r) > 1/512.0f) { |
1561 | return false; |
1562 | } |
1563 | inv.d = d_l; // TODO(mtklein): better in practice to choose d_r? |
1564 | |
1565 | // When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case. |
1566 | if (inv.d > 0) { |
1567 | // Inverting the linear section is pretty straightfoward: |
1568 | // y = cx + f |
1569 | // y - f = cx |
1570 | // (1/c)y - f/c = x |
1571 | inv.c = 1.0f/src->c; |
1572 | inv.f = -src->f/src->c; |
1573 | } |
1574 | |
1575 | // The interesting part is inverting the nonlinear section: |
1576 | // y = (ax + b)^g + e. |
1577 | // y - e = (ax + b)^g |
1578 | // (y - e)^1/g = ax + b |
1579 | // (y - e)^1/g - b = ax |
1580 | // (1/a)(y - e)^1/g - b/a = x |
1581 | // |
1582 | // To make that fit our form, we need to move the (1/a) term inside the exponentiation: |
1583 | // let k = (1/a)^g |
1584 | // (1/a)( y - e)^1/g - b/a = x |
1585 | // (ky - ke)^1/g - b/a = x |
1586 | |
1587 | float k = powf_(src->a, -src->g); // (1/a)^g == a^-g |
1588 | inv.g = 1.0f / src->g; |
1589 | inv.a = k; |
1590 | inv.b = -k * src->e; |
1591 | inv.e = -src->b / src->a; |
1592 | |
1593 | // We need to enforce the same constraints here that we do when fitting a curve, |
1594 | // a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true |
1595 | // of the source function if we're here. |
1596 | |
1597 | // Just like when fitting the curve, there's really no way to rescue a < 0. |
1598 | if (inv.a < 0) { |
1599 | return false; |
1600 | } |
1601 | // On the other hand we can rescue an ad+b that's gone slightly negative here. |
1602 | if (inv.a * inv.d + inv.b < 0) { |
1603 | inv.b = -inv.a * inv.d; |
1604 | } |
1605 | |
1606 | // That should usually make classify(inv) == sRGBish true, but there are a couple situations |
1607 | // where we might still fail here, like non-finite parameter values. |
1608 | if (classify(inv) != sRGBish) { |
1609 | return false; |
1610 | } |
1611 | |
1612 | assert (inv.a >= 0); |
1613 | assert (inv.a * inv.d + inv.b >= 0); |
1614 | |
1615 | // Now in principle we're done. |
1616 | // But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak |
1617 | // e or f of the inverse, depending on which segment contains src(1.0f). |
1618 | float s = skcms_TransferFunction_eval(src, 1.0f); |
1619 | if (!isfinitef_(s)) { |
1620 | return false; |
1621 | } |
1622 | |
1623 | float sign = s < 0 ? -1.0f : 1.0f; |
1624 | s *= sign; |
1625 | if (s < inv.d) { |
1626 | inv.f = 1.0f - sign * inv.c * s; |
1627 | } else { |
1628 | inv.e = 1.0f - sign * powf_(inv.a * s + inv.b, inv.g); |
1629 | } |
1630 | |
1631 | *dst = inv; |
1632 | return classify(*dst) == sRGBish; |
1633 | } |
1634 | |
1635 | // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // |
1636 | |
1637 | // From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}: |
1638 | // |
1639 | // tf(x) = cx + f x < d |
1640 | // tf(x) = (ax + b)^g + e x ≥ d |
1641 | // |
1642 | // When fitting, we add the additional constraint that both pieces meet at d: |
1643 | // |
1644 | // cd + f = (ad + b)^g + e |
1645 | // |
1646 | // Solving for e and folding it through gives an alternate formulation of the non-linear piece: |
1647 | // |
1648 | // tf(x) = cx + f x < d |
1649 | // tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d |
1650 | // |
1651 | // Our overall strategy is then: |
1652 | // For a couple tolerances, |
1653 | // - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows |
1654 | // - invert c,d,f |
1655 | // - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f |
1656 | // (and by constraint, inverted e) to the inverse of the table. |
1657 | // Return the parameters with least maximum error. |
1658 | // |
1659 | // To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals |
1660 | // of round-trip f_inv(x), the inverse of the non-linear piece of f(x). |
1661 | // |
1662 | // let y = Table(x) |
1663 | // r(x) = x - f_inv(y) |
1664 | // |
1665 | // ∂r/∂g = ln(ay + b)*(ay + b)^g |
1666 | // - ln(ad + b)*(ad + b)^g |
1667 | // ∂r/∂a = yg(ay + b)^(g-1) |
1668 | // - dg(ad + b)^(g-1) |
1669 | // ∂r/∂b = g(ay + b)^(g-1) |
1670 | // - g(ad + b)^(g-1) |
1671 | |
1672 | // Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P, |
1673 | // and fill out the gradient of the residual into dfdP. |
1674 | static float rg_nonlinear(float x, |
1675 | const skcms_Curve* curve, |
1676 | const skcms_TransferFunction* tf, |
1677 | float dfdP[3]) { |
1678 | const float y = eval_curve(curve, x); |
1679 | |
1680 | const float g = tf->g, a = tf->a, b = tf->b, |
1681 | c = tf->c, d = tf->d, f = tf->f; |
1682 | |
1683 | const float Y = fmaxf_(a*y + b, 0.0f), |
1684 | D = a*d + b; |
1685 | assert (D >= 0); |
1686 | |
1687 | // The gradient. |
1688 | dfdP[0] = logf_(Y)*powf_(Y, g) |
1689 | - logf_(D)*powf_(D, g); |
1690 | dfdP[1] = y*g*powf_(Y, g-1) |
1691 | - d*g*powf_(D, g-1); |
1692 | dfdP[2] = g*powf_(Y, g-1) |
1693 | - g*powf_(D, g-1); |
1694 | |
1695 | // The residual. |
1696 | const float f_inv = powf_(Y, g) |
1697 | - powf_(D, g) |
1698 | + c*d + f; |
1699 | return x - f_inv; |
1700 | } |
1701 | |
1702 | static bool gauss_newton_step(const skcms_Curve* curve, |
1703 | skcms_TransferFunction* tf, |
1704 | float x0, float dx, int N) { |
1705 | // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing. |
1706 | // |
1707 | // Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting). |
1708 | // |
1709 | // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P), |
1710 | // where r(P) is the residual vector |
1711 | // and Jf is the Jacobian matrix of f(), ∂r/∂P. |
1712 | // |
1713 | // Let's review the shape of each of these expressions: |
1714 | // r(P) is [N x 1], a column vector with one entry per value of x tested |
1715 | // Jf is [N x 3], a matrix with an entry for each (x,P) pair |
1716 | // Jf^T is [3 x N], the transpose of Jf |
1717 | // |
1718 | // Jf^T Jf is [3 x N] * [N x 3] == [3 x 3], a 3x3 matrix, |
1719 | // and so is its inverse (Jf^T Jf)^-1 |
1720 | // Jf^T r(P) is [3 x N] * [N x 1] == [3 x 1], a column vector with the same shape as P |
1721 | // |
1722 | // Our implementation strategy to get to the final ∆P is |
1723 | // 1) evaluate Jf^T Jf, call that lhs |
1724 | // 2) evaluate Jf^T r(P), call that rhs |
1725 | // 3) invert lhs |
1726 | // 4) multiply inverse lhs by rhs |
1727 | // |
1728 | // This is a friendly implementation strategy because we don't have to have any |
1729 | // buffers that scale with N, and equally nice don't have to perform any matrix |
1730 | // operations that are variable size. |
1731 | // |
1732 | // Other implementation strategies could trade this off, e.g. evaluating the |
1733 | // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by |
1734 | // the residuals. That would probably require implementing singular value |
1735 | // decomposition, and would create a [3 x N] matrix to be multiplied by the |
1736 | // [N x 1] residual vector, but on the upside I think that'd eliminate the |
1737 | // possibility of this gauss_newton_step() function ever failing. |
1738 | |
1739 | // 0) start off with lhs and rhs safely zeroed. |
1740 | skcms_Matrix3x3 lhs = {{ {0,0,0}, {0,0,0}, {0,0,0} }}; |
1741 | skcms_Vector3 rhs = { {0,0,0} }; |
1742 | |
1743 | // 1,2) evaluate lhs and evaluate rhs |
1744 | // We want to evaluate Jf only once, but both lhs and rhs involve Jf^T, |
1745 | // so we'll have to update lhs and rhs at the same time. |
1746 | for (int i = 0; i < N; i++) { |
1747 | float x = x0 + i*dx; |
1748 | |
1749 | float dfdP[3] = {0,0,0}; |
1750 | float resid = rg_nonlinear(x,curve,tf, dfdP); |
1751 | |
1752 | for (int r = 0; r < 3; r++) { |
1753 | for (int c = 0; c < 3; c++) { |
1754 | lhs.vals[r][c] += dfdP[r] * dfdP[c]; |
1755 | } |
1756 | rhs.vals[r] += dfdP[r] * resid; |
1757 | } |
1758 | } |
1759 | |
1760 | // If any of the 3 P parameters are unused, this matrix will be singular. |
1761 | // Detect those cases and fix them up to indentity instead, so we can invert. |
1762 | for (int k = 0; k < 3; k++) { |
1763 | if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 && |
1764 | lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0) { |
1765 | lhs.vals[k][k] = 1; |
1766 | } |
1767 | } |
1768 | |
1769 | // 3) invert lhs |
1770 | skcms_Matrix3x3 lhs_inv; |
1771 | if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) { |
1772 | return false; |
1773 | } |
1774 | |
1775 | // 4) multiply inverse lhs by rhs |
1776 | skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs); |
1777 | tf->g += dP.vals[0]; |
1778 | tf->a += dP.vals[1]; |
1779 | tf->b += dP.vals[2]; |
1780 | return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b); |
1781 | } |
1782 | |
1783 | static float max_roundtrip_error_checked(const skcms_Curve* curve, |
1784 | const skcms_TransferFunction* tf_inv) { |
1785 | skcms_TransferFunction tf; |
1786 | if (!skcms_TransferFunction_invert(tf_inv, &tf) || sRGBish != classify(tf)) { |
1787 | return INFINITY_; |
1788 | } |
1789 | |
1790 | skcms_TransferFunction tf_inv_again; |
1791 | if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) { |
1792 | return INFINITY_; |
1793 | } |
1794 | |
1795 | return skcms_MaxRoundtripError(curve, &tf_inv_again); |
1796 | } |
1797 | |
1798 | // Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't. |
1799 | static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) { |
1800 | // This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization. |
1801 | auto fixup_tf = [tf]() { |
1802 | // a must be non-negative. That ensures the function is monotonically increasing. |
1803 | // We don't really know how to fix up a if it goes negative. |
1804 | if (tf->a < 0) { |
1805 | return false; |
1806 | } |
1807 | // ad+b must be non-negative. That ensures we don't end up with complex numbers in powf. |
1808 | // We feel just barely not uneasy enough to tweak b so ad+b is zero in this case. |
1809 | if (tf->a * tf->d + tf->b < 0) { |
1810 | tf->b = -tf->a * tf->d; |
1811 | } |
1812 | assert (tf->a >= 0 && |
1813 | tf->a * tf->d + tf->b >= 0); |
1814 | |
1815 | // cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free |
1816 | // parameter so we can guarantee this. |
1817 | tf->e = tf->c*tf->d + tf->f |
1818 | - powf_(tf->a*tf->d + tf->b, tf->g); |
1819 | |
1820 | return true; |
1821 | }; |
1822 | |
1823 | if (!fixup_tf()) { |
1824 | return false; |
1825 | } |
1826 | |
1827 | // No matter where we start, dx should always represent N even steps from 0 to 1. |
1828 | const float dx = 1.0f / (N-1); |
1829 | |
1830 | skcms_TransferFunction best_tf = *tf; |
1831 | float best_max_error = INFINITY_; |
1832 | |
1833 | // Need this or several curves get worse... *sigh* |
1834 | float init_error = max_roundtrip_error_checked(curve, tf); |
1835 | if (init_error < best_max_error) { |
1836 | best_max_error = init_error; |
1837 | best_tf = *tf; |
1838 | } |
1839 | |
1840 | // As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2. |
1841 | for (int j = 0; j < 8; j++) { |
1842 | if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) || !fixup_tf()) { |
1843 | *tf = best_tf; |
1844 | return isfinitef_(best_max_error); |
1845 | } |
1846 | |
1847 | float max_error = max_roundtrip_error_checked(curve, tf); |
1848 | if (max_error < best_max_error) { |
1849 | best_max_error = max_error; |
1850 | best_tf = *tf; |
1851 | } |
1852 | } |
1853 | |
1854 | *tf = best_tf; |
1855 | return isfinitef_(best_max_error); |
1856 | } |
1857 | |
1858 | bool skcms_ApproximateCurve(const skcms_Curve* curve, |
1859 | skcms_TransferFunction* approx, |
1860 | float* max_error) { |
1861 | if (!curve || !approx || !max_error) { |
1862 | return false; |
1863 | } |
1864 | |
1865 | if (curve->table_entries == 0) { |
1866 | // No point approximating an skcms_TransferFunction with an skcms_TransferFunction! |
1867 | return false; |
1868 | } |
1869 | |
1870 | if (curve->table_entries == 1 || curve->table_entries > (uint32_t)INT_MAX) { |
1871 | // We need at least two points, and must put some reasonable cap on the maximum number. |
1872 | return false; |
1873 | } |
1874 | |
1875 | int N = (int)curve->table_entries; |
1876 | const float dx = 1.0f / (N - 1); |
1877 | |
1878 | *max_error = INFINITY_; |
1879 | const float kTolerances[] = { 1.5f / 65535.0f, 1.0f / 512.0f }; |
1880 | for (int t = 0; t < ARRAY_COUNT(kTolerances); t++) { |
1881 | skcms_TransferFunction tf, |
1882 | tf_inv; |
1883 | |
1884 | // It's problematic to fit curves with non-zero f, so always force it to zero explicitly. |
1885 | tf.f = 0.0f; |
1886 | int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d); |
1887 | |
1888 | if (L == N) { |
1889 | // If the entire data set was linear, move the coefficients to the nonlinear portion |
1890 | // with G == 1. This lets use a canonical representation with d == 0. |
1891 | tf.g = 1; |
1892 | tf.a = tf.c; |
1893 | tf.b = tf.f; |
1894 | tf.c = tf.d = tf.e = tf.f = 0; |
1895 | } else if (L == N - 1) { |
1896 | // Degenerate case with only two points in the nonlinear segment. Solve directly. |
1897 | tf.g = 1; |
1898 | tf.a = (eval_curve(curve, (N-1)*dx) - |
1899 | eval_curve(curve, (N-2)*dx)) |
1900 | / dx; |
1901 | tf.b = eval_curve(curve, (N-2)*dx) |
1902 | - tf.a * (N-2)*dx; |
1903 | tf.e = 0; |
1904 | } else { |
1905 | // Start by guessing a gamma-only curve through the midpoint. |
1906 | int mid = (L + N) / 2; |
1907 | float mid_x = mid / (N - 1.0f); |
1908 | float mid_y = eval_curve(curve, mid_x); |
1909 | tf.g = log2f_(mid_y) / log2f_(mid_x); |
1910 | tf.a = 1; |
1911 | tf.b = 0; |
1912 | tf.e = tf.c*tf.d + tf.f |
1913 | - powf_(tf.a*tf.d + tf.b, tf.g); |
1914 | |
1915 | |
1916 | if (!skcms_TransferFunction_invert(&tf, &tf_inv) || |
1917 | !fit_nonlinear(curve, L,N, &tf_inv)) { |
1918 | continue; |
1919 | } |
1920 | |
1921 | // We fit tf_inv, so calculate tf to keep in sync. |
1922 | // fit_nonlinear() should guarantee invertibility. |
1923 | if (!skcms_TransferFunction_invert(&tf_inv, &tf)) { |
1924 | assert(false); |
1925 | continue; |
1926 | } |
1927 | } |
1928 | |
1929 | // We'd better have a sane, sRGB-ish TF by now. |
1930 | // Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish; |
1931 | // anything else is just some accident of math and the way we pun tf.g as a type flag. |
1932 | // fit_nonlinear() should guarantee this, but the special cases may fail this test. |
1933 | if (sRGBish != classify(tf)) { |
1934 | continue; |
1935 | } |
1936 | |
1937 | // We find our error by roundtripping the table through tf_inv. |
1938 | // |
1939 | // (The most likely use case for this approximation is to be inverted and |
1940 | // used as the transfer function for a destination color space.) |
1941 | // |
1942 | // We've kept tf and tf_inv in sync above, but we can't guarantee that tf is |
1943 | // invertible, so re-verify that here (and use the new inverse for testing). |
1944 | // fit_nonlinear() should guarantee this, but the special cases that don't use |
1945 | // it may fail this test. |
1946 | if (!skcms_TransferFunction_invert(&tf, &tf_inv)) { |
1947 | continue; |
1948 | } |
1949 | |
1950 | float err = skcms_MaxRoundtripError(curve, &tf_inv); |
1951 | if (*max_error > err) { |
1952 | *max_error = err; |
1953 | *approx = tf; |
1954 | } |
1955 | } |
1956 | return isfinitef_(*max_error); |
1957 | } |
1958 | |
1959 | // ~~~~ Impl. of skcms_Transform() ~~~~ |
1960 | |
1961 | typedef enum { |
1962 | Op_load_a8, |
1963 | Op_load_g8, |
1964 | Op_load_8888_palette8, |
1965 | Op_load_4444, |
1966 | Op_load_565, |
1967 | Op_load_888, |
1968 | Op_load_8888, |
1969 | Op_load_1010102, |
1970 | Op_load_161616LE, |
1971 | Op_load_16161616LE, |
1972 | Op_load_161616BE, |
1973 | Op_load_16161616BE, |
1974 | Op_load_hhh, |
1975 | Op_load_hhhh, |
1976 | Op_load_fff, |
1977 | Op_load_ffff, |
1978 | |
1979 | Op_swap_rb, |
1980 | Op_clamp, |
1981 | Op_invert, |
1982 | Op_force_opaque, |
1983 | Op_premul, |
1984 | Op_unpremul, |
1985 | Op_matrix_3x3, |
1986 | Op_matrix_3x4, |
1987 | Op_lab_to_xyz, |
1988 | |
1989 | Op_tf_r, |
1990 | Op_tf_g, |
1991 | Op_tf_b, |
1992 | Op_tf_a, |
1993 | |
1994 | Op_pq_r, |
1995 | Op_pq_g, |
1996 | Op_pq_b, |
1997 | Op_pq_a, |
1998 | |
1999 | Op_hlg_r, |
2000 | Op_hlg_g, |
2001 | Op_hlg_b, |
2002 | Op_hlg_a, |
2003 | |
2004 | Op_hlginv_r, |
2005 | Op_hlginv_g, |
2006 | Op_hlginv_b, |
2007 | Op_hlginv_a, |
2008 | |
2009 | Op_table_r, |
2010 | Op_table_g, |
2011 | Op_table_b, |
2012 | Op_table_a, |
2013 | |
2014 | Op_clut, |
2015 | |
2016 | Op_store_a8, |
2017 | Op_store_g8, |
2018 | Op_store_4444, |
2019 | Op_store_565, |
2020 | Op_store_888, |
2021 | Op_store_8888, |
2022 | Op_store_1010102, |
2023 | Op_store_161616LE, |
2024 | Op_store_16161616LE, |
2025 | Op_store_161616BE, |
2026 | Op_store_16161616BE, |
2027 | Op_store_hhh, |
2028 | Op_store_hhhh, |
2029 | Op_store_fff, |
2030 | Op_store_ffff, |
2031 | } Op; |
2032 | |
2033 | #if defined(__clang__) |
2034 | template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N))); |
2035 | #elif defined(__GNUC__) |
2036 | // For some reason GCC accepts this nonsense, but not the more straightforward version, |
2037 | // template <int N, typename T> using Vec = T __attribute__((vector_size(N*sizeof(T)))); |
2038 | template <int N, typename T> |
2039 | struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; }; |
2040 | |
2041 | template <int N, typename T> using Vec = typename VecHelper<N,T>::V; |
2042 | #endif |
2043 | |
2044 | // First, instantiate our default exec_ops() implementation using the default compiliation target. |
2045 | |
2046 | namespace baseline { |
2047 | #if defined(SKCMS_PORTABLE) || !(defined(__clang__) || defined(__GNUC__)) \ |
2048 | || (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__)) |
2049 | #define N 1 |
2050 | template <typename T> using V = T; |
2051 | using Color = float; |
2052 | #elif defined(__AVX512F__) |
2053 | #define N 16 |
2054 | template <typename T> using V = Vec<N,T>; |
2055 | using Color = float; |
2056 | #elif defined(__AVX__) |
2057 | #define N 8 |
2058 | template <typename T> using V = Vec<N,T>; |
2059 | using Color = float; |
2060 | #elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16) |
2061 | #define N 8 |
2062 | template <typename T> using V = Vec<N,T>; |
2063 | using Color = _Float16; |
2064 | #else |
2065 | #define N 4 |
2066 | template <typename T> using V = Vec<N,T>; |
2067 | using Color = float; |
2068 | #endif |
2069 | |
2070 | #include "src/Transform_inl.h" |
2071 | #undef N |
2072 | } |
2073 | |
2074 | // Now, instantiate any other versions of run_program() we may want for runtime detection. |
2075 | #if !defined(SKCMS_PORTABLE) && \ |
2076 | !defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \ |
2077 | (( defined(__clang__) && __clang_major__ >= 5) || \ |
2078 | (!defined(__clang__) && defined(__GNUC__))) \ |
2079 | && defined(__x86_64__) |
2080 | |
2081 | #if !defined(__AVX2__) |
2082 | #if defined(__clang__) |
2083 | #pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function) |
2084 | #elif defined(__GNUC__) |
2085 | #pragma GCC push_options |
2086 | #pragma GCC target("avx2,f16c") |
2087 | #endif |
2088 | |
2089 | namespace hsw { |
2090 | #define USING_AVX |
2091 | #define USING_AVX_F16C |
2092 | #define USING_AVX2 |
2093 | #define N 8 |
2094 | template <typename T> using V = Vec<N,T>; |
2095 | using Color = float; |
2096 | |
2097 | #include "src/Transform_inl.h" |
2098 | |
2099 | // src/Transform_inl.h will undefine USING_* for us. |
2100 | #undef N |
2101 | } |
2102 | |
2103 | #if defined(__clang__) |
2104 | #pragma clang attribute pop |
2105 | #elif defined(__GNUC__) |
2106 | #pragma GCC pop_options |
2107 | #endif |
2108 | |
2109 | #define TEST_FOR_HSW |
2110 | #endif |
2111 | |
2112 | #if !defined(__AVX512F__) |
2113 | #if defined(__clang__) |
2114 | #pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function) |
2115 | #elif defined(__GNUC__) |
2116 | #pragma GCC push_options |
2117 | #pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl") |
2118 | #endif |
2119 | |
2120 | namespace skx { |
2121 | #define USING_AVX512F |
2122 | #define N 16 |
2123 | template <typename T> using V = Vec<N,T>; |
2124 | using Color = float; |
2125 | |
2126 | #include "src/Transform_inl.h" |
2127 | |
2128 | // src/Transform_inl.h will undefine USING_* for us. |
2129 | #undef N |
2130 | } |
2131 | |
2132 | #if defined(__clang__) |
2133 | #pragma clang attribute pop |
2134 | #elif defined(__GNUC__) |
2135 | #pragma GCC pop_options |
2136 | #endif |
2137 | |
2138 | #define TEST_FOR_SKX |
2139 | #endif |
2140 | |
2141 | #if defined(TEST_FOR_HSW) || defined(TEST_FOR_SKX) |
2142 | enum class CpuType { None, HSW, SKX }; |
2143 | static CpuType cpu_type() { |
2144 | static const CpuType type = []{ |
2145 | // See http://www.sandpile.org/x86/cpuid.htm |
2146 | |
2147 | // First, a basic cpuid(1) lets us check prerequisites for HSW, SKX. |
2148 | uint32_t eax, ebx, ecx, edx; |
2149 | __asm__ __volatile__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) |
2150 | : "0" (1), "2" (0)); |
2151 | if ((edx & (1u<<25)) && // SSE |
2152 | (edx & (1u<<26)) && // SSE2 |
2153 | (ecx & (1u<< 0)) && // SSE3 |
2154 | (ecx & (1u<< 9)) && // SSSE3 |
2155 | (ecx & (1u<<12)) && // FMA (N.B. not used, avoided even) |
2156 | (ecx & (1u<<19)) && // SSE4.1 |
2157 | (ecx & (1u<<20)) && // SSE4.2 |
2158 | (ecx & (1u<<26)) && // XSAVE |
2159 | (ecx & (1u<<27)) && // OSXSAVE |
2160 | (ecx & (1u<<28)) && // AVX |
2161 | (ecx & (1u<<29))) { // F16C |
2162 | |
2163 | // Call cpuid(7) to check for AVX2 and AVX-512 bits. |
2164 | __asm__ __volatile__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) |
2165 | : "0" (7), "2" (0)); |
2166 | // eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved. |
2167 | uint32_t xcr0, dont_need_edx; |
2168 | __asm__ __volatile__("xgetbv" : "=a" (xcr0), "=d" (dont_need_edx) : "c" (0)); |
2169 | |
2170 | if ((xcr0 & (1u<<1)) && // XMM register state saved? |
2171 | (xcr0 & (1u<<2)) && // YMM register state saved? |
2172 | (ebx & (1u<<5))) { // AVX2 |
2173 | // At this point we're at least HSW. Continue checking for SKX. |
2174 | if ((xcr0 & (1u<< 5)) && // Opmasks state saved? |
2175 | (xcr0 & (1u<< 6)) && // First 16 ZMM registers saved? |
2176 | (xcr0 & (1u<< 7)) && // High 16 ZMM registers saved? |
2177 | (ebx & (1u<<16)) && // AVX512F |
2178 | (ebx & (1u<<17)) && // AVX512DQ |
2179 | (ebx & (1u<<28)) && // AVX512CD |
2180 | (ebx & (1u<<30)) && // AVX512BW |
2181 | (ebx & (1u<<31))) { // AVX512VL |
2182 | return CpuType::SKX; |
2183 | } |
2184 | return CpuType::HSW; |
2185 | } |
2186 | } |
2187 | return CpuType::None; |
2188 | }(); |
2189 | return type; |
2190 | } |
2191 | #endif |
2192 | |
2193 | #endif |
2194 | |
2195 | typedef struct { |
2196 | Op op; |
2197 | const void* arg; |
2198 | } OpAndArg; |
2199 | |
2200 | static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) { |
2201 | static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = { |
2202 | { Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r }, |
2203 | { Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g }, |
2204 | { Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b }, |
2205 | { Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a }, |
2206 | }; |
2207 | const auto& op = ops[channel]; |
2208 | |
2209 | if (curve->table_entries == 0) { |
2210 | const OpAndArg noop = { Op_load_a8/*doesn't matter*/, nullptr }; |
2211 | |
2212 | const skcms_TransferFunction& tf = curve->parametric; |
2213 | |
2214 | if (tf.g == 1 && tf.a == 1 && |
2215 | tf.b == 0 && tf.c == 0 && tf.d == 0 && tf.e == 0 && tf.f == 0) { |
2216 | return noop; |
2217 | } |
2218 | |
2219 | switch (classify(tf)) { |
2220 | case Bad: return noop; |
2221 | case sRGBish: return OpAndArg{op.sRGBish, &tf}; |
2222 | case PQish: return OpAndArg{op.PQish, &tf}; |
2223 | case HLGish: return OpAndArg{op.HLGish, &tf}; |
2224 | case HLGinvish: return OpAndArg{op.HLGinvish, &tf}; |
2225 | } |
2226 | } |
2227 | return OpAndArg{op.table, curve}; |
2228 | } |
2229 | |
2230 | static size_t bytes_per_pixel(skcms_PixelFormat fmt) { |
2231 | switch (fmt >> 1) { // ignore rgb/bgr |
2232 | case skcms_PixelFormat_A_8 >> 1: return 1; |
2233 | case skcms_PixelFormat_G_8 >> 1: return 1; |
2234 | case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: return 1; |
2235 | case skcms_PixelFormat_ABGR_4444 >> 1: return 2; |
2236 | case skcms_PixelFormat_RGB_565 >> 1: return 2; |
2237 | case skcms_PixelFormat_RGB_888 >> 1: return 3; |
2238 | case skcms_PixelFormat_RGBA_8888 >> 1: return 4; |
2239 | case skcms_PixelFormat_RGBA_8888_sRGB >> 1: return 4; |
2240 | case skcms_PixelFormat_RGBA_1010102 >> 1: return 4; |
2241 | case skcms_PixelFormat_RGB_161616LE >> 1: return 6; |
2242 | case skcms_PixelFormat_RGBA_16161616LE >> 1: return 8; |
2243 | case skcms_PixelFormat_RGB_161616BE >> 1: return 6; |
2244 | case skcms_PixelFormat_RGBA_16161616BE >> 1: return 8; |
2245 | case skcms_PixelFormat_RGB_hhh_Norm >> 1: return 6; |
2246 | case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: return 8; |
2247 | case skcms_PixelFormat_RGB_hhh >> 1: return 6; |
2248 | case skcms_PixelFormat_RGBA_hhhh >> 1: return 8; |
2249 | case skcms_PixelFormat_RGB_fff >> 1: return 12; |
2250 | case skcms_PixelFormat_RGBA_ffff >> 1: return 16; |
2251 | } |
2252 | assert(false); |
2253 | return 0; |
2254 | } |
2255 | |
2256 | static bool prep_for_destination(const skcms_ICCProfile* profile, |
2257 | skcms_Matrix3x3* fromXYZD50, |
2258 | skcms_TransferFunction* invR, |
2259 | skcms_TransferFunction* invG, |
2260 | skcms_TransferFunction* invB) { |
2261 | // We only support destinations with parametric transfer functions |
2262 | // and with gamuts that can be transformed from XYZD50. |
2263 | return profile->has_trc |
2264 | && profile->has_toXYZD50 |
2265 | && profile->trc[0].table_entries == 0 |
2266 | && profile->trc[1].table_entries == 0 |
2267 | && profile->trc[2].table_entries == 0 |
2268 | && skcms_TransferFunction_invert(&profile->trc[0].parametric, invR) |
2269 | && skcms_TransferFunction_invert(&profile->trc[1].parametric, invG) |
2270 | && skcms_TransferFunction_invert(&profile->trc[2].parametric, invB) |
2271 | && skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50); |
2272 | } |
2273 | |
2274 | bool skcms_Transform(const void* src, |
2275 | skcms_PixelFormat srcFmt, |
2276 | skcms_AlphaFormat srcAlpha, |
2277 | const skcms_ICCProfile* srcProfile, |
2278 | void* dst, |
2279 | skcms_PixelFormat dstFmt, |
2280 | skcms_AlphaFormat dstAlpha, |
2281 | const skcms_ICCProfile* dstProfile, |
2282 | size_t npixels) { |
2283 | return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile, |
2284 | dst, dstFmt, dstAlpha, dstProfile, |
2285 | npixels, nullptr); |
2286 | } |
2287 | |
2288 | bool skcms_TransformWithPalette(const void* src, |
2289 | skcms_PixelFormat srcFmt, |
2290 | skcms_AlphaFormat srcAlpha, |
2291 | const skcms_ICCProfile* srcProfile, |
2292 | void* dst, |
2293 | skcms_PixelFormat dstFmt, |
2294 | skcms_AlphaFormat dstAlpha, |
2295 | const skcms_ICCProfile* dstProfile, |
2296 | size_t nz, |
2297 | const void* palette) { |
2298 | const size_t dst_bpp = bytes_per_pixel(dstFmt), |
2299 | src_bpp = bytes_per_pixel(srcFmt); |
2300 | // Let's just refuse if the request is absurdly big. |
2301 | if (nz * dst_bpp > INT_MAX || nz * src_bpp > INT_MAX) { |
2302 | return false; |
2303 | } |
2304 | int n = (int)nz; |
2305 | |
2306 | // Null profiles default to sRGB. Passing null for both is handy when doing format conversion. |
2307 | if (!srcProfile) { |
2308 | srcProfile = skcms_sRGB_profile(); |
2309 | } |
2310 | if (!dstProfile) { |
2311 | dstProfile = skcms_sRGB_profile(); |
2312 | } |
2313 | |
2314 | // We can't transform in place unless the PixelFormats are the same size. |
2315 | if (dst == src && dst_bpp != src_bpp) { |
2316 | return false; |
2317 | } |
2318 | // TODO: more careful alias rejection (like, dst == src + 1)? |
2319 | |
2320 | if (needs_palette(srcFmt) && !palette) { |
2321 | return false; |
2322 | } |
2323 | |
2324 | Op program [32]; |
2325 | const void* arguments[32]; |
2326 | |
2327 | Op* ops = program; |
2328 | const void** args = arguments; |
2329 | |
2330 | // These are always parametric curves of some sort. |
2331 | skcms_Curve dst_curves[3]; |
2332 | dst_curves[0].table_entries = |
2333 | dst_curves[1].table_entries = |
2334 | dst_curves[2].table_entries = 0; |
2335 | |
2336 | skcms_Matrix3x3 from_xyz; |
2337 | |
2338 | switch (srcFmt >> 1) { |
2339 | default: return false; |
2340 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_load_a8; break; |
2341 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_load_g8; break; |
2342 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_load_4444; break; |
2343 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_load_565; break; |
2344 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_load_888; break; |
2345 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_load_8888; break; |
2346 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_load_1010102; break; |
2347 | case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_load_161616LE; break; |
2348 | case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_load_16161616LE; break; |
2349 | case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_load_161616BE; break; |
2350 | case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_load_16161616BE; break; |
2351 | case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_load_hhh; break; |
2352 | case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_load_hhhh; break; |
2353 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_load_hhh; break; |
2354 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_load_hhhh; break; |
2355 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_load_fff; break; |
2356 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_load_ffff; break; |
2357 | |
2358 | case skcms_PixelFormat_RGBA_8888_Palette8 >> 1: *ops++ = Op_load_8888_palette8; |
2359 | *args++ = palette; |
2360 | break; |
2361 | case skcms_PixelFormat_RGBA_8888_sRGB >> 1: |
2362 | *ops++ = Op_load_8888; |
2363 | *ops++ = Op_tf_r; *args++ = skcms_sRGB_TransferFunction(); |
2364 | *ops++ = Op_tf_g; *args++ = skcms_sRGB_TransferFunction(); |
2365 | *ops++ = Op_tf_b; *args++ = skcms_sRGB_TransferFunction(); |
2366 | break; |
2367 | } |
2368 | if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm || |
2369 | srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) { |
2370 | *ops++ = Op_clamp; |
2371 | } |
2372 | if (srcFmt & 1) { |
2373 | *ops++ = Op_swap_rb; |
2374 | } |
2375 | skcms_ICCProfile gray_dst_profile; |
2376 | if ((dstFmt >> 1) == (skcms_PixelFormat_G_8 >> 1)) { |
2377 | // When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform |
2378 | // luminance (Y) by the destination transfer function. |
2379 | gray_dst_profile = *dstProfile; |
2380 | skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50); |
2381 | dstProfile = &gray_dst_profile; |
2382 | } |
2383 | |
2384 | if (srcProfile->data_color_space == skcms_Signature_CMYK) { |
2385 | // Photoshop creates CMYK images as inverse CMYK. |
2386 | // These happen to be the only ones we've _ever_ seen. |
2387 | *ops++ = Op_invert; |
2388 | // With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K. |
2389 | srcAlpha = skcms_AlphaFormat_Unpremul; |
2390 | } |
2391 | |
2392 | if (srcAlpha == skcms_AlphaFormat_Opaque) { |
2393 | *ops++ = Op_force_opaque; |
2394 | } else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
2395 | *ops++ = Op_unpremul; |
2396 | } |
2397 | |
2398 | if (dstProfile != srcProfile) { |
2399 | |
2400 | if (!prep_for_destination(dstProfile, |
2401 | &from_xyz, |
2402 | &dst_curves[0].parametric, |
2403 | &dst_curves[1].parametric, |
2404 | &dst_curves[2].parametric)) { |
2405 | return false; |
2406 | } |
2407 | |
2408 | if (srcProfile->has_A2B) { |
2409 | if (srcProfile->A2B.input_channels) { |
2410 | for (int i = 0; i < (int)srcProfile->A2B.input_channels; i++) { |
2411 | OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i); |
2412 | if (oa.arg) { |
2413 | *ops++ = oa.op; |
2414 | *args++ = oa.arg; |
2415 | } |
2416 | } |
2417 | *ops++ = Op_clamp; |
2418 | *ops++ = Op_clut; |
2419 | *args++ = &srcProfile->A2B; |
2420 | } |
2421 | |
2422 | if (srcProfile->A2B.matrix_channels == 3) { |
2423 | for (int i = 0; i < 3; i++) { |
2424 | OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i); |
2425 | if (oa.arg) { |
2426 | *ops++ = oa.op; |
2427 | *args++ = oa.arg; |
2428 | } |
2429 | } |
2430 | |
2431 | static const skcms_Matrix3x4 I = {{ |
2432 | {1,0,0,0}, |
2433 | {0,1,0,0}, |
2434 | {0,0,1,0}, |
2435 | }}; |
2436 | if (0 != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) { |
2437 | *ops++ = Op_matrix_3x4; |
2438 | *args++ = &srcProfile->A2B.matrix; |
2439 | } |
2440 | } |
2441 | |
2442 | if (srcProfile->A2B.output_channels == 3) { |
2443 | for (int i = 0; i < 3; i++) { |
2444 | OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i); |
2445 | if (oa.arg) { |
2446 | *ops++ = oa.op; |
2447 | *args++ = oa.arg; |
2448 | } |
2449 | } |
2450 | } |
2451 | |
2452 | if (srcProfile->pcs == skcms_Signature_Lab) { |
2453 | *ops++ = Op_lab_to_xyz; |
2454 | } |
2455 | |
2456 | } else if (srcProfile->has_trc && srcProfile->has_toXYZD50) { |
2457 | for (int i = 0; i < 3; i++) { |
2458 | OpAndArg oa = select_curve_op(&srcProfile->trc[i], i); |
2459 | if (oa.arg) { |
2460 | *ops++ = oa.op; |
2461 | *args++ = oa.arg; |
2462 | } |
2463 | } |
2464 | } else { |
2465 | return false; |
2466 | } |
2467 | |
2468 | // A2B sources should already be in XYZD50 at this point. |
2469 | // Others still need to be transformed using their toXYZD50 matrix. |
2470 | // N.B. There are profiles that contain both A2B tags and toXYZD50 matrices. |
2471 | // If we use the A2B tags, we need to ignore the XYZD50 matrix entirely. |
2472 | assert (srcProfile->has_A2B || srcProfile->has_toXYZD50); |
2473 | static const skcms_Matrix3x3 I = {{ |
2474 | { 1.0f, 0.0f, 0.0f }, |
2475 | { 0.0f, 1.0f, 0.0f }, |
2476 | { 0.0f, 0.0f, 1.0f }, |
2477 | }}; |
2478 | const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50; |
2479 | |
2480 | // There's a chance the source and destination gamuts are identical, |
2481 | // in which case we can skip the gamut transform. |
2482 | if (0 != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) { |
2483 | // Concat the entire gamut transform into from_xyz, |
2484 | // now slightly misnamed but it's a handy spot to stash the result. |
2485 | from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz); |
2486 | *ops++ = Op_matrix_3x3; |
2487 | *args++ = &from_xyz; |
2488 | } |
2489 | |
2490 | // Encode back to dst RGB using its parametric transfer functions. |
2491 | for (int i = 0; i < 3; i++) { |
2492 | OpAndArg oa = select_curve_op(dst_curves+i, i); |
2493 | if (oa.arg) { |
2494 | assert (oa.op != Op_table_r && |
2495 | oa.op != Op_table_g && |
2496 | oa.op != Op_table_b && |
2497 | oa.op != Op_table_a); |
2498 | *ops++ = oa.op; |
2499 | *args++ = oa.arg; |
2500 | } |
2501 | } |
2502 | } |
2503 | |
2504 | // Clamp here before premul to make sure we're clamping to normalized values _and_ gamut, |
2505 | // not just to values that fit in [0,1]. |
2506 | // |
2507 | // E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5), |
2508 | // but would be carrying r > 1, which is really unexpected for downstream consumers. |
2509 | if (dstFmt < skcms_PixelFormat_RGB_hhh) { |
2510 | *ops++ = Op_clamp; |
2511 | } |
2512 | if (dstAlpha == skcms_AlphaFormat_Opaque) { |
2513 | *ops++ = Op_force_opaque; |
2514 | } else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) { |
2515 | *ops++ = Op_premul; |
2516 | } |
2517 | if (dstFmt & 1) { |
2518 | *ops++ = Op_swap_rb; |
2519 | } |
2520 | switch (dstFmt >> 1) { |
2521 | default: return false; |
2522 | case skcms_PixelFormat_A_8 >> 1: *ops++ = Op_store_a8; break; |
2523 | case skcms_PixelFormat_G_8 >> 1: *ops++ = Op_store_g8; break; |
2524 | case skcms_PixelFormat_ABGR_4444 >> 1: *ops++ = Op_store_4444; break; |
2525 | case skcms_PixelFormat_RGB_565 >> 1: *ops++ = Op_store_565; break; |
2526 | case skcms_PixelFormat_RGB_888 >> 1: *ops++ = Op_store_888; break; |
2527 | case skcms_PixelFormat_RGBA_8888 >> 1: *ops++ = Op_store_8888; break; |
2528 | case skcms_PixelFormat_RGBA_1010102 >> 1: *ops++ = Op_store_1010102; break; |
2529 | case skcms_PixelFormat_RGB_161616LE >> 1: *ops++ = Op_store_161616LE; break; |
2530 | case skcms_PixelFormat_RGBA_16161616LE >> 1: *ops++ = Op_store_16161616LE; break; |
2531 | case skcms_PixelFormat_RGB_161616BE >> 1: *ops++ = Op_store_161616BE; break; |
2532 | case skcms_PixelFormat_RGBA_16161616BE >> 1: *ops++ = Op_store_16161616BE; break; |
2533 | case skcms_PixelFormat_RGB_hhh_Norm >> 1: *ops++ = Op_store_hhh; break; |
2534 | case skcms_PixelFormat_RGBA_hhhh_Norm >> 1: *ops++ = Op_store_hhhh; break; |
2535 | case skcms_PixelFormat_RGB_hhh >> 1: *ops++ = Op_store_hhh; break; |
2536 | case skcms_PixelFormat_RGBA_hhhh >> 1: *ops++ = Op_store_hhhh; break; |
2537 | case skcms_PixelFormat_RGB_fff >> 1: *ops++ = Op_store_fff; break; |
2538 | case skcms_PixelFormat_RGBA_ffff >> 1: *ops++ = Op_store_ffff; break; |
2539 | |
2540 | case skcms_PixelFormat_RGBA_8888_sRGB >> 1: |
2541 | *ops++ = Op_tf_r; *args++ = skcms_sRGB_Inverse_TransferFunction(); |
2542 | *ops++ = Op_tf_g; *args++ = skcms_sRGB_Inverse_TransferFunction(); |
2543 | *ops++ = Op_tf_b; *args++ = skcms_sRGB_Inverse_TransferFunction(); |
2544 | *ops++ = Op_store_8888; |
2545 | break; |
2546 | } |
2547 | |
2548 | auto run = baseline::run_program; |
2549 | #if defined(TEST_FOR_HSW) |
2550 | switch (cpu_type()) { |
2551 | case CpuType::None: break; |
2552 | case CpuType::HSW: run = hsw::run_program; break; |
2553 | case CpuType::SKX: run = hsw::run_program; break; |
2554 | } |
2555 | #endif |
2556 | #if defined(TEST_FOR_SKX) |
2557 | switch (cpu_type()) { |
2558 | case CpuType::None: break; |
2559 | case CpuType::HSW: break; |
2560 | case CpuType::SKX: run = skx::run_program; break; |
2561 | } |
2562 | #endif |
2563 | run(program, arguments, (const char*)src, (char*)dst, n, src_bpp,dst_bpp); |
2564 | return true; |
2565 | } |
2566 | |
2567 | static void assert_usable_as_destination(const skcms_ICCProfile* profile) { |
2568 | #if defined(NDEBUG) |
2569 | (void)profile; |
2570 | #else |
2571 | skcms_Matrix3x3 fromXYZD50; |
2572 | skcms_TransferFunction invR, invG, invB; |
2573 | assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB)); |
2574 | #endif |
2575 | } |
2576 | |
2577 | bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) { |
2578 | skcms_Matrix3x3 fromXYZD50; |
2579 | if (!profile->has_trc || !profile->has_toXYZD50 |
2580 | || !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) { |
2581 | return false; |
2582 | } |
2583 | |
2584 | skcms_TransferFunction tf[3]; |
2585 | for (int i = 0; i < 3; i++) { |
2586 | skcms_TransferFunction inv; |
2587 | if (profile->trc[i].table_entries == 0 |
2588 | && skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) { |
2589 | tf[i] = profile->trc[i].parametric; |
2590 | continue; |
2591 | } |
2592 | |
2593 | float max_error; |
2594 | // Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible. |
2595 | if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) { |
2596 | return false; |
2597 | } |
2598 | } |
2599 | |
2600 | for (int i = 0; i < 3; ++i) { |
2601 | profile->trc[i].table_entries = 0; |
2602 | profile->trc[i].parametric = tf[i]; |
2603 | } |
2604 | |
2605 | assert_usable_as_destination(profile); |
2606 | return true; |
2607 | } |
2608 | |
2609 | bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) { |
2610 | // Operate on a copy of profile, so we can choose the best TF for the original curves |
2611 | skcms_ICCProfile result = *profile; |
2612 | if (!skcms_MakeUsableAsDestination(&result)) { |
2613 | return false; |
2614 | } |
2615 | |
2616 | int best_tf = 0; |
2617 | float min_max_error = INFINITY_; |
2618 | for (int i = 0; i < 3; i++) { |
2619 | skcms_TransferFunction inv; |
2620 | if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) { |
2621 | return false; |
2622 | } |
2623 | |
2624 | float err = 0; |
2625 | for (int j = 0; j < 3; ++j) { |
2626 | err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv)); |
2627 | } |
2628 | if (min_max_error > err) { |
2629 | min_max_error = err; |
2630 | best_tf = i; |
2631 | } |
2632 | } |
2633 | |
2634 | for (int i = 0; i < 3; i++) { |
2635 | result.trc[i].parametric = result.trc[best_tf].parametric; |
2636 | } |
2637 | |
2638 | *profile = result; |
2639 | assert_usable_as_destination(profile); |
2640 | return true; |
2641 | } |
2642 | |