skcms.cc source code [Skia/third_party/skcms/skcms.cc]

1	/*
2	* Copyright 2018 Google Inc.
3	*
4	* Use of this source code is governed by a BSD-style license that can be
5	* found in the LICENSE file.
6	*/
7
8	#include "skcms.h"
9	#include "skcms_internal.h"
10	#include <assert.h>
11	#include <float.h>
12	#include <limits.h>
13	#include <stdlib.h>
14	#include <string.h>
15
16	#if defined(__ARM_NEON)
17	#include <arm_neon.h>
18	#elif defined(__SSE__)
19	#include <immintrin.h>
20
21	#if defined(__clang__)
22	// That #include <immintrin.h> is usually enough, but Clang's headers
23	// "helpfully" skip including the whole kitchen sink when _MSC_VER is
24	// defined, because lots of programs on Windows would include that and
25	// it'd be a lot slower. But we want all those headers included so we
26	// can use their features after runtime checks later.
27	#include <smmintrin.h>
28	#include <avxintrin.h>
29	#include <avx2intrin.h>
30	#include <avx512fintrin.h>
31	#include <avx512dqintrin.h>
32	#endif
33	#endif
34
35	// sizeof(x) will return size_t, which is 32-bit on some machines and 64-bit on others.
36	// We have better testing on 64-bit machines, so force 32-bit machines to behave like 64-bit.
37	//
38	// Please do not use sizeof() directly, and size_t only when required.
39	// (We have no way of enforcing these requests...)
40	#define SAFE_SIZEOF(x) ((uint64_t)sizeof(x))
41
42	// Same sort of thing for _Layout structs with a variable sized array at the end (named "variable").
43	#define SAFE_FIXED_SIZE(type) ((uint64_t)offsetof(type, variable))
44
45	static const union {
46	uint32_t bits;
47	float f;
48	} inf_ = { `0x7f800000` };
49	#define INFINITY_ inf_.f
50
51	#if defined(__clang__) \|\| defined(__GNUC__)
52	#define small_memcpy __builtin_memcpy
53	#else
54	#define small_memcpy memcpy
55	#endif
56
57	static float log2f_(float x) {
58	// The first approximation of log2(x) is its exponent 'e', minus 127.
59	int32_t bits;
60	small_memcpy(&bits, &x, sizeof(bits));
61
62	float e = (float)bits * (`1.0f` / (`1`<<`23`));
63
64	// If we use the mantissa too we can refine the error signficantly.
65	int32_t m_bits = (bits & `0x007fffff`) \| `0x3f000000`;
66	float m;
67	small_memcpy(&m, &m_bits, sizeof(m));
68
69	return (e - `124.225514990f`
70	- `1.498030302f`*m
71	- `1.725879990f`/(`0.3520887068f` + m));
72	}
73	static float logf_(float x) {
74	const float ln2 = `0.69314718f`;
75	return ln2*log2f_(x);
76	}
77
78	static float exp2f_(float x) {
79	float fract = x - floorf_(x);
80
81	float fbits = (`1.0f` * (`1`<<`23`)) * (x + `121.274057500f`
82	- `1.490129070f`*fract
83	+ `27.728023300f`/(`4.84252568f` - fract));
84
85	// Before we cast fbits to int32_t, check for out of range values to pacify UBSAN.
86	// INT_MAX is not exactly representable as a float, so exclude it as effectively infinite.
87	// INT_MIN is a power of 2 and exactly representable as a float, so it's fine.
88	if (fbits >= (float)INT_MAX) {
89	return INFINITY_;
90	} else if (fbits < (float)INT_MIN) {
91	return -INFINITY_;
92	}
93
94	int32_t bits = (int32_t)fbits;
95	small_memcpy(&x, &bits, sizeof(x));
96	return x;
97	}
98
99	// Not static, as it's used by some test tools.
100	float powf_(float x, float y) {
101	assert (x >= `0`);
102	return (x == `0`) \|\| (x == `1`) ? x
103	: exp2f_(log2f_(x) * y);
104	}
105
106	static float expf_(float x) {
107	const float log2_e = `1.4426950408889634074f`;
108	return exp2f_(log2_e * x);
109	}
110
111	static float fmaxf_(float x, float y) { return x > y ? x : y; }
112	static float fminf_(float x, float y) { return x < y ? x : y; }
113
114	static bool isfinitef_(float x) { return `0` == x*`0`; }
115
116	static float minus_1_ulp(float x) {
117	int32_t bits;
118	memcpy(&bits, &x, sizeof(bits));
119	bits = bits - `1`;
120	memcpy(&x, &bits, sizeof(bits));
121	return x;
122	}
123
124	// Most transfer functions we work with are sRGBish.
125	// For exotic HDR transfer functions, we encode them using a tf.g that makes no sense,
126	// and repurpose the other fields to hold the parameters of the HDR functions.
127	enum TFKind { Bad, sRGBish, PQish, HLGish, HLGinvish };
128	struct TF_PQish { float A,B,C,D,E,F; };
129	struct TF_HLGish { float R,G,a,b,c; };
130
131	static float TFKind_marker(TFKind kind) {
132	// We'd use different NaNs, but those aren't guaranteed to be preserved by WASM.
133	return -(float)kind;
134	}
135
136	static TFKind classify(const skcms_TransferFunction& tf, TF_PQish* pq = nullptr
137	, TF_HLGish* hlg = nullptr) {
138	if (tf.g < `0` && (int)tf.g == tf.g) {
139	// TODO: sanity checks for PQ/HLG like we do for sRGBish.
140	switch ((int)tf.g) {
141	case -PQish: if (pq ) { memcpy(pq , &tf.a, sizeof(pq )); } return* PQish;
142	case -HLGish: if (hlg) { memcpy(hlg, &tf.a, sizeof(hlg)); } return* HLGish;
143	case -HLGinvish: if (hlg) { memcpy(hlg, &tf.a, sizeof(hlg)); } return* HLGinvish;
144	}
145	return Bad;
146	}
147
148	// Basic sanity checks for sRGBish transfer functions.
149	if (isfinitef_(tf.a + tf.b + tf.c + tf.d + tf.e + tf.f + tf.g)
150	// a,c,d,g should be non-negative to make any sense.
151	&& tf.a >= `0`
152	&& tf.c >= `0`
153	&& tf.d >= `0`
154	&& tf.g >= `0`
155	// Raising a negative value to a fractional tf->g produces complex numbers.
156	&& tf.a * tf.d + tf.b >= `0`) {
157	return sRGBish;
158	}
159
160	return Bad;
161	}
162
163	bool skcms_TransferFunction_makePQish(skcms_TransferFunction* tf,
164	float A, float B, float C,
165	float D, float E, float F) {
166	*tf = { TFKind_marker(PQish), A,B,C,D,E,F };
167	assert(classify(*tf) == PQish);
168	return true;
169	}
170
171	bool skcms_TransferFunction_makeHLGish(skcms_TransferFunction* tf,
172	float R, float G,
173	float a, float b, float c) {
174	*tf = { TFKind_marker(HLGish), R,G, a,b,c, `0` };
175	assert(classify(*tf) == HLGish);
176	return true;
177	}
178
179	float skcms_TransferFunction_eval(const skcms_TransferFunction* tf, float x) {
180	float sign = x < `0` ? -`1.0f` : `1.0f`;
181	x *= sign;
182
183	TF_PQish pq;
184	TF_HLGish hlg;
185	switch (classify(*tf, &pq, &hlg)) {
186	case Bad: break;
187
188	case HLGish: return sign * (xhlg.R <= `1` ? powf_(xhlg.R, hlg.G)
189	: expf_((x-hlg.c)*hlg.a) + hlg.b);
190
191	// skcms_TransferFunction_invert() inverts R, G, and a for HLGinvish so this math is fast.
192	case HLGinvish: return sign * (x <= `1` ? hlg.R * powf_(x, hlg.G)
193	: hlg.a * logf_(x - hlg.b) + hlg.c);
194
195
196	case sRGBish: return sign * (x < tf->d ? tf->c * x + tf->f
197	: powf_(tf->a * x + tf->b, tf->g) + tf->e);
198
199	case PQish: return sign * powf_(fmaxf_(pq.A + pq.B * powf_(x, pq.C), `0`)
200	/ (pq.D + pq.E * powf_(x, pq.C)),
201	pq.F);
202	}
203	return `0`;
204	}
205
206
207	static float eval_curve(const skcms_Curve* curve, float x) {
208	if (curve->table_entries == `0`) {
209	return skcms_TransferFunction_eval(&curve->parametric, x);
210	}
211
212	float ix = fmaxf_(`0`, fminf_(x, `1`)) * (curve->table_entries - `1`);
213	int lo = (int) ix ,
214	hi = (int)(float)minus_1_ulp(ix + `1.0f`);
215	float t = ix - (float)lo;
216
217	float l, h;
218	if (curve->table_8) {
219	l = curve->table_8[lo] * (`1`/`255.0f`);
220	h = curve->table_8[hi] * (`1`/`255.0f`);
221	} else {
222	uint16_t be_l, be_h;
223	memcpy(&be_l, curve->table_16 + `2`*lo, `2`);
224	memcpy(&be_h, curve->table_16 + `2`*hi, `2`);
225	uint16_t le_l = ((be_l << `8`) \| (be_l >> `8`)) & `0xffff`;
226	uint16_t le_h = ((be_h << `8`) \| (be_h >> `8`)) & `0xffff`;
227	l = le_l * (`1`/`65535.0f`);
228	h = le_h * (`1`/`65535.0f`);
229	}
230	return l + (h-l)*t;
231	}
232
233	float skcms_MaxRoundtripError(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
234	uint32_t N = curve->table_entries > `256` ? curve->table_entries : `256`;
235	const float dx = `1.0f` / (N - `1`);
236	float err = `0`;
237	for (uint32_t i = `0`; i < N; i++) {
238	float x = i * dx,
239	y = eval_curve(curve, x);
240	err = fmaxf_(err, fabsf_(x - skcms_TransferFunction_eval(inv_tf, y)));
241	}
242	return err;
243	}
244
245	bool skcms_AreApproximateInverses(const skcms_Curve* curve, const skcms_TransferFunction* inv_tf) {
246	return skcms_MaxRoundtripError(curve, inv_tf) < (`1`/`512.0f`);
247	}
248
249	// Additional ICC signature values that are only used internally
250	enum {
251	// File signature
252	skcms_Signature_acsp = `0x61637370`,
253
254	// Tag signatures
255	skcms_Signature_rTRC = `0x72545243`,
256	skcms_Signature_gTRC = `0x67545243`,
257	skcms_Signature_bTRC = `0x62545243`,
258	skcms_Signature_kTRC = `0x6B545243`,
259
260	skcms_Signature_rXYZ = `0x7258595A`,
261	skcms_Signature_gXYZ = `0x6758595A`,
262	skcms_Signature_bXYZ = `0x6258595A`,
263
264	skcms_Signature_A2B0 = `0x41324230`,
265	skcms_Signature_A2B1 = `0x41324231`,
266	skcms_Signature_mAB = `0x6D414220`,
267
268	skcms_Signature_CHAD = `0x63686164`,
269	skcms_Signature_WTPT = `0x77747074`,
270
271	// Type signatures
272	skcms_Signature_curv = `0x63757276`,
273	skcms_Signature_mft1 = `0x6D667431`,
274	skcms_Signature_mft2 = `0x6D667432`,
275	skcms_Signature_para = `0x70617261`,
276	skcms_Signature_sf32 = `0x73663332`,
277	// XYZ is also a PCS signature, so it's defined in skcms.h
278	// skcms_Signature_XYZ = 0x58595A20,
279	};
280
281	static uint16_t read_big_u16(const uint8_t* ptr) {
282	uint16_t be;
283	memcpy(&be, ptr, sizeof(be));
284	#if defined(_MSC_VER)
285	return _byteswap_ushort(be);
286	#else
287	return __builtin_bswap16(be);
288	#endif
289	}
290
291	static uint32_t read_big_u32(const uint8_t* ptr) {
292	uint32_t be;
293	memcpy(&be, ptr, sizeof(be));
294	#if defined(_MSC_VER)
295	return _byteswap_ulong(be);
296	#else
297	return __builtin_bswap32(be);
298	#endif
299	}
300
301	static int32_t read_big_i32(const uint8_t* ptr) {
302	return (int32_t)read_big_u32(ptr);
303	}
304
305	static float read_big_fixed(const uint8_t* ptr) {
306	return read_big_i32(ptr) * (`1.0f` / `65536.0f`);
307	}
308
309	// Maps to an in-memory profile so that fields line up to the locations specified
310	// in ICC.1:2010, section 7.2
311	typedef struct {
312	uint8_t size [ `4`];
313	uint8_t cmm_type [ `4`];
314	uint8_t version [ `4`];
315	uint8_t profile_class [ `4`];
316	uint8_t data_color_space [ `4`];
317	uint8_t pcs [ `4`];
318	uint8_t creation_date_time [`12`];
319	uint8_t signature [ `4`];
320	uint8_t platform [ `4`];
321	uint8_t flags [ `4`];
322	uint8_t device_manufacturer [ `4`];
323	uint8_t device_model [ `4`];
324	uint8_t device_attributes [ `8`];
325	uint8_t rendering_intent [ `4`];
326	uint8_t illuminant_X [ `4`];
327	uint8_t illuminant_Y [ `4`];
328	uint8_t illuminant_Z [ `4`];
329	uint8_t creator [ `4`];
330	uint8_t profile_id [`16`];
331	uint8_t reserved [`28`];
332	uint8_t tag_count [ `4`]; // Technically not part of header, but required
333	} header_Layout;
334
335	typedef struct {
336	uint8_t signature [`4`];
337	uint8_t offset [`4`];
338	uint8_t size [`4`];
339	} tag_Layout;
340
341	static const tag_Layout* get_tag_table(const skcms_ICCProfile* profile) {
342	return (const tag_Layout*)(profile->buffer + SAFE_SIZEOF(header_Layout));
343	}
344
345	// s15Fixed16ArrayType is technically variable sized, holding N values. However, the only valid
346	// use of the type is for the CHAD tag that stores exactly nine values.
347	typedef struct {
348	uint8_t type [ `4`];
349	uint8_t reserved [ `4`];
350	uint8_t values [`36`];
351	} sf32_Layout;
352
353	bool skcms_GetCHAD(const skcms_ICCProfile* profile, skcms_Matrix3x3* m) {
354	skcms_ICCTag tag;
355	if (!skcms_GetTagBySignature(profile, skcms_Signature_CHAD, &tag)) {
356	return false;
357	}
358
359	if (tag.type != skcms_Signature_sf32 \|\| tag.size < SAFE_SIZEOF(sf32_Layout)) {
360	return false;
361	}
362
363	const sf32_Layout* sf32Tag = (const sf32_Layout*)tag.buf;
364	const uint8_t* values = sf32Tag->values;
365	for (int r = `0`; r < `3`; ++r)
366	for (int c = `0`; c < `3`; ++c, values += `4`) {
367	m->vals[r][c] = read_big_fixed(values);
368	}
369	return true;
370	}
371
372	// XYZType is technically variable sized, holding N XYZ triples. However, the only valid uses of
373	// the type are for tags/data that store exactly one triple.
374	typedef struct {
375	uint8_t type [`4`];
376	uint8_t reserved [`4`];
377	uint8_t X [`4`];
378	uint8_t Y [`4`];
379	uint8_t Z [`4`];
380	} XYZ_Layout;
381
382	static bool read_tag_xyz(const skcms_ICCTag* tag, float* x, float* y, float* z) {
383	if (tag->type != skcms_Signature_XYZ \|\| tag->size < SAFE_SIZEOF(XYZ_Layout)) {
384	return false;
385	}
386
387	const XYZ_Layout* xyzTag = (const XYZ_Layout*)tag->buf;
388
389	*x = read_big_fixed(xyzTag->X);
390	*y = read_big_fixed(xyzTag->Y);
391	*z = read_big_fixed(xyzTag->Z);
392	return true;
393	}
394
395	bool skcms_GetWTPT(const skcms_ICCProfile* profile, float xyz[`3`]) {
396	skcms_ICCTag tag;
397	return skcms_GetTagBySignature(profile, skcms_Signature_WTPT, &tag) &&
398	read_tag_xyz(&tag, &xyz[`0`], &xyz[`1`], &xyz[`2`]);
399	}
400
401	static bool read_to_XYZD50(const skcms_ICCTag* rXYZ, const skcms_ICCTag* gXYZ,
402	const skcms_ICCTag* bXYZ, skcms_Matrix3x3* toXYZ) {
403	return read_tag_xyz(rXYZ, &toXYZ->vals[`0`][`0`], &toXYZ->vals[`1`][`0`], &toXYZ->vals[`2`][`0`]) &&
404	read_tag_xyz(gXYZ, &toXYZ->vals[`0`][`1`], &toXYZ->vals[`1`][`1`], &toXYZ->vals[`2`][`1`]) &&
405	read_tag_xyz(bXYZ, &toXYZ->vals[`0`][`2`], &toXYZ->vals[`1`][`2`], &toXYZ->vals[`2`][`2`]);
406	}
407
408	typedef struct {
409	uint8_t type [`4`];
410	uint8_t reserved_a [`4`];
411	uint8_t function_type [`2`];
412	uint8_t reserved_b [`2`];
413	uint8_t variable [`1`/variable/]; // 1, 3, 4, 5, or 7 s15.16, depending on function_type
414	} para_Layout;
415
416	static bool read_curve_para(const uint8_t* buf, uint32_t size,
417	skcms_Curve* curve, uint32_t* curve_size) {
418	if (size < SAFE_FIXED_SIZE(para_Layout)) {
419	return false;
420	}
421
422	const para_Layout* paraTag = (const para_Layout*)buf;
423
424	enum { kG = `0`, kGAB = `1`, kGABC = `2`, kGABCD = `3`, kGABCDEF = `4` };
425	uint16_t function_type = read_big_u16(paraTag->function_type);
426	if (function_type > kGABCDEF) {
427	return false;
428	}
429
430	static const uint32_t curve_bytes[] = { `4`, `12`, `16`, `20`, `28` };
431	if (size < SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type]) {
432	return false;
433	}
434
435	if (curve_size) {
436	*curve_size = SAFE_FIXED_SIZE(para_Layout) + curve_bytes[function_type];
437	}
438
439	curve->table_entries = `0`;
440	curve->parametric.a = `1.0f`;
441	curve->parametric.b = `0.0f`;
442	curve->parametric.c = `0.0f`;
443	curve->parametric.d = `0.0f`;
444	curve->parametric.e = `0.0f`;
445	curve->parametric.f = `0.0f`;
446	curve->parametric.g = read_big_fixed(paraTag->variable);
447
448	switch (function_type) {
449	case kGAB:
450	curve->parametric.a = read_big_fixed(paraTag->variable + `4`);
451	curve->parametric.b = read_big_fixed(paraTag->variable + `8`);
452	if (curve->parametric.a == `0`) {
453	return false;
454	}
455	curve->parametric.d = -curve->parametric.b / curve->parametric.a;
456	break;
457	case kGABC:
458	curve->parametric.a = read_big_fixed(paraTag->variable + `4`);
459	curve->parametric.b = read_big_fixed(paraTag->variable + `8`);
460	curve->parametric.e = read_big_fixed(paraTag->variable + `12`);
461	if (curve->parametric.a == `0`) {
462	return false;
463	}
464	curve->parametric.d = -curve->parametric.b / curve->parametric.a;
465	curve->parametric.f = curve->parametric.e;
466	break;
467	case kGABCD:
468	curve->parametric.a = read_big_fixed(paraTag->variable + `4`);
469	curve->parametric.b = read_big_fixed(paraTag->variable + `8`);
470	curve->parametric.c = read_big_fixed(paraTag->variable + `12`);
471	curve->parametric.d = read_big_fixed(paraTag->variable + `16`);
472	break;
473	case kGABCDEF:
474	curve->parametric.a = read_big_fixed(paraTag->variable + `4`);
475	curve->parametric.b = read_big_fixed(paraTag->variable + `8`);
476	curve->parametric.c = read_big_fixed(paraTag->variable + `12`);
477	curve->parametric.d = read_big_fixed(paraTag->variable + `16`);
478	curve->parametric.e = read_big_fixed(paraTag->variable + `20`);
479	curve->parametric.f = read_big_fixed(paraTag->variable + `24`);
480	break;
481	}
482	return classify(curve->parametric) == sRGBish;
483	}
484
485	typedef struct {
486	uint8_t type [`4`];
487	uint8_t reserved [`4`];
488	uint8_t value_count [`4`];
489	uint8_t variable [`1`/variable/]; // value_count, 8.8 if 1, uint16 (n65535) if > 1*
490	} curv_Layout;
491
492	static bool read_curve_curv(const uint8_t* buf, uint32_t size,
493	skcms_Curve* curve, uint32_t* curve_size) {
494	if (size < SAFE_FIXED_SIZE(curv_Layout)) {
495	return false;
496	}
497
498	const curv_Layout* curvTag = (const curv_Layout*)buf;
499
500	uint32_t value_count = read_big_u32(curvTag->value_count);
501	if (size < SAFE_FIXED_SIZE(curv_Layout) + value_count * SAFE_SIZEOF(uint16_t)) {
502	return false;
503	}
504
505	if (curve_size) {
506	curve_size = SAFE_FIXED_SIZE(curv_Layout) + value_count SAFE_SIZEOF(uint16_t);
507	}
508
509	if (value_count < `2`) {
510	curve->table_entries = `0`;
511	curve->parametric.a = `1.0f`;
512	curve->parametric.b = `0.0f`;
513	curve->parametric.c = `0.0f`;
514	curve->parametric.d = `0.0f`;
515	curve->parametric.e = `0.0f`;
516	curve->parametric.f = `0.0f`;
517	if (value_count == `0`) {
518	// Empty tables are a shorthand for an identity curve
519	curve->parametric.g = `1.0f`;
520	} else {
521	// Single entry tables are a shorthand for simple gamma
522	curve->parametric.g = read_big_u16(curvTag->variable) * (`1.0f` / `256.0f`);
523	}
524	} else {
525	curve->table_8 = nullptr;
526	curve->table_16 = curvTag->variable;
527	curve->table_entries = value_count;
528	}
529
530	return true;
531	}
532
533	// Parses both curveType and parametricCurveType data. Ensures that at most 'size' bytes are read.
534	// If curve_size is not nullptr, writes the number of bytes used by the curve in (curve_size).*
535	static bool read_curve(const uint8_t* buf, uint32_t size,
536	skcms_Curve* curve, uint32_t* curve_size) {
537	if (!buf \|\| size < `4` \|\| !curve) {
538	return false;
539	}
540
541	uint32_t type = read_big_u32(buf);
542	if (type == skcms_Signature_para) {
543	return read_curve_para(buf, size, curve, curve_size);
544	} else if (type == skcms_Signature_curv) {
545	return read_curve_curv(buf, size, curve, curve_size);
546	}
547
548	return false;
549	}
550
551	// mft1 and mft2 share a large chunk of data
552	typedef struct {
553	uint8_t type [ `4`];
554	uint8_t reserved_a [ `4`];
555	uint8_t input_channels [ `1`];
556	uint8_t output_channels [ `1`];
557	uint8_t grid_points [ `1`];
558	uint8_t reserved_b [ `1`];
559	uint8_t matrix [`36`];
560	} mft_CommonLayout;
561
562	typedef struct {
563	mft_CommonLayout common [`1`];
564
565	uint8_t variable [`1`/variable/];
566	} mft1_Layout;
567
568	typedef struct {
569	mft_CommonLayout common [`1`];
570
571	uint8_t input_table_entries [`2`];
572	uint8_t output_table_entries [`2`];
573	uint8_t variable [`1`/variable/];
574	} mft2_Layout;
575
576	static bool read_mft_common(const mft_CommonLayout* mftTag, skcms_A2B* a2b) {
577	// MFT matrices are applied before the first set of curves, but must be identity unless the
578	// input is PCSXYZ. We don't support PCSXYZ profiles, so we ignore this matrix. Note that the
579	// matrix in skcms_A2B is applied later in the pipe, so supporting this would require another
580	// field/flag.
581	a2b->matrix_channels = `0`;
582
583	a2b->input_channels = mftTag->input_channels[`0`];
584	a2b->output_channels = mftTag->output_channels[`0`];
585
586	// We require exactly three (ie XYZ/Lab/RGB) output channels
587	if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
588	return false;
589	}
590	// We require at least one, and no more than four (ie CMYK) input channels
591	if (a2b->input_channels < `1` \|\| a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
592	return false;
593	}
594
595	for (uint32_t i = `0`; i < a2b->input_channels; ++i) {
596	a2b->grid_points[i] = mftTag->grid_points[`0`];
597	}
598	// The grid only makes sense with at least two points along each axis
599	if (a2b->grid_points[`0`] < `2`) {
600	return false;
601	}
602
603	return true;
604	}
605
606	static bool init_a2b_tables(const uint8_t* table_base, uint64_t max_tables_len, uint32_t byte_width,
607	uint32_t input_table_entries, uint32_t output_table_entries,
608	skcms_A2B* a2b) {
609	// byte_width is 1 or 2, [input\|output]_table_entries are in [2, 4096], so no overflow
610	uint32_t byte_len_per_input_table = input_table_entries * byte_width;
611	uint32_t byte_len_per_output_table = output_table_entries * byte_width;
612
613	// [input\|output]_channels are <= 4, so still no overflow
614	uint32_t byte_len_all_input_tables = a2b->input_channels * byte_len_per_input_table;
615	uint32_t byte_len_all_output_tables = a2b->output_channels * byte_len_per_output_table;
616
617	uint64_t grid_size = a2b->output_channels * byte_width;
618	for (uint32_t axis = `0`; axis < a2b->input_channels; ++axis) {
619	grid_size *= a2b->grid_points[axis];
620	}
621
622	if (max_tables_len < byte_len_all_input_tables + grid_size + byte_len_all_output_tables) {
623	return false;
624	}
625
626	for (uint32_t i = `0`; i < a2b->input_channels; ++i) {
627	a2b->input_curves[i].table_entries = input_table_entries;
628	if (byte_width == `1`) {
629	a2b->input_curves[i].table_8 = table_base + i * byte_len_per_input_table;
630	a2b->input_curves[i].table_16 = nullptr;
631	} else {
632	a2b->input_curves[i].table_8 = nullptr;
633	a2b->input_curves[i].table_16 = table_base + i * byte_len_per_input_table;
634	}
635	}
636
637	if (byte_width == `1`) {
638	a2b->grid_8 = table_base + byte_len_all_input_tables;
639	a2b->grid_16 = nullptr;
640	} else {
641	a2b->grid_8 = nullptr;
642	a2b->grid_16 = table_base + byte_len_all_input_tables;
643	}
644
645	const uint8_t* output_table_base = table_base + byte_len_all_input_tables + grid_size;
646	for (uint32_t i = `0`; i < a2b->output_channels; ++i) {
647	a2b->output_curves[i].table_entries = output_table_entries;
648	if (byte_width == `1`) {
649	a2b->output_curves[i].table_8 = output_table_base + i * byte_len_per_output_table;
650	a2b->output_curves[i].table_16 = nullptr;
651	} else {
652	a2b->output_curves[i].table_8 = nullptr;
653	a2b->output_curves[i].table_16 = output_table_base + i * byte_len_per_output_table;
654	}
655	}
656
657	return true;
658	}
659
660	static bool read_tag_mft1(const skcms_ICCTag* tag, skcms_A2B* a2b) {
661	if (tag->size < SAFE_FIXED_SIZE(mft1_Layout)) {
662	return false;
663	}
664
665	const mft1_Layout* mftTag = (const mft1_Layout*)tag->buf;
666	if (!read_mft_common(mftTag->common, a2b)) {
667	return false;
668	}
669
670	uint32_t input_table_entries = `256`;
671	uint32_t output_table_entries = `256`;
672
673	return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft1_Layout), `1`,
674	input_table_entries, output_table_entries, a2b);
675	}
676
677	static bool read_tag_mft2(const skcms_ICCTag* tag, skcms_A2B* a2b) {
678	if (tag->size < SAFE_FIXED_SIZE(mft2_Layout)) {
679	return false;
680	}
681
682	const mft2_Layout* mftTag = (const mft2_Layout*)tag->buf;
683	if (!read_mft_common(mftTag->common, a2b)) {
684	return false;
685	}
686
687	uint32_t input_table_entries = read_big_u16(mftTag->input_table_entries);
688	uint32_t output_table_entries = read_big_u16(mftTag->output_table_entries);
689
690	// ICC spec mandates that 2 <= table_entries <= 4096
691	if (input_table_entries < `2` \|\| input_table_entries > `4096` \|\|
692	output_table_entries < `2` \|\| output_table_entries > `4096`) {
693	return false;
694	}
695
696	return init_a2b_tables(mftTag->variable, tag->size - SAFE_FIXED_SIZE(mft2_Layout), `2`,
697	input_table_entries, output_table_entries, a2b);
698	}
699
700	static bool read_curves(const uint8_t* buf, uint32_t size, uint32_t curve_offset,
701	uint32_t num_curves, skcms_Curve* curves) {
702	for (uint32_t i = `0`; i < num_curves; ++i) {
703	if (curve_offset > size) {
704	return false;
705	}
706
707	uint32_t curve_bytes;
708	if (!read_curve(buf + curve_offset, size - curve_offset, &curves[i], &curve_bytes)) {
709	return false;
710	}
711
712	if (curve_bytes > UINT32_MAX - `3`) {
713	return false;
714	}
715	curve_bytes = (curve_bytes + `3`) & ~`3U`;
716
717	uint64_t new_offset_64 = (uint64_t)curve_offset + curve_bytes;
718	curve_offset = (uint32_t)new_offset_64;
719	if (new_offset_64 != curve_offset) {
720	return false;
721	}
722	}
723
724	return true;
725	}
726
727	typedef struct {
728	uint8_t type [ `4`];
729	uint8_t reserved_a [ `4`];
730	uint8_t input_channels [ `1`];
731	uint8_t output_channels [ `1`];
732	uint8_t reserved_b [ `2`];
733	uint8_t b_curve_offset [ `4`];
734	uint8_t matrix_offset [ `4`];
735	uint8_t m_curve_offset [ `4`];
736	uint8_t clut_offset [ `4`];
737	uint8_t a_curve_offset [ `4`];
738	} mAB_Layout;
739
740	typedef struct {
741	uint8_t grid_points [`16`];
742	uint8_t grid_byte_width [ `1`];
743	uint8_t reserved [ `3`];
744	uint8_t variable [`1`/variable/];
745	} mABCLUT_Layout;
746
747	static bool read_tag_mab(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
748	if (tag->size < SAFE_SIZEOF(mAB_Layout)) {
749	return false;
750	}
751
752	const mAB_Layout* mABTag = (const mAB_Layout*)tag->buf;
753
754	a2b->input_channels = mABTag->input_channels[`0`];
755	a2b->output_channels = mABTag->output_channels[`0`];
756
757	// We require exactly three (ie XYZ/Lab/RGB) output channels
758	if (a2b->output_channels != ARRAY_COUNT(a2b->output_curves)) {
759	return false;
760	}
761	// We require no more than four (ie CMYK) input channels
762	if (a2b->input_channels > ARRAY_COUNT(a2b->input_curves)) {
763	return false;
764	}
765
766	uint32_t b_curve_offset = read_big_u32(mABTag->b_curve_offset);
767	uint32_t matrix_offset = read_big_u32(mABTag->matrix_offset);
768	uint32_t m_curve_offset = read_big_u32(mABTag->m_curve_offset);
769	uint32_t clut_offset = read_big_u32(mABTag->clut_offset);
770	uint32_t a_curve_offset = read_big_u32(mABTag->a_curve_offset);
771
772	// "B" curves must be present
773	if (`0` == b_curve_offset) {
774	return false;
775	}
776
777	if (!read_curves(tag->buf, tag->size, b_curve_offset, a2b->output_channels,
778	a2b->output_curves)) {
779	return false;
780	}
781
782	// "M" curves and Matrix must be used together
783	if (`0` != m_curve_offset) {
784	if (`0` == matrix_offset) {
785	return false;
786	}
787	a2b->matrix_channels = a2b->output_channels;
788	if (!read_curves(tag->buf, tag->size, m_curve_offset, a2b->matrix_channels,
789	a2b->matrix_curves)) {
790	return false;
791	}
792
793	// Read matrix, which is stored as a row-major 3x3, followed by the fourth column
794	if (tag->size < matrix_offset + `12` * SAFE_SIZEOF(uint32_t)) {
795	return false;
796	}
797	float encoding_factor = pcs_is_xyz ? `65535` / `32768.0f` : `1.0f`;
798	const uint8_t* mtx_buf = tag->buf + matrix_offset;
799	a2b->matrix.vals[`0`][`0`] = encoding_factor * read_big_fixed(mtx_buf + `0`);
800	a2b->matrix.vals[`0`][`1`] = encoding_factor * read_big_fixed(mtx_buf + `4`);
801	a2b->matrix.vals[`0`][`2`] = encoding_factor * read_big_fixed(mtx_buf + `8`);
802	a2b->matrix.vals[`1`][`0`] = encoding_factor * read_big_fixed(mtx_buf + `12`);
803	a2b->matrix.vals[`1`][`1`] = encoding_factor * read_big_fixed(mtx_buf + `16`);
804	a2b->matrix.vals[`1`][`2`] = encoding_factor * read_big_fixed(mtx_buf + `20`);
805	a2b->matrix.vals[`2`][`0`] = encoding_factor * read_big_fixed(mtx_buf + `24`);
806	a2b->matrix.vals[`2`][`1`] = encoding_factor * read_big_fixed(mtx_buf + `28`);
807	a2b->matrix.vals[`2`][`2`] = encoding_factor * read_big_fixed(mtx_buf + `32`);
808	a2b->matrix.vals[`0`][`3`] = encoding_factor * read_big_fixed(mtx_buf + `36`);
809	a2b->matrix.vals[`1`][`3`] = encoding_factor * read_big_fixed(mtx_buf + `40`);
810	a2b->matrix.vals[`2`][`3`] = encoding_factor * read_big_fixed(mtx_buf + `44`);
811	} else {
812	if (`0` != matrix_offset) {
813	return false;
814	}
815	a2b->matrix_channels = `0`;
816	}
817
818	// "A" curves and CLUT must be used together
819	if (`0` != a_curve_offset) {
820	if (`0` == clut_offset) {
821	return false;
822	}
823	if (!read_curves(tag->buf, tag->size, a_curve_offset, a2b->input_channels,
824	a2b->input_curves)) {
825	return false;
826	}
827
828	if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout)) {
829	return false;
830	}
831	const mABCLUT_Layout* clut = (const mABCLUT_Layout*)(tag->buf + clut_offset);
832
833	if (clut->grid_byte_width[`0`] == `1`) {
834	a2b->grid_8 = clut->variable;
835	a2b->grid_16 = nullptr;
836	} else if (clut->grid_byte_width[`0`] == `2`) {
837	a2b->grid_8 = nullptr;
838	a2b->grid_16 = clut->variable;
839	} else {
840	return false;
841	}
842
843	uint64_t grid_size = a2b->output_channels * clut->grid_byte_width[`0`];
844	for (uint32_t i = `0`; i < a2b->input_channels; ++i) {
845	a2b->grid_points[i] = clut->grid_points[i];
846	// The grid only makes sense with at least two points along each axis
847	if (a2b->grid_points[i] < `2`) {
848	return false;
849	}
850	grid_size *= a2b->grid_points[i];
851	}
852	if (tag->size < clut_offset + SAFE_FIXED_SIZE(mABCLUT_Layout) + grid_size) {
853	return false;
854	}
855	} else {
856	if (`0` != clut_offset) {
857	return false;
858	}
859
860	// If there is no CLUT, the number of input and output channels must match
861	if (a2b->input_channels != a2b->output_channels) {
862	return false;
863	}
864
865	// Zero out the number of input channels to signal that we're skipping this stage
866	a2b->input_channels = `0`;
867	}
868
869	return true;
870	}
871
872	// If you pass f, we'll fit a possibly-non-zero value for f.*
873	// If you pass nullptr, we'll assume you want f to be treated as zero.*
874	static int fit_linear(const skcms_Curve* curve, int N, float tol,
875	float* c, float* d, float* f = nullptr) {
876	assert(N > `1`);
877	// We iteratively fit the first points to the TF's linear piece.
878	// We want the cx + f line to pass through the first and last points we fit exactly.
879	//
880	// As we walk along the points we find the minimum and maximum slope of the line before the
881	// error would exceed our tolerance. We stop when the range [slope_min, slope_max] becomes
882	// emtpy, when we definitely can't add any more points.
883	//
884	// Some points' error intervals may intersect the running interval but not lie fully
885	// within it. So we keep track of the last point we saw that is a valid end point candidate,
886	// and once the search is done, back up to build the line through that* point.*
887	const float dx = `1.0f` / (N - `1`);
888
889	int lin_points = `1`;
890
891	float f_zero = `0.0f`;
892	if (f) {
893	*f = eval_curve(curve, `0`);
894	} else {
895	f = &f_zero;
896	}
897
898
899	float slope_min = -INFINITY_;
900	float slope_max = +INFINITY_;
901	for (int i = `1`; i < N; ++i) {
902	float x = i * dx;
903	float y = eval_curve(curve, x);
904
905	float slope_max_i = (y + tol - *f) / x,
906	slope_min_i = (y - tol - *f) / x;
907	if (slope_max_i < slope_min \|\| slope_max < slope_min_i) {
908	// Slope intervals would no longer overlap.
909	break;
910	}
911	slope_max = fminf_(slope_max, slope_max_i);
912	slope_min = fmaxf_(slope_min, slope_min_i);
913
914	float cur_slope = (y - *f) / x;
915	if (slope_min <= cur_slope && cur_slope <= slope_max) {
916	lin_points = i + `1`;
917	*c = cur_slope;
918	}
919	}
920
921	// Set D to the last point that met our tolerance.
922	d = (lin_points - `1`) dx;
923	return lin_points;
924	}
925
926	static bool read_a2b(const skcms_ICCTag* tag, skcms_A2B* a2b, bool pcs_is_xyz) {
927	bool ok = false;
928	if (tag->type == skcms_Signature_mft1) {
929	ok = read_tag_mft1(tag, a2b);
930	} else if (tag->type == skcms_Signature_mft2) {
931	ok = read_tag_mft2(tag, a2b);
932	} else if (tag->type == skcms_Signature_mAB) {
933	ok = read_tag_mab(tag, a2b, pcs_is_xyz);
934	}
935	if (!ok) {
936	return false;
937	}
938
939	// Detect and canonicalize identity tables.
940	skcms_Curve* curves[] = {
941	a2b->input_channels > `0` ? a2b->input_curves + `0` : nullptr,
942	a2b->input_channels > `1` ? a2b->input_curves + `1` : nullptr,
943	a2b->input_channels > `2` ? a2b->input_curves + `2` : nullptr,
944	a2b->input_channels > `3` ? a2b->input_curves + `3` : nullptr,
945	a2b->matrix_channels > `0` ? a2b->matrix_curves + `0` : nullptr,
946	a2b->matrix_channels > `1` ? a2b->matrix_curves + `1` : nullptr,
947	a2b->matrix_channels > `2` ? a2b->matrix_curves + `2` : nullptr,
948	a2b->output_channels > `0` ? a2b->output_curves + `0` : nullptr,
949	a2b->output_channels > `1` ? a2b->output_curves + `1` : nullptr,
950	a2b->output_channels > `2` ? a2b->output_curves + `2` : nullptr,
951	};
952
953	for (int i = `0`; i < ARRAY_COUNT(curves); i++) {
954	skcms_Curve* curve = curves[i];
955
956	if (curve && curve->table_entries && curve->table_entries <= (uint32_t)INT_MAX) {
957	int N = (int)curve->table_entries;
958
959	float c = `0.0f`, d = `0.0f`, f = `0.0f`;
960	if (N == fit_linear(curve, N, `1.0f`/(`2`*N), &c,&d,&f)
961	&& c == `1.0f`
962	&& f == `0.0f`) {
963	curve->table_entries = `0`;
964	curve->table_8 = nullptr;
965	curve->table_16 = nullptr;
966	curve->parametric = skcms_TransferFunction{`1`,`1`,`0`,`0`,`0`,`0`,`0`};
967	}
968	}
969	}
970
971	return true;
972	}
973
974	void skcms_GetTagByIndex(const skcms_ICCProfile* profile, uint32_t idx, skcms_ICCTag* tag) {
975	if (!profile \|\| !profile->buffer \|\| !tag) { return; }
976	if (idx > profile->tag_count) { return; }
977	const tag_Layout* tags = get_tag_table(profile);
978	tag->signature = read_big_u32(tags[idx].signature);
979	tag->size = read_big_u32(tags[idx].size);
980	tag->buf = read_big_u32(tags[idx].offset) + profile->buffer;
981	tag->type = read_big_u32(tag->buf);
982	}
983
984	bool skcms_GetTagBySignature(const skcms_ICCProfile* profile, uint32_t sig, skcms_ICCTag* tag) {
985	if (!profile \|\| !profile->buffer \|\| !tag) { return false; }
986	const tag_Layout* tags = get_tag_table(profile);
987	for (uint32_t i = `0`; i < profile->tag_count; ++i) {
988	if (read_big_u32(tags[i].signature) == sig) {
989	tag->signature = sig;
990	tag->size = read_big_u32(tags[i].size);
991	tag->buf = read_big_u32(tags[i].offset) + profile->buffer;
992	tag->type = read_big_u32(tag->buf);
993	return true;
994	}
995	}
996	return false;
997	}
998
999	static bool usable_as_src(const skcms_ICCProfile* profile) {
1000	return profile->has_A2B
1001	\|\| (profile->has_trc && profile->has_toXYZD50);
1002	}
1003
1004	bool skcms_Parse(const void* buf, size_t len, skcms_ICCProfile* profile) {
1005	assert(SAFE_SIZEOF(header_Layout) == `132`);
1006
1007	if (!profile) {
1008	return false;
1009	}
1010	memset(profile, `0`, SAFE_SIZEOF(*profile));
1011
1012	if (len < SAFE_SIZEOF(header_Layout)) {
1013	return false;
1014	}
1015
1016	// Byte-swap all header fields
1017	const header_Layout* header = (const header_Layout*)buf;
1018	profile->buffer = (const uint8_t*)buf;
1019	profile->size = read_big_u32(header->size);
1020	uint32_t version = read_big_u32(header->version);
1021	profile->data_color_space = read_big_u32(header->data_color_space);
1022	profile->pcs = read_big_u32(header->pcs);
1023	uint32_t signature = read_big_u32(header->signature);
1024	float illuminant_X = read_big_fixed(header->illuminant_X);
1025	float illuminant_Y = read_big_fixed(header->illuminant_Y);
1026	float illuminant_Z = read_big_fixed(header->illuminant_Z);
1027	profile->tag_count = read_big_u32(header->tag_count);
1028
1029	// Validate signature, size (smaller than buffer, large enough to hold tag table),
1030	// and major version
1031	uint64_t tag_table_size = profile->tag_count * SAFE_SIZEOF(tag_Layout);
1032	if (signature != skcms_Signature_acsp \|\|
1033	profile->size > len \|\|
1034	profile->size < SAFE_SIZEOF(header_Layout) + tag_table_size \|\|
1035	(version >> `24`) > `4`) {
1036	return false;
1037	}
1038
1039	// Validate that illuminant is D50 white
1040	if (fabsf_(illuminant_X - `0.9642f`) > `0.0100f` \|\|
1041	fabsf_(illuminant_Y - `1.0000f`) > `0.0100f` \|\|
1042	fabsf_(illuminant_Z - `0.8249f`) > `0.0100f`) {
1043	return false;
1044	}
1045
1046	// Validate that all tag entries have sane offset + size
1047	const tag_Layout* tags = get_tag_table(profile);
1048	for (uint32_t i = `0`; i < profile->tag_count; ++i) {
1049	uint32_t tag_offset = read_big_u32(tags[i].offset);
1050	uint32_t tag_size = read_big_u32(tags[i].size);
1051	uint64_t tag_end = (uint64_t)tag_offset + (uint64_t)tag_size;
1052	if (tag_size < `4` \|\| tag_end > profile->size) {
1053	return false;
1054	}
1055	}
1056
1057	if (profile->pcs != skcms_Signature_XYZ && profile->pcs != skcms_Signature_Lab) {
1058	return false;
1059	}
1060
1061	bool pcs_is_xyz = profile->pcs == skcms_Signature_XYZ;
1062
1063	// Pre-parse commonly used tags.
1064	skcms_ICCTag kTRC;
1065	if (profile->data_color_space == skcms_Signature_Gray &&
1066	skcms_GetTagBySignature(profile, skcms_Signature_kTRC, &kTRC)) {
1067	if (!read_curve(kTRC.buf, kTRC.size, &profile->trc[`0`], nullptr)) {
1068	// Malformed tag
1069	return false;
1070	}
1071	profile->trc[`1`] = profile->trc[`0`];
1072	profile->trc[`2`] = profile->trc[`0`];
1073	profile->has_trc = true;
1074
1075	if (pcs_is_xyz) {
1076	profile->toXYZD50.vals[`0`][`0`] = illuminant_X;
1077	profile->toXYZD50.vals[`1`][`1`] = illuminant_Y;
1078	profile->toXYZD50.vals[`2`][`2`] = illuminant_Z;
1079	profile->has_toXYZD50 = true;
1080	}
1081	} else {
1082	skcms_ICCTag rTRC, gTRC, bTRC;
1083	if (skcms_GetTagBySignature(profile, skcms_Signature_rTRC, &rTRC) &&
1084	skcms_GetTagBySignature(profile, skcms_Signature_gTRC, &gTRC) &&
1085	skcms_GetTagBySignature(profile, skcms_Signature_bTRC, &bTRC)) {
1086	if (!read_curve(rTRC.buf, rTRC.size, &profile->trc[`0`], nullptr) \|\|
1087	!read_curve(gTRC.buf, gTRC.size, &profile->trc[`1`], nullptr) \|\|
1088	!read_curve(bTRC.buf, bTRC.size, &profile->trc[`2`], nullptr)) {
1089	// Malformed TRC tags
1090	return false;
1091	}
1092	profile->has_trc = true;
1093	}
1094
1095	skcms_ICCTag rXYZ, gXYZ, bXYZ;
1096	if (skcms_GetTagBySignature(profile, skcms_Signature_rXYZ, &rXYZ) &&
1097	skcms_GetTagBySignature(profile, skcms_Signature_gXYZ, &gXYZ) &&
1098	skcms_GetTagBySignature(profile, skcms_Signature_bXYZ, &bXYZ)) {
1099	if (!read_to_XYZD50(&rXYZ, &gXYZ, &bXYZ, &profile->toXYZD50)) {
1100	// Malformed XYZ tags
1101	return false;
1102	}
1103	profile->has_toXYZD50 = true;
1104	}
1105	}
1106
1107	skcms_ICCTag a2b_tag;
1108
1109	// For now, we're preferring A2B0, like Skia does and the ICC spec tells us to.
1110	// TODO: prefer A2B1 (relative colormetric) over A2B0 (perceptual)?
1111	// This breaks with the ICC spec, but we think it's a good idea, given that TRC curves
1112	// and all our known users are thinking exclusively in terms of relative colormetric.
1113	const uint32_t sigs[] = { skcms_Signature_A2B0, skcms_Signature_A2B1 };
1114	for (int i = `0`; i < ARRAY_COUNT(sigs); i++) {
1115	if (skcms_GetTagBySignature(profile, sigs[i], &a2b_tag)) {
1116	if (!read_a2b(&a2b_tag, &profile->A2B, pcs_is_xyz)) {
1117	// Malformed A2B tag
1118	return false;
1119	}
1120	profile->has_A2B = true;
1121	break;
1122	}
1123	}
1124
1125	return usable_as_src(profile);
1126	}
1127
1128
1129	const skcms_ICCProfile* skcms_sRGB_profile() {
1130	static const skcms_ICCProfile sRGB_profile = {
1131	nullptr, // buffer, moot here
1132
1133	`0`, // size, moot here
1134	skcms_Signature_RGB, // data_color_space
1135	skcms_Signature_XYZ, // pcs
1136	`0`, // tag count, moot here
1137
1138	// We choose to represent sRGB with its canonical transfer function,
1139	// and with its canonical XYZD50 gamut matrix.
1140	true, // has_trc, followed by the 3 trc curves
1141	{
1142	{{`0`, {`2.4f`, (float)(`1`/`1.055`), (float)(`0.055`/`1.055`), (float)(`1`/`12.92`), `0.04045f`, `0`, `0`}}},
1143	{{`0`, {`2.4f`, (float)(`1`/`1.055`), (float)(`0.055`/`1.055`), (float)(`1`/`12.92`), `0.04045f`, `0`, `0`}}},
1144	{{`0`, {`2.4f`, (float)(`1`/`1.055`), (float)(`0.055`/`1.055`), (float)(`1`/`12.92`), `0.04045f`, `0`, `0`}}},
1145	},
1146
1147	true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1148	{{
1149	{ `0.436065674f`, `0.385147095f`, `0.143066406f` },
1150	{ `0.222488403f`, `0.716873169f`, `0.060607910f` },
1151	{ `0.013916016f`, `0.097076416f`, `0.714096069f` },
1152	}},
1153
1154	false, // has_A2B, followed by a2b itself which we don't care about.
1155	{
1156	`0`,
1157	{
1158	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1159	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1160	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1161	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1162	},
1163	{`0`,`0`,`0`,`0`},
1164	nullptr,
1165	nullptr,
1166
1167	`0`,
1168	{
1169	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1170	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1171	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1172	},
1173	{{
1174	{ `0`,`0`,`0`,`0` },
1175	{ `0`,`0`,`0`,`0` },
1176	{ `0`,`0`,`0`,`0` },
1177	}},
1178
1179	`0`,
1180	{
1181	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1182	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1183	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1184	},
1185	},
1186	};
1187	return &sRGB_profile;
1188	}
1189
1190	const skcms_ICCProfile* skcms_XYZD50_profile() {
1191	// Just like sRGB above, but with identity transfer functions and toXYZD50 matrix.
1192	static const skcms_ICCProfile XYZD50_profile = {
1193	nullptr, // buffer, moot here
1194
1195	`0`, // size, moot here
1196	skcms_Signature_RGB, // data_color_space
1197	skcms_Signature_XYZ, // pcs
1198	`0`, // tag count, moot here
1199
1200	true, // has_trc, followed by the 3 trc curves
1201	{
1202	{{`0`, {`1`,`1`, `0`,`0`,`0`,`0`,`0`}}},
1203	{{`0`, {`1`,`1`, `0`,`0`,`0`,`0`,`0`}}},
1204	{{`0`, {`1`,`1`, `0`,`0`,`0`,`0`,`0`}}},
1205	},
1206
1207	true, // has_toXYZD50, followed by 3x3 toXYZD50 matrix
1208	{{
1209	{ `1`,`0`,`0` },
1210	{ `0`,`1`,`0` },
1211	{ `0`,`0`,`1` },
1212	}},
1213
1214	false, // has_A2B, followed by a2b itself which we don't care about.
1215	{
1216	`0`,
1217	{
1218	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1219	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1220	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1221	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1222	},
1223	{`0`,`0`,`0`,`0`},
1224	nullptr,
1225	nullptr,
1226
1227	`0`,
1228	{
1229	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1230	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1231	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1232	},
1233	{{
1234	{ `0`,`0`,`0`,`0` },
1235	{ `0`,`0`,`0`,`0` },
1236	{ `0`,`0`,`0`,`0` },
1237	}},
1238
1239	`0`,
1240	{
1241	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1242	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1243	{{`0`, {`0`,`0`, `0`,`0`,`0`,`0`,`0`}}},
1244	},
1245	},
1246	};
1247
1248	return &XYZD50_profile;
1249	}
1250
1251	const skcms_TransferFunction* skcms_sRGB_TransferFunction() {
1252	return &skcms_sRGB_profile()->trc[`0`].parametric;
1253	}
1254
1255	const skcms_TransferFunction* skcms_sRGB_Inverse_TransferFunction() {
1256	static const skcms_TransferFunction sRGB_inv =
1257	{`0.416666657f`, `1.137283325f`, -`0.0f`, `12.920000076f`, `0.003130805f`, -`0.054969788f`, -`0.0f`};
1258	return &sRGB_inv;
1259	}
1260
1261	const skcms_TransferFunction* skcms_Identity_TransferFunction() {
1262	static const skcms_TransferFunction identity = {`1`,`1`,`0`,`0`,`0`,`0`,`0`};
1263	return &identity;
1264	}
1265
1266	const uint8_t skcms_252_random_bytes[] = {
1267	`8`, `179`, `128`, `204`, `253`, `38`, `134`, `184`, `68`, `102`, `32`, `138`, `99`, `39`, `169`, `215`,
1268	`119`, `26`, `3`, `223`, `95`, `239`, `52`, `132`, `114`, `74`, `81`, `234`, `97`, `116`, `244`, `205`, `30`,
1269	`154`, `173`, `12`, `51`, `159`, `122`, `153`, `61`, `226`, `236`, `178`, `229`, `55`, `181`, `220`, `191`,
1270	`194`, `160`, `126`, `168`, `82`, `131`, `18`, `180`, `245`, `163`, `22`, `246`, `69`, `235`, `252`, `57`,
1271	`108`, `14`, `6`, `152`, `240`, `255`, `171`, `242`, `20`, `227`, `177`, `238`, `96`, `85`, `16`, `211`,
1272	`70`, `200`, `149`, `155`, `146`, `127`, `145`, `100`, `151`, `109`, `19`, `165`, `208`, `195`, `164`,
1273	`137`, `254`, `182`, `248`, `64`, `201`, `45`, `209`, `5`, `147`, `207`, `210`, `113`, `162`, `83`, `225`,
1274	`9`, `31`, `15`, `231`, `115`, `37`, `58`, `53`, `24`, `49`, `197`, `56`, `120`, `172`, `48`, `21`, `214`,
1275	`129`, `111`, `11`, `50`, `187`, `196`, `34`, `60`, `103`, `71`, `144`, `47`, `203`, `77`, `80`, `232`,
1276	`140`, `222`, `250`, `206`, `166`, `247`, `139`, `249`, `221`, `72`, `106`, `27`, `199`, `117`, `54`,
1277	`219`, `135`, `118`, `40`, `79`, `41`, `251`, `46`, `93`, `212`, `92`, `233`, `148`, `28`, `121`, `63`,
1278	`123`, `158`, `105`, `59`, `29`, `42`, `143`, `23`, `0`, `107`, `176`, `87`, `104`, `183`, `156`, `193`,
1279	`189`, `90`, `188`, `65`, `190`, `17`, `198`, `7`, `186`, `161`, `1`, `124`, `78`, `125`, `170`, `133`,
1280	`174`, `218`, `67`, `157`, `75`, `101`, `89`, `217`, `62`, `33`, `141`, `228`, `25`, `35`, `91`, `230`, `4`,
1281	`2`, `13`, `73`, `86`, `167`, `237`, `84`, `243`, `44`, `185`, `66`, `130`, `110`, `150`, `142`, `216`, `88`,
1282	`112`, `36`, `224`, `136`, `202`, `76`, `94`, `98`, `175`, `213`
1283	};
1284
1285	bool skcms_ApproximatelyEqualProfiles(const skcms_ICCProfile* A, const skcms_ICCProfile* B) {
1286	// Test for exactly equal profiles first.
1287	if (A == B \|\| `0` == memcmp(A,B, sizeof(skcms_ICCProfile))) {
1288	return true;
1289	}
1290
1291	// For now this is the essentially the same strategy we use in test_only.c
1292	// for our skcms_Transform() smoke tests:
1293	// 1) transform A to XYZD50
1294	// 2) transform B to XYZD50
1295	// 3) return true if they're similar enough
1296	// Our current criterion in 3) is maximum 1 bit error per XYZD50 byte.
1297
1298	// skcms_252_random_bytes are 252 of a random shuffle of all possible bytes.
1299	// 252 is evenly divisible by 3 and 4. Only 192, 10, 241, and 43 are missing.
1300
1301	if (A->data_color_space != B->data_color_space) {
1302	return false;
1303	}
1304
1305	// Interpret as RGB_888 if data color space is RGB or GRAY, RGBA_8888 if CMYK.
1306	// TODO: working with RGBA_8888 either way is probably fastest.
1307	skcms_PixelFormat fmt = skcms_PixelFormat_RGB_888;
1308	size_t npixels = `84`;
1309	if (A->data_color_space == skcms_Signature_CMYK) {
1310	fmt = skcms_PixelFormat_RGBA_8888;
1311	npixels = `63`;
1312	}
1313
1314	// TODO: if A or B is a known profile (skcms_sRGB_profile, skcms_XYZD50_profile),
1315	// use pre-canned results and skip that skcms_Transform() call?
1316	uint8_t dstA[`252`],
1317	dstB[`252`];
1318	if (!skcms_Transform(
1319	skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, A,
1320	dstA, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1321	npixels)) {
1322	return false;
1323	}
1324	if (!skcms_Transform(
1325	skcms_252_random_bytes, fmt, skcms_AlphaFormat_Unpremul, B,
1326	dstB, skcms_PixelFormat_RGB_888, skcms_AlphaFormat_Unpremul, skcms_XYZD50_profile(),
1327	npixels)) {
1328	return false;
1329	}
1330
1331	// TODO: make sure this final check has reasonable codegen.
1332	for (size_t i = `0`; i < `252`; i++) {
1333	if (abs((int)dstA[i] - (int)dstB[i]) > `1`) {
1334	return false;
1335	}
1336	}
1337	return true;
1338	}
1339
1340	bool skcms_TRCs_AreApproximateInverse(const skcms_ICCProfile* profile,
1341	const skcms_TransferFunction* inv_tf) {
1342	if (!profile \|\| !profile->has_trc) {
1343	return false;
1344	}
1345
1346	return skcms_AreApproximateInverses(&profile->trc[`0`], inv_tf) &&
1347	skcms_AreApproximateInverses(&profile->trc[`1`], inv_tf) &&
1348	skcms_AreApproximateInverses(&profile->trc[`2`], inv_tf);
1349	}
1350
1351	static bool is_zero_to_one(float x) {
1352	return `0` <= x && x <= `1`;
1353	}
1354
1355	typedef struct { float vals[`3`]; } skcms_Vector3;
1356
1357	static skcms_Vector3 mv_mul(const skcms_Matrix3x3* m, const skcms_Vector3* v) {
1358	skcms_Vector3 dst = {{`0`,`0`,`0`}};
1359	for (int row = `0`; row < `3`; ++row) {
1360	dst.vals[row] = m->vals[row][`0`] * v->vals[`0`]
1361	+ m->vals[row][`1`] * v->vals[`1`]
1362	+ m->vals[row][`2`] * v->vals[`2`];
1363	}
1364	return dst;
1365	}
1366
1367	bool skcms_AdaptToXYZD50(float wx, float wy,
1368	skcms_Matrix3x3* toXYZD50) {
1369	if (!is_zero_to_one(wx) \|\| !is_zero_to_one(wy) \|\|
1370	!toXYZD50) {
1371	return false;
1372	}
1373
1374	// Assumes that Y is 1.0f.
1375	skcms_Vector3 wXYZ = { { wx / wy, `1`, (`1` - wx - wy) / wy } };
1376
1377	// Now convert toXYZ matrix to toXYZD50.
1378	skcms_Vector3 wXYZD50 = { { `0.96422f`, `1.0f`, `0.82521f` } };
1379
1380	// Calculate the chromatic adaptation matrix. We will use the Bradford method, thus
1381	// the matrices below. The Bradford method is used by Adobe and is widely considered
1382	// to be the best.
1383	skcms_Matrix3x3 xyz_to_lms = {{
1384	{ `0.8951f`, `0.2664f`, -`0.1614f` },
1385	{ -`0.7502f`, `1.7135f`, `0.0367f` },
1386	{ `0.0389f`, -`0.0685f`, `1.0296f` },
1387	}};
1388	skcms_Matrix3x3 lms_to_xyz = {{
1389	{ `0.9869929f`, -`0.1470543f`, `0.1599627f` },
1390	{ `0.4323053f`, `0.5183603f`, `0.0492912f` },
1391	{ -`0.0085287f`, `0.0400428f`, `0.9684867f` },
1392	}};
1393
1394	skcms_Vector3 srcCone = mv_mul(&xyz_to_lms, &wXYZ);
1395	skcms_Vector3 dstCone = mv_mul(&xyz_to_lms, &wXYZD50);
1396
1397	*toXYZD50 = {{
1398	{ dstCone.vals[`0`] / srcCone.vals[`0`], `0`, `0` },
1399	{ `0`, dstCone.vals[`1`] / srcCone.vals[`1`], `0` },
1400	{ `0`, `0`, dstCone.vals[`2`] / srcCone.vals[`2`] },
1401	}};
1402	*toXYZD50 = skcms_Matrix3x3_concat(toXYZD50, &xyz_to_lms);
1403	*toXYZD50 = skcms_Matrix3x3_concat(&lms_to_xyz, toXYZD50);
1404
1405	return true;
1406	}
1407
1408	bool skcms_PrimariesToXYZD50(float rx, float ry,
1409	float gx, float gy,
1410	float bx, float by,
1411	float wx, float wy,
1412	skcms_Matrix3x3* toXYZD50) {
1413	if (!is_zero_to_one(rx) \|\| !is_zero_to_one(ry) \|\|
1414	!is_zero_to_one(gx) \|\| !is_zero_to_one(gy) \|\|
1415	!is_zero_to_one(bx) \|\| !is_zero_to_one(by) \|\|
1416	!is_zero_to_one(wx) \|\| !is_zero_to_one(wy) \|\|
1417	!toXYZD50) {
1418	return false;
1419	}
1420
1421	// First, we need to convert xy values (primaries) to XYZ.
1422	skcms_Matrix3x3 primaries = {{
1423	{ rx, gx, bx },
1424	{ ry, gy, by },
1425	{ `1` - rx - ry, `1` - gx - gy, `1` - bx - by },
1426	}};
1427	skcms_Matrix3x3 primaries_inv;
1428	if (!skcms_Matrix3x3_invert(&primaries, &primaries_inv)) {
1429	return false;
1430	}
1431
1432	// Assumes that Y is 1.0f.
1433	skcms_Vector3 wXYZ = { { wx / wy, `1`, (`1` - wx - wy) / wy } };
1434	skcms_Vector3 XYZ = mv_mul(&primaries_inv, &wXYZ);
1435
1436	skcms_Matrix3x3 toXYZ = {{
1437	{ XYZ.vals[`0`], `0`, `0` },
1438	{ `0`, XYZ.vals[`1`], `0` },
1439	{ `0`, `0`, XYZ.vals[`2`] },
1440	}};
1441	toXYZ = skcms_Matrix3x3_concat(&primaries, &toXYZ);
1442
1443	skcms_Matrix3x3 DXtoD50;
1444	if (!skcms_AdaptToXYZD50(wx, wy, &DXtoD50)) {
1445	return false;
1446	}
1447
1448	*toXYZD50 = skcms_Matrix3x3_concat(&DXtoD50, &toXYZ);
1449	return true;
1450	}
1451
1452
1453	bool skcms_Matrix3x3_invert(const skcms_Matrix3x3* src, skcms_Matrix3x3* dst) {
1454	double a00 = src->vals[`0`][`0`],
1455	a01 = src->vals[`1`][`0`],
1456	a02 = src->vals[`2`][`0`],
1457	a10 = src->vals[`0`][`1`],
1458	a11 = src->vals[`1`][`1`],
1459	a12 = src->vals[`2`][`1`],
1460	a20 = src->vals[`0`][`2`],
1461	a21 = src->vals[`1`][`2`],
1462	a22 = src->vals[`2`][`2`];
1463
1464	double b0 = a00a11 - a01a10,
1465	b1 = a00a12 - a02a10,
1466	b2 = a01a12 - a02a11,
1467	b3 = a20,
1468	b4 = a21,
1469	b5 = a22;
1470
1471	double determinant = b0*b5
1472	- b1*b4
1473	+ b2*b3;
1474
1475	if (determinant == `0`) {
1476	return false;
1477	}
1478
1479	double invdet = `1.0` / determinant;
1480	if (invdet > +FLT_MAX \|\| invdet < -FLT_MAX \|\| !isfinitef_((float)invdet)) {
1481	return false;
1482	}
1483
1484	b0 *= invdet;
1485	b1 *= invdet;
1486	b2 *= invdet;
1487	b3 *= invdet;
1488	b4 *= invdet;
1489	b5 *= invdet;
1490
1491	dst->vals[`0`][`0`] = (float)( a11b5 - a12b4 );
1492	dst->vals[`1`][`0`] = (float)( a02b4 - a01b5 );
1493	dst->vals[`2`][`0`] = (float)( + b2 );
1494	dst->vals[`0`][`1`] = (float)( a12b3 - a10b5 );
1495	dst->vals[`1`][`1`] = (float)( a00b5 - a02b3 );
1496	dst->vals[`2`][`1`] = (float)( - b1 );
1497	dst->vals[`0`][`2`] = (float)( a10b4 - a11b3 );
1498	dst->vals[`1`][`2`] = (float)( a01b3 - a00b4 );
1499	dst->vals[`2`][`2`] = (float)( + b0 );
1500
1501	for (int r = `0`; r < `3`; ++r)
1502	for (int c = `0`; c < `3`; ++c) {
1503	if (!isfinitef_(dst->vals[r][c])) {
1504	return false;
1505	}
1506	}
1507	return true;
1508	}
1509
1510	skcms_Matrix3x3 skcms_Matrix3x3_concat(const skcms_Matrix3x3* A, const skcms_Matrix3x3* B) {
1511	skcms_Matrix3x3 m = { { { `0`,`0`,`0` },{ `0`,`0`,`0` },{ `0`,`0`,`0` } } };
1512	for (int r = `0`; r < `3`; r++)
1513	for (int c = `0`; c < `3`; c++) {
1514	m.vals[r][c] = A->vals[r][`0`] * B->vals[`0`][c]
1515	+ A->vals[r][`1`] * B->vals[`1`][c]
1516	+ A->vals[r][`2`] * B->vals[`2`][c];
1517	}
1518	return m;
1519	}
1520
1521	#if defined(__clang__)
1522	[[clang::no_sanitize("float-divide-by-zero")]] // Checked for by classify() on the way out.
1523	#endif
1524	bool skcms_TransferFunction_invert(const skcms_TransferFunction* src, skcms_TransferFunction* dst) {
1525	TF_PQish pq;
1526	TF_HLGish hlg;
1527	switch (classify(*src, &pq, &hlg)) {
1528	case Bad: return false;
1529	case sRGBish: break; // handled below
1530
1531	case PQish:
1532	*dst = { TFKind_marker(PQish), -pq.A, pq.D, `1.0f`/pq.F
1533	, pq.B, -pq.E, `1.0f`/pq.C};
1534	return true;
1535
1536	case HLGish:
1537	*dst = { TFKind_marker(HLGinvish), `1.0f`/hlg.R, `1.0f`/hlg.G
1538	, `1.0f`/hlg.a, hlg.b, hlg.c, `0` };
1539	return true;
1540
1541	case HLGinvish:
1542	*dst = { TFKind_marker(HLGish), `1.0f`/hlg.R, `1.0f`/hlg.G
1543	, `1.0f`/hlg.a, hlg.b, hlg.c, `0` };
1544	return true;
1545	}
1546
1547	assert (classify(*src) == sRGBish);
1548
1549	// We're inverting this function, solving for x in terms of y.
1550	// y = (cx + f) x < d
1551	// (ax + b)^g + e x ≥ d
1552	// The inverse of this function can be expressed in the same piecewise form.
1553	skcms_TransferFunction inv = {`0`,`0`,`0`,`0`,`0`,`0`,`0`};
1554
1555	// We'll start by finding the new threshold inv.d.
1556	// In principle we should be able to find that by solving for y at x=d from either side.
1557	// (If those two d values aren't the same, it's a discontinuous transfer function.)
1558	float d_l = src->c * src->d + src->f,
1559	d_r = powf_(src->a * src->d + src->b, src->g) + src->e;
1560	if (fabsf_(d_l - d_r) > `1`/`512.0f`) {
1561	return false;
1562	}
1563	inv.d = d_l; // TODO(mtklein): better in practice to choose d_r?
1564
1565	// When d=0, the linear section collapses to a point. We leave c,d,f all zero in that case.
1566	if (inv.d > `0`) {
1567	// Inverting the linear section is pretty straightfoward:
1568	// y = cx + f
1569	// y - f = cx
1570	// (1/c)y - f/c = x
1571	inv.c = `1.0f`/src->c;
1572	inv.f = -src->f/src->c;
1573	}
1574
1575	// The interesting part is inverting the nonlinear section:
1576	// y = (ax + b)^g + e.
1577	// y - e = (ax + b)^g
1578	// (y - e)^1/g = ax + b
1579	// (y - e)^1/g - b = ax
1580	// (1/a)(y - e)^1/g - b/a = x
1581	//
1582	// To make that fit our form, we need to move the (1/a) term inside the exponentiation:
1583	// let k = (1/a)^g
1584	// (1/a)( y - e)^1/g - b/a = x
1585	// (ky - ke)^1/g - b/a = x
1586
1587	float k = powf_(src->a, -src->g); // (1/a)^g == a^-g
1588	inv.g = `1.0f` / src->g;
1589	inv.a = k;
1590	inv.b = -k * src->e;
1591	inv.e = -src->b / src->a;
1592
1593	// We need to enforce the same constraints here that we do when fitting a curve,
1594	// a >= 0 and ad+b >= 0. These constraints are checked by classify(), so they're true
1595	// of the source function if we're here.
1596
1597	// Just like when fitting the curve, there's really no way to rescue a < 0.
1598	if (inv.a < `0`) {
1599	return false;
1600	}
1601	// On the other hand we can rescue an ad+b that's gone slightly negative here.
1602	if (inv.a * inv.d + inv.b < `0`) {
1603	inv.b = -inv.a * inv.d;
1604	}
1605
1606	// That should usually make classify(inv) == sRGBish true, but there are a couple situations
1607	// where we might still fail here, like non-finite parameter values.
1608	if (classify(inv) != sRGBish) {
1609	return false;
1610	}
1611
1612	assert (inv.a >= `0`);
1613	assert (inv.a * inv.d + inv.b >= `0`);
1614
1615	// Now in principle we're done.
1616	// But to preserve the valuable invariant inv(src(1.0f)) == 1.0f, we'll tweak
1617	// e or f of the inverse, depending on which segment contains src(1.0f).
1618	float s = skcms_TransferFunction_eval(src, `1.0f`);
1619	if (!isfinitef_(s)) {
1620	return false;
1621	}
1622
1623	float sign = s < `0` ? -`1.0f` : `1.0f`;
1624	s *= sign;
1625	if (s < inv.d) {
1626	inv.f = `1.0f` - sign * inv.c * s;
1627	} else {
1628	inv.e = `1.0f` - sign * powf_(inv.a * s + inv.b, inv.g);
1629	}
1630
1631	*dst = inv;
1632	return classify(*dst) == sRGBish;
1633	}
1634
1635	// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
1636
1637	// From here below we're approximating an skcms_Curve with an skcms_TransferFunction{g,a,b,c,d,e,f}:
1638	//
1639	// tf(x) = cx + f x < d
1640	// tf(x) = (ax + b)^g + e x ≥ d
1641	//
1642	// When fitting, we add the additional constraint that both pieces meet at d:
1643	//
1644	// cd + f = (ad + b)^g + e
1645	//
1646	// Solving for e and folding it through gives an alternate formulation of the non-linear piece:
1647	//
1648	// tf(x) = cx + f x < d
1649	// tf(x) = (ax + b)^g - (ad + b)^g + cd + f x ≥ d
1650	//
1651	// Our overall strategy is then:
1652	// For a couple tolerances,
1653	// - fit_linear(): fit c,d,f iteratively to as many points as our tolerance allows
1654	// - invert c,d,f
1655	// - fit_nonlinear(): fit g,a,b using Gauss-Newton given those inverted c,d,f
1656	// (and by constraint, inverted e) to the inverse of the table.
1657	// Return the parameters with least maximum error.
1658	//
1659	// To run Gauss-Newton to find g,a,b, we'll also need the gradient of the residuals
1660	// of round-trip f_inv(x), the inverse of the non-linear piece of f(x).
1661	//
1662	// let y = Table(x)
1663	// r(x) = x - f_inv(y)
1664	//
1665	// ∂r/∂g = ln(ay + b)(ay + b)^g*
1666	// - ln(ad + b)(ad + b)^g*
1667	// ∂r/∂a = yg(ay + b)^(g-1)
1668	// - dg(ad + b)^(g-1)
1669	// ∂r/∂b = g(ay + b)^(g-1)
1670	// - g(ad + b)^(g-1)
1671
1672	// Return the residual of roundtripping skcms_Curve(x) through f_inv(y) with parameters P,
1673	// and fill out the gradient of the residual into dfdP.
1674	static float rg_nonlinear(float x,
1675	const skcms_Curve* curve,
1676	const skcms_TransferFunction* tf,
1677	float dfdP[`3`]) {
1678	const float y = eval_curve(curve, x);
1679
1680	const float g = tf->g, a = tf->a, b = tf->b,
1681	c = tf->c, d = tf->d, f = tf->f;
1682
1683	const float Y = fmaxf_(a*y + b, `0.0f`),
1684	D = a*d + b;
1685	assert (D >= `0`);
1686
1687	// The gradient.
1688	dfdP[`0`] = logf_(Y)*powf_(Y, g)
1689	- logf_(D)*powf_(D, g);
1690	dfdP[`1`] = ygpowf_(Y, g-`1`)
1691	- dgpowf_(D, g-`1`);
1692	dfdP[`2`] = g*powf_(Y, g-`1`)
1693	- g*powf_(D, g-`1`);
1694
1695	// The residual.
1696	const float f_inv = powf_(Y, g)
1697	- powf_(D, g)
1698	+ c*d + f;
1699	return x - f_inv;
1700	}
1701
1702	static bool gauss_newton_step(const skcms_Curve* curve,
1703	skcms_TransferFunction* tf,
1704	float x0, float dx, int N) {
1705	// We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
1706	//
1707	// Let P = [ tf->g, tf->a, tf->b ] (the three terms that we're adjusting).
1708	//
1709	// We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
1710	// where r(P) is the residual vector
1711	// and Jf is the Jacobian matrix of f(), ∂r/∂P.
1712	//
1713	// Let's review the shape of each of these expressions:
1714	// r(P) is [N x 1], a column vector with one entry per value of x tested
1715	// Jf is [N x 3], a matrix with an entry for each (x,P) pair
1716	// Jf^T is [3 x N], the transpose of Jf
1717	//
1718	// Jf^T Jf is [3 x N] [N x 3] == [3 x 3], a 3x3 matrix,*
1719	// and so is its inverse (Jf^T Jf)^-1
1720	// Jf^T r(P) is [3 x N] [N x 1] == [3 x 1], a column vector with the same shape as P*
1721	//
1722	// Our implementation strategy to get to the final ∆P is
1723	// 1) evaluate Jf^T Jf, call that lhs
1724	// 2) evaluate Jf^T r(P), call that rhs
1725	// 3) invert lhs
1726	// 4) multiply inverse lhs by rhs
1727	//
1728	// This is a friendly implementation strategy because we don't have to have any
1729	// buffers that scale with N, and equally nice don't have to perform any matrix
1730	// operations that are variable size.
1731	//
1732	// Other implementation strategies could trade this off, e.g. evaluating the
1733	// pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
1734	// the residuals. That would probably require implementing singular value
1735	// decomposition, and would create a [3 x N] matrix to be multiplied by the
1736	// [N x 1] residual vector, but on the upside I think that'd eliminate the
1737	// possibility of this gauss_newton_step() function ever failing.
1738
1739	// 0) start off with lhs and rhs safely zeroed.
1740	skcms_Matrix3x3 lhs = {{ {`0`,`0`,`0`}, {`0`,`0`,`0`}, {`0`,`0`,`0`} }};
1741	skcms_Vector3 rhs = { {`0`,`0`,`0`} };
1742
1743	// 1,2) evaluate lhs and evaluate rhs
1744	// We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
1745	// so we'll have to update lhs and rhs at the same time.
1746	for (int i = `0`; i < N; i++) {
1747	float x = x0 + i*dx;
1748
1749	float dfdP[`3`] = {`0`,`0`,`0`};
1750	float resid = rg_nonlinear(x,curve,tf, dfdP);
1751
1752	for (int r = `0`; r < `3`; r++) {
1753	for (int c = `0`; c < `3`; c++) {
1754	lhs.vals[r][c] += dfdP[r] * dfdP[c];
1755	}
1756	rhs.vals[r] += dfdP[r] * resid;
1757	}
1758	}
1759
1760	// If any of the 3 P parameters are unused, this matrix will be singular.
1761	// Detect those cases and fix them up to indentity instead, so we can invert.
1762	for (int k = `0`; k < `3`; k++) {
1763	if (lhs.vals[`0`][k]==`0` && lhs.vals[`1`][k]==`0` && lhs.vals[`2`][k]==`0` &&
1764	lhs.vals[k][`0`]==`0` && lhs.vals[k][`1`]==`0` && lhs.vals[k][`2`]==`0`) {
1765	lhs.vals[k][k] = `1`;
1766	}
1767	}
1768
1769	// 3) invert lhs
1770	skcms_Matrix3x3 lhs_inv;
1771	if (!skcms_Matrix3x3_invert(&lhs, &lhs_inv)) {
1772	return false;
1773	}
1774
1775	// 4) multiply inverse lhs by rhs
1776	skcms_Vector3 dP = mv_mul(&lhs_inv, &rhs);
1777	tf->g += dP.vals[`0`];
1778	tf->a += dP.vals[`1`];
1779	tf->b += dP.vals[`2`];
1780	return isfinitef_(tf->g) && isfinitef_(tf->a) && isfinitef_(tf->b);
1781	}
1782
1783	static float max_roundtrip_error_checked(const skcms_Curve* curve,
1784	const skcms_TransferFunction* tf_inv) {
1785	skcms_TransferFunction tf;
1786	if (!skcms_TransferFunction_invert(tf_inv, &tf) \|\| sRGBish != classify(tf)) {
1787	return INFINITY_;
1788	}
1789
1790	skcms_TransferFunction tf_inv_again;
1791	if (!skcms_TransferFunction_invert(&tf, &tf_inv_again)) {
1792	return INFINITY_;
1793	}
1794
1795	return skcms_MaxRoundtripError(curve, &tf_inv_again);
1796	}
1797
1798	// Fit the points in [L,N) to the non-linear piece of tf, or return false if we can't.
1799	static bool fit_nonlinear(const skcms_Curve* curve, int L, int N, skcms_TransferFunction* tf) {
1800	// This enforces a few constraints that are not modeled in gauss_newton_step()'s optimization.
1801	auto fixup_tf = [tf]() {
1802	// a must be non-negative. That ensures the function is monotonically increasing.
1803	// We don't really know how to fix up a if it goes negative.
1804	if (tf->a < `0`) {
1805	return false;
1806	}
1807	// ad+b must be non-negative. That ensures we don't end up with complex numbers in powf.
1808	// We feel just barely not uneasy enough to tweak b so ad+b is zero in this case.
1809	if (tf->a * tf->d + tf->b < `0`) {
1810	tf->b = -tf->a * tf->d;
1811	}
1812	assert (tf->a >= `0` &&
1813	tf->a * tf->d + tf->b >= `0`);
1814
1815	// cd+f must be ~= (ad+b)^g+e. That ensures the function is continuous. We keep e as a free
1816	// parameter so we can guarantee this.
1817	tf->e = tf->c*tf->d + tf->f
1818	- powf_(tf->a*tf->d + tf->b, tf->g);
1819
1820	return true;
1821	};
1822
1823	if (!fixup_tf ()) {
1824	return false;
1825	}
1826
1827	// No matter where we start, dx should always represent N even steps from 0 to 1.
1828	const float dx = `1.0f` / (N-`1`);
1829
1830	skcms_TransferFunction best_tf = *tf;
1831	float best_max_error = INFINITY_;
1832
1833	// Need this or several curves get worse... sigh
1834	float init_error = max_roundtrip_error_checked(curve, tf);
1835	if (init_error < best_max_error) {
1836	best_max_error = init_error;
1837	best_tf = *tf;
1838	}
1839
1840	// As far as we can tell, 1 Gauss-Newton step won't converge, and 3 steps is no better than 2.
1841	for (int j = `0`; j < `8`; j++) {
1842	if (!gauss_newton_step(curve, tf, L*dx, dx, N-L) \|\| !fixup_tf ()) {
1843	*tf = best_tf;
1844	return isfinitef_(best_max_error);
1845	}
1846
1847	float max_error = max_roundtrip_error_checked(curve, tf);
1848	if (max_error < best_max_error) {
1849	best_max_error = max_error;
1850	best_tf = *tf;
1851	}
1852	}
1853
1854	*tf = best_tf;
1855	return isfinitef_(best_max_error);
1856	}
1857
1858	bool skcms_ApproximateCurve(const skcms_Curve* curve,
1859	skcms_TransferFunction* approx,
1860	float* max_error) {
1861	if (!curve \|\| !approx \|\| !max_error) {
1862	return false;
1863	}
1864
1865	if (curve->table_entries == `0`) {
1866	// No point approximating an skcms_TransferFunction with an skcms_TransferFunction!
1867	return false;
1868	}
1869
1870	if (curve->table_entries == `1` \|\| curve->table_entries > (uint32_t)INT_MAX) {
1871	// We need at least two points, and must put some reasonable cap on the maximum number.
1872	return false;
1873	}
1874
1875	int N = (int)curve->table_entries;
1876	const float dx = `1.0f` / (N - `1`);
1877
1878	*max_error = INFINITY_;
1879	const float kTolerances[] = { `1.5f` / `65535.0f`, `1.0f` / `512.0f` };
1880	for (int t = `0`; t < ARRAY_COUNT(kTolerances); t++) {
1881	skcms_TransferFunction tf,
1882	tf_inv;
1883
1884	// It's problematic to fit curves with non-zero f, so always force it to zero explicitly.
1885	tf.f = `0.0f`;
1886	int L = fit_linear(curve, N, kTolerances[t], &tf.c, &tf.d);
1887
1888	if (L == N) {
1889	// If the entire data set was linear, move the coefficients to the nonlinear portion
1890	// with G == 1. This lets use a canonical representation with d == 0.
1891	tf.g = `1`;
1892	tf.a = tf.c;
1893	tf.b = tf.f;
1894	tf.c = tf.d = tf.e = tf.f = `0`;
1895	} else if (L == N - `1`) {
1896	// Degenerate case with only two points in the nonlinear segment. Solve directly.
1897	tf.g = `1`;
1898	tf.a = (eval_curve(curve, (N-`1`)*dx) -
1899	eval_curve(curve, (N-`2`)*dx))
1900	/ dx;
1901	tf.b = eval_curve(curve, (N-`2`)*dx)
1902	- tf.a * (N-`2`)*dx;
1903	tf.e = `0`;
1904	} else {
1905	// Start by guessing a gamma-only curve through the midpoint.
1906	int mid = (L + N) / `2`;
1907	float mid_x = mid / (N - `1.0f`);
1908	float mid_y = eval_curve(curve, mid_x);
1909	tf.g = log2f_(mid_y) / log2f_(mid_x);
1910	tf.a = `1`;
1911	tf.b = `0`;
1912	tf.e = tf.c*tf.d + tf.f
1913	- powf_(tf.a*tf.d + tf.b, tf.g);
1914
1915
1916	if (!skcms_TransferFunction_invert(&tf, &tf_inv) \|\|
1917	!fit_nonlinear(curve, L,N, &tf_inv)) {
1918	continue;
1919	}
1920
1921	// We fit tf_inv, so calculate tf to keep in sync.
1922	// fit_nonlinear() should guarantee invertibility.
1923	if (!skcms_TransferFunction_invert(&tf_inv, &tf)) {
1924	assert(false);
1925	continue;
1926	}
1927	}
1928
1929	// We'd better have a sane, sRGB-ish TF by now.
1930	// Other non-Bad TFs would be fine, but we know we've only ever tried to fit sRGBish;
1931	// anything else is just some accident of math and the way we pun tf.g as a type flag.
1932	// fit_nonlinear() should guarantee this, but the special cases may fail this test.
1933	if (sRGBish != classify(tf)) {
1934	continue;
1935	}
1936
1937	// We find our error by roundtripping the table through tf_inv.
1938	//
1939	// (The most likely use case for this approximation is to be inverted and
1940	// used as the transfer function for a destination color space.)
1941	//
1942	// We've kept tf and tf_inv in sync above, but we can't guarantee that tf is
1943	// invertible, so re-verify that here (and use the new inverse for testing).
1944	// fit_nonlinear() should guarantee this, but the special cases that don't use
1945	// it may fail this test.
1946	if (!skcms_TransferFunction_invert(&tf, &tf_inv)) {
1947	continue;
1948	}
1949
1950	float err = skcms_MaxRoundtripError(curve, &tf_inv);
1951	if (*max_error > err) {
1952	*max_error = err;
1953	*approx = tf;
1954	}
1955	}
1956	return isfinitef_(*max_error);
1957	}
1958
1959	// ~~~~ Impl. of skcms_Transform() ~~~~
1960
1961	typedef enum {
1962	Op_load_a8,
1963	Op_load_g8,
1964	Op_load_8888_palette8,
1965	Op_load_4444,
1966	Op_load_565,
1967	Op_load_888,
1968	Op_load_8888,
1969	Op_load_1010102,
1970	Op_load_161616LE,
1971	Op_load_16161616LE,
1972	Op_load_161616BE,
1973	Op_load_16161616BE,
1974	Op_load_hhh,
1975	Op_load_hhhh,
1976	Op_load_fff,
1977	Op_load_ffff,
1978
1979	Op_swap_rb,
1980	Op_clamp,
1981	Op_invert,
1982	Op_force_opaque,
1983	Op_premul,
1984	Op_unpremul,
1985	Op_matrix_3x3,
1986	Op_matrix_3x4,
1987	Op_lab_to_xyz,
1988
1989	Op_tf_r,
1990	Op_tf_g,
1991	Op_tf_b,
1992	Op_tf_a,
1993
1994	Op_pq_r,
1995	Op_pq_g,
1996	Op_pq_b,
1997	Op_pq_a,
1998
1999	Op_hlg_r,
2000	Op_hlg_g,
2001	Op_hlg_b,
2002	Op_hlg_a,
2003
2004	Op_hlginv_r,
2005	Op_hlginv_g,
2006	Op_hlginv_b,
2007	Op_hlginv_a,
2008
2009	Op_table_r,
2010	Op_table_g,
2011	Op_table_b,
2012	Op_table_a,
2013
2014	Op_clut,
2015
2016	Op_store_a8,
2017	Op_store_g8,
2018	Op_store_4444,
2019	Op_store_565,
2020	Op_store_888,
2021	Op_store_8888,
2022	Op_store_1010102,
2023	Op_store_161616LE,
2024	Op_store_16161616LE,
2025	Op_store_161616BE,
2026	Op_store_16161616BE,
2027	Op_store_hhh,
2028	Op_store_hhhh,
2029	Op_store_fff,
2030	Op_store_ffff,
2031	} Op;
2032
2033	#if defined(__clang__)
2034	template <int N, typename T> using Vec = T __attribute__((ext_vector_type(N)));
2035	#elif defined(__GNUC__)
2036	// For some reason GCC accepts this nonsense, but not the more straightforward version,
2037	// template <int N, typename T> using Vec = T __attribute__((vector_size(Nsizeof(T))));*
2038	template <int N, typename T>
2039	struct VecHelper { typedef T __attribute__((vector_size(N*sizeof(T)))) V; };
2040
2041	template <int N, typename T> using Vec = typename VecHelper<N,T>::V;
2042	#endif
2043
2044	// First, instantiate our default exec_ops() implementation using the default compiliation target.
2045
2046	namespace baseline {
2047	#if defined(SKCMS_PORTABLE) \|\| !(defined(__clang__) \|\| defined(__GNUC__)) \
2048	\|\| (defined(__EMSCRIPTEN_major__) && !defined(__wasm_simd128__))
2049	#define N 1
2050	template <typename T> using V = T;
2051	using Color = float;
2052	#elif defined(__AVX512F__)
2053	#define N 16
2054	template <typename T> using V = Vec<N,T>;
2055	using Color = float;
2056	#elif defined(__AVX__)
2057	#define N 8
2058	template <typename T> using V = Vec<N,T>;
2059	using Color = float;
2060	#elif defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(SKCMS_OPT_INTO_NEON_FP16)
2061	#define N 8
2062	template <typename T> using V = Vec<N,T>;
2063	using Color = _Float16;
2064	#else
2065	#define N 4
2066	template <typename T> using V = Vec<N,T>;
2067	using Color = float;
2068	#endif
2069
2070	#include "src/Transform_inl.h"
2071	#undef N
2072	}
2073
2074	// Now, instantiate any other versions of run_program() we may want for runtime detection.
2075	#if !defined(SKCMS_PORTABLE) && \
2076	!defined(SKCMS_NO_RUNTIME_CPU_DETECTION) && \
2077	(( defined(__clang__) && __clang_major__ >= 5) \|\| \
2078	(!defined(__clang__) && defined(__GNUC__))) \
2079	&& defined(__x86_64__)
2080
2081	#if !defined(__AVX2__)
2082	#if defined(__clang__)
2083	#pragma clang attribute push(__attribute__((target("avx2,f16c"))), apply_to=function)
2084	#elif defined(__GNUC__)
2085	#pragma GCC push_options
2086	#pragma GCC target("avx2,f16c")
2087	#endif
2088
2089	namespace hsw {
2090	#define USING_AVX
2091	#define USING_AVX_F16C
2092	#define USING_AVX2
2093	#define N 8
2094	template <typename T> using V = Vec<N,T>;
2095	using Color = float;
2096
2097	#include "src/Transform_inl.h"
2098
2099	// src/Transform_inl.h will undefine USING_ for us.*
2100	#undef N
2101	}
2102
2103	#if defined(__clang__)
2104	#pragma clang attribute pop
2105	#elif defined(__GNUC__)
2106	#pragma GCC pop_options
2107	#endif
2108
2109	#define TEST_FOR_HSW
2110	#endif
2111
2112	#if !defined(__AVX512F__)
2113	#if defined(__clang__)
2114	#pragma clang attribute push(__attribute__((target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl"))), apply_to=function)
2115	#elif defined(__GNUC__)
2116	#pragma GCC push_options
2117	#pragma GCC target("avx512f,avx512dq,avx512cd,avx512bw,avx512vl")
2118	#endif
2119
2120	namespace skx {
2121	#define USING_AVX512F
2122	#define N 16
2123	template <typename T> using V = Vec<N,T>;
2124	using Color = float;
2125
2126	#include "src/Transform_inl.h"
2127
2128	// src/Transform_inl.h will undefine USING_ for us.*
2129	#undef N
2130	}
2131
2132	#if defined(__clang__)
2133	#pragma clang attribute pop
2134	#elif defined(__GNUC__)
2135	#pragma GCC pop_options
2136	#endif
2137
2138	#define TEST_FOR_SKX
2139	#endif
2140
2141	#if defined(TEST_FOR_HSW) \|\| defined(TEST_FOR_SKX)
2142	enum class CpuType { None, HSW, SKX };
2143	static CpuType cpu_type() {
2144	static const CpuType type = []{
2145	// See http://www.sandpile.org/x86/cpuid.htm
2146
2147	// First, a basic cpuid(1) lets us check prerequisites for HSW, SKX.
2148	uint32_t eax, ebx, ecx, edx;
2149	__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2150	: "0"(`1`), "2"(`0`));
2151	if ((edx & (`1u`<<`25`)) && // SSE
2152	(edx & (`1u`<<`26`)) && // SSE2
2153	(ecx & (`1u`<< `0`)) && // SSE3
2154	(ecx & (`1u`<< `9`)) && // SSSE3
2155	(ecx & (`1u`<<`12`)) && // FMA (N.B. not used, avoided even)
2156	(ecx & (`1u`<<`19`)) && // SSE4.1
2157	(ecx & (`1u`<<`20`)) && // SSE4.2
2158	(ecx & (`1u`<<`26`)) && // XSAVE
2159	(ecx & (`1u`<<`27`)) && // OSXSAVE
2160	(ecx & (`1u`<<`28`)) && // AVX
2161	(ecx & (`1u`<<`29`))) { // F16C
2162
2163	// Call cpuid(7) to check for AVX2 and AVX-512 bits.
2164	__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
2165	: "0"(`7`), "2"(`0`));
2166	// eax from xgetbv(0) will tell us whether XMM, YMM, and ZMM state is saved.
2167	uint32_t xcr0, dont_need_edx;
2168	__asm__ __volatile__("xgetbv" : "=a"(xcr0), "=d"(dont_need_edx) : "c"(`0`));
2169
2170	if ((xcr0 & (`1u`<<`1`)) && // XMM register state saved?
2171	(xcr0 & (`1u`<<`2`)) && // YMM register state saved?
2172	(ebx & (`1u`<<`5`))) { // AVX2
2173	// At this point we're at least HSW. Continue checking for SKX.
2174	if ((xcr0 & (`1u`<< `5`)) && // Opmasks state saved?
2175	(xcr0 & (`1u`<< `6`)) && // First 16 ZMM registers saved?
2176	(xcr0 & (`1u`<< `7`)) && // High 16 ZMM registers saved?
2177	(ebx & (`1u`<<`16`)) && // AVX512F
2178	(ebx & (`1u`<<`17`)) && // AVX512DQ
2179	(ebx & (`1u`<<`28`)) && // AVX512CD
2180	(ebx & (`1u`<<`30`)) && // AVX512BW
2181	(ebx & (`1u`<<`31`))) { // AVX512VL
2182	return CpuType::SKX;
2183	}
2184	return CpuType::HSW;
2185	}
2186	}
2187	return CpuType::None;
2188	}();
2189	return type;
2190	}
2191	#endif
2192
2193	#endif
2194
2195	typedef struct {
2196	Op op;
2197	const void* arg;
2198	} OpAndArg;
2199
2200	static OpAndArg select_curve_op(const skcms_Curve* curve, int channel) {
2201	static const struct { Op sRGBish, PQish, HLGish, HLGinvish, table; } ops[] = {
2202	{ Op_tf_r, Op_pq_r, Op_hlg_r, Op_hlginv_r, Op_table_r },
2203	{ Op_tf_g, Op_pq_g, Op_hlg_g, Op_hlginv_g, Op_table_g },
2204	{ Op_tf_b, Op_pq_b, Op_hlg_b, Op_hlginv_b, Op_table_b },
2205	{ Op_tf_a, Op_pq_a, Op_hlg_a, Op_hlginv_a, Op_table_a },
2206	};
2207	const auto& op = ops[channel];
2208
2209	if (curve->table_entries == `0`) {
2210	const OpAndArg noop = { Op_load_a8/doesn't matter/, nullptr };
2211
2212	const skcms_TransferFunction& tf = curve->parametric;
2213
2214	if (tf.g == `1` && tf.a == `1` &&
2215	tf.b == `0` && tf.c == `0` && tf.d == `0` && tf.e == `0` && tf.f == `0`) {
2216	return noop;
2217	}
2218
2219	switch (classify(tf)) {
2220	case Bad: return noop;
2221	case sRGBish: return OpAndArg{op.sRGBish, &tf};
2222	case PQish: return OpAndArg{op.PQish, &tf};
2223	case HLGish: return OpAndArg{op.HLGish, &tf};
2224	case HLGinvish: return OpAndArg{op.HLGinvish, &tf};
2225	}
2226	}
2227	return OpAndArg{op.table, curve};
2228	}
2229
2230	static size_t bytes_per_pixel(skcms_PixelFormat fmt) {
2231	switch (fmt >> `1`) { // ignore rgb/bgr
2232	case skcms_PixelFormat_A_8 >> `1`: return `1`;
2233	case skcms_PixelFormat_G_8 >> `1`: return `1`;
2234	case skcms_PixelFormat_RGBA_8888_Palette8 >> `1`: return `1`;
2235	case skcms_PixelFormat_ABGR_4444 >> `1`: return `2`;
2236	case skcms_PixelFormat_RGB_565 >> `1`: return `2`;
2237	case skcms_PixelFormat_RGB_888 >> `1`: return `3`;
2238	case skcms_PixelFormat_RGBA_8888 >> `1`: return `4`;
2239	case skcms_PixelFormat_RGBA_8888_sRGB >> `1`: return `4`;
2240	case skcms_PixelFormat_RGBA_1010102 >> `1`: return `4`;
2241	case skcms_PixelFormat_RGB_161616LE >> `1`: return `6`;
2242	case skcms_PixelFormat_RGBA_16161616LE >> `1`: return `8`;
2243	case skcms_PixelFormat_RGB_161616BE >> `1`: return `6`;
2244	case skcms_PixelFormat_RGBA_16161616BE >> `1`: return `8`;
2245	case skcms_PixelFormat_RGB_hhh_Norm >> `1`: return `6`;
2246	case skcms_PixelFormat_RGBA_hhhh_Norm >> `1`: return `8`;
2247	case skcms_PixelFormat_RGB_hhh >> `1`: return `6`;
2248	case skcms_PixelFormat_RGBA_hhhh >> `1`: return `8`;
2249	case skcms_PixelFormat_RGB_fff >> `1`: return `12`;
2250	case skcms_PixelFormat_RGBA_ffff >> `1`: return `16`;
2251	}
2252	assert(false);
2253	return `0`;
2254	}
2255
2256	static bool prep_for_destination(const skcms_ICCProfile* profile,
2257	skcms_Matrix3x3* fromXYZD50,
2258	skcms_TransferFunction* invR,
2259	skcms_TransferFunction* invG,
2260	skcms_TransferFunction* invB) {
2261	// We only support destinations with parametric transfer functions
2262	// and with gamuts that can be transformed from XYZD50.
2263	return profile->has_trc
2264	&& profile->has_toXYZD50
2265	&& profile->trc[`0`].table_entries == `0`
2266	&& profile->trc[`1`].table_entries == `0`
2267	&& profile->trc[`2`].table_entries == `0`
2268	&& skcms_TransferFunction_invert(&profile->trc[`0`].parametric, invR)
2269	&& skcms_TransferFunction_invert(&profile->trc[`1`].parametric, invG)
2270	&& skcms_TransferFunction_invert(&profile->trc[`2`].parametric, invB)
2271	&& skcms_Matrix3x3_invert(&profile->toXYZD50, fromXYZD50);
2272	}
2273
2274	bool skcms_Transform(const void* src,
2275	skcms_PixelFormat srcFmt,
2276	skcms_AlphaFormat srcAlpha,
2277	const skcms_ICCProfile* srcProfile,
2278	void* dst,
2279	skcms_PixelFormat dstFmt,
2280	skcms_AlphaFormat dstAlpha,
2281	const skcms_ICCProfile* dstProfile,
2282	size_t npixels) {
2283	return skcms_TransformWithPalette(src, srcFmt, srcAlpha, srcProfile,
2284	dst, dstFmt, dstAlpha, dstProfile,
2285	npixels, nullptr);
2286	}
2287
2288	bool skcms_TransformWithPalette(const void* src,
2289	skcms_PixelFormat srcFmt,
2290	skcms_AlphaFormat srcAlpha,
2291	const skcms_ICCProfile* srcProfile,
2292	void* dst,
2293	skcms_PixelFormat dstFmt,
2294	skcms_AlphaFormat dstAlpha,
2295	const skcms_ICCProfile* dstProfile,
2296	size_t nz,
2297	const void* palette) {
2298	const size_t dst_bpp = bytes_per_pixel(dstFmt),
2299	src_bpp = bytes_per_pixel(srcFmt);
2300	// Let's just refuse if the request is absurdly big.
2301	if (nz * dst_bpp > INT_MAX \|\| nz * src_bpp > INT_MAX) {
2302	return false;
2303	}
2304	int n = (int)nz;
2305
2306	// Null profiles default to sRGB. Passing null for both is handy when doing format conversion.
2307	if (!srcProfile) {
2308	srcProfile = skcms_sRGB_profile();
2309	}
2310	if (!dstProfile) {
2311	dstProfile = skcms_sRGB_profile();
2312	}
2313
2314	// We can't transform in place unless the PixelFormats are the same size.
2315	if (dst == src && dst_bpp != src_bpp) {
2316	return false;
2317	}
2318	// TODO: more careful alias rejection (like, dst == src + 1)?
2319
2320	if (needs_palette(srcFmt) && !palette) {
2321	return false;
2322	}
2323
2324	Op program [`32`];
2325	const void* arguments[`32`];
2326
2327	Op* ops = program;
2328	const void** args = arguments;
2329
2330	// These are always parametric curves of some sort.
2331	skcms_Curve dst_curves[`3`];
2332	dst_curves[`0`].table_entries =
2333	dst_curves[`1`].table_entries =
2334	dst_curves[`2`].table_entries = `0`;
2335
2336	skcms_Matrix3x3 from_xyz;
2337
2338	switch (srcFmt >> `1`) {
2339	default: return false;
2340	case skcms_PixelFormat_A_8 >> `1`: ops++ = Op_load_a8; break*;
2341	case skcms_PixelFormat_G_8 >> `1`: ops++ = Op_load_g8; break*;
2342	case skcms_PixelFormat_ABGR_4444 >> `1`: ops++ = Op_load_4444; break*;
2343	case skcms_PixelFormat_RGB_565 >> `1`: ops++ = Op_load_565; break*;
2344	case skcms_PixelFormat_RGB_888 >> `1`: ops++ = Op_load_888; break*;
2345	case skcms_PixelFormat_RGBA_8888 >> `1`: ops++ = Op_load_8888; break*;
2346	case skcms_PixelFormat_RGBA_1010102 >> `1`: ops++ = Op_load_1010102; break*;
2347	case skcms_PixelFormat_RGB_161616LE >> `1`: ops++ = Op_load_161616LE; break*;
2348	case skcms_PixelFormat_RGBA_16161616LE >> `1`: ops++ = Op_load_16161616LE; break*;
2349	case skcms_PixelFormat_RGB_161616BE >> `1`: ops++ = Op_load_161616BE; break*;
2350	case skcms_PixelFormat_RGBA_16161616BE >> `1`: ops++ = Op_load_16161616BE; break*;
2351	case skcms_PixelFormat_RGB_hhh_Norm >> `1`: ops++ = Op_load_hhh; break*;
2352	case skcms_PixelFormat_RGBA_hhhh_Norm >> `1`: ops++ = Op_load_hhhh; break*;
2353	case skcms_PixelFormat_RGB_hhh >> `1`: ops++ = Op_load_hhh; break*;
2354	case skcms_PixelFormat_RGBA_hhhh >> `1`: ops++ = Op_load_hhhh; break*;
2355	case skcms_PixelFormat_RGB_fff >> `1`: ops++ = Op_load_fff; break*;
2356	case skcms_PixelFormat_RGBA_ffff >> `1`: ops++ = Op_load_ffff; break*;
2357
2358	case skcms_PixelFormat_RGBA_8888_Palette8 >> `1`: *ops++ = Op_load_8888_palette8;
2359	*args++ = palette;
2360	break;
2361	case skcms_PixelFormat_RGBA_8888_sRGB >> `1`:
2362	*ops++ = Op_load_8888;
2363	ops++ = Op_tf_r; args++ = skcms_sRGB_TransferFunction();
2364	ops++ = Op_tf_g; args++ = skcms_sRGB_TransferFunction();
2365	ops++ = Op_tf_b; args++ = skcms_sRGB_TransferFunction();
2366	break;
2367	}
2368	if (srcFmt == skcms_PixelFormat_RGB_hhh_Norm \|\|
2369	srcFmt == skcms_PixelFormat_RGBA_hhhh_Norm) {
2370	*ops++ = Op_clamp;
2371	}
2372	if (srcFmt & `1`) {
2373	*ops++ = Op_swap_rb;
2374	}
2375	skcms_ICCProfile gray_dst_profile;
2376	if ((dstFmt >> `1`) == (skcms_PixelFormat_G_8 >> `1`)) {
2377	// When transforming to gray, stop at XYZ (by setting toXYZ to identity), then transform
2378	// luminance (Y) by the destination transfer function.
2379	gray_dst_profile = *dstProfile;
2380	skcms_SetXYZD50(&gray_dst_profile, &skcms_XYZD50_profile()->toXYZD50);
2381	dstProfile = &gray_dst_profile;
2382	}
2383
2384	if (srcProfile->data_color_space == skcms_Signature_CMYK) {
2385	// Photoshop creates CMYK images as inverse CMYK.
2386	// These happen to be the only ones we've _ever_ seen.
2387	*ops++ = Op_invert;
2388	// With CMYK, ignore the alpha type, to avoid changing K or conflating CMY with K.
2389	srcAlpha = skcms_AlphaFormat_Unpremul;
2390	}
2391
2392	if (srcAlpha == skcms_AlphaFormat_Opaque) {
2393	*ops++ = Op_force_opaque;
2394	} else if (srcAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2395	*ops++ = Op_unpremul;
2396	}
2397
2398	if (dstProfile != srcProfile) {
2399
2400	if (!prep_for_destination(dstProfile,
2401	&from_xyz,
2402	&dst_curves[`0`].parametric,
2403	&dst_curves[`1`].parametric,
2404	&dst_curves[`2`].parametric)) {
2405	return false;
2406	}
2407
2408	if (srcProfile->has_A2B) {
2409	if (srcProfile->A2B.input_channels) {
2410	for (int i = `0`; i < (int)srcProfile->A2B.input_channels; i++) {
2411	OpAndArg oa = select_curve_op(&srcProfile->A2B.input_curves[i], i);
2412	if (oa.arg) {
2413	*ops++ = oa.op;
2414	*args++ = oa.arg;
2415	}
2416	}
2417	*ops++ = Op_clamp;
2418	*ops++ = Op_clut;
2419	*args++ = &srcProfile->A2B;
2420	}
2421
2422	if (srcProfile->A2B.matrix_channels == `3`) {
2423	for (int i = `0`; i < `3`; i++) {
2424	OpAndArg oa = select_curve_op(&srcProfile->A2B.matrix_curves[i], i);
2425	if (oa.arg) {
2426	*ops++ = oa.op;
2427	*args++ = oa.arg;
2428	}
2429	}
2430
2431	static const skcms_Matrix3x4 I = {{
2432	{`1`,`0`,`0`,`0`},
2433	{`0`,`1`,`0`,`0`},
2434	{`0`,`0`,`1`,`0`},
2435	}};
2436	if (`0` != memcmp(&I, &srcProfile->A2B.matrix, sizeof(I))) {
2437	*ops++ = Op_matrix_3x4;
2438	*args++ = &srcProfile->A2B.matrix;
2439	}
2440	}
2441
2442	if (srcProfile->A2B.output_channels == `3`) {
2443	for (int i = `0`; i < `3`; i++) {
2444	OpAndArg oa = select_curve_op(&srcProfile->A2B.output_curves[i], i);
2445	if (oa.arg) {
2446	*ops++ = oa.op;
2447	*args++ = oa.arg;
2448	}
2449	}
2450	}
2451
2452	if (srcProfile->pcs == skcms_Signature_Lab) {
2453	*ops++ = Op_lab_to_xyz;
2454	}
2455
2456	} else if (srcProfile->has_trc && srcProfile->has_toXYZD50) {
2457	for (int i = `0`; i < `3`; i++) {
2458	OpAndArg oa = select_curve_op(&srcProfile->trc[i], i);
2459	if (oa.arg) {
2460	*ops++ = oa.op;
2461	*args++ = oa.arg;
2462	}
2463	}
2464	} else {
2465	return false;
2466	}
2467
2468	// A2B sources should already be in XYZD50 at this point.
2469	// Others still need to be transformed using their toXYZD50 matrix.
2470	// N.B. There are profiles that contain both A2B tags and toXYZD50 matrices.
2471	// If we use the A2B tags, we need to ignore the XYZD50 matrix entirely.
2472	assert (srcProfile->has_A2B \|\| srcProfile->has_toXYZD50);
2473	static const skcms_Matrix3x3 I = {{
2474	{ `1.0f`, `0.0f`, `0.0f` },
2475	{ `0.0f`, `1.0f`, `0.0f` },
2476	{ `0.0f`, `0.0f`, `1.0f` },
2477	}};
2478	const skcms_Matrix3x3* to_xyz = srcProfile->has_A2B ? &I : &srcProfile->toXYZD50;
2479
2480	// There's a chance the source and destination gamuts are identical,
2481	// in which case we can skip the gamut transform.
2482	if (`0` != memcmp(&dstProfile->toXYZD50, to_xyz, sizeof(skcms_Matrix3x3))) {
2483	// Concat the entire gamut transform into from_xyz,
2484	// now slightly misnamed but it's a handy spot to stash the result.
2485	from_xyz = skcms_Matrix3x3_concat(&from_xyz, to_xyz);
2486	*ops++ = Op_matrix_3x3;
2487	*args++ = &from_xyz;
2488	}
2489
2490	// Encode back to dst RGB using its parametric transfer functions.
2491	for (int i = `0`; i < `3`; i++) {
2492	OpAndArg oa = select_curve_op(dst_curves+i, i);
2493	if (oa.arg) {
2494	assert (oa.op != Op_table_r &&
2495	oa.op != Op_table_g &&
2496	oa.op != Op_table_b &&
2497	oa.op != Op_table_a);
2498	*ops++ = oa.op;
2499	*args++ = oa.arg;
2500	}
2501	}
2502	}
2503
2504	// Clamp here before premul to make sure we're clamping to normalized values _and_ gamut,
2505	// not just to values that fit in [0,1].
2506	//
2507	// E.g. r = 1.1, a = 0.5 would fit fine in fixed point after premul (ra=0.55,a=0.5),
2508	// but would be carrying r > 1, which is really unexpected for downstream consumers.
2509	if (dstFmt < skcms_PixelFormat_RGB_hhh) {
2510	*ops++ = Op_clamp;
2511	}
2512	if (dstAlpha == skcms_AlphaFormat_Opaque) {
2513	*ops++ = Op_force_opaque;
2514	} else if (dstAlpha == skcms_AlphaFormat_PremulAsEncoded) {
2515	*ops++ = Op_premul;
2516	}
2517	if (dstFmt & `1`) {
2518	*ops++ = Op_swap_rb;
2519	}
2520	switch (dstFmt >> `1`) {
2521	default: return false;
2522	case skcms_PixelFormat_A_8 >> `1`: ops++ = Op_store_a8; break*;
2523	case skcms_PixelFormat_G_8 >> `1`: ops++ = Op_store_g8; break*;
2524	case skcms_PixelFormat_ABGR_4444 >> `1`: ops++ = Op_store_4444; break*;
2525	case skcms_PixelFormat_RGB_565 >> `1`: ops++ = Op_store_565; break*;
2526	case skcms_PixelFormat_RGB_888 >> `1`: ops++ = Op_store_888; break*;
2527	case skcms_PixelFormat_RGBA_8888 >> `1`: ops++ = Op_store_8888; break*;
2528	case skcms_PixelFormat_RGBA_1010102 >> `1`: ops++ = Op_store_1010102; break*;
2529	case skcms_PixelFormat_RGB_161616LE >> `1`: ops++ = Op_store_161616LE; break*;
2530	case skcms_PixelFormat_RGBA_16161616LE >> `1`: ops++ = Op_store_16161616LE; break*;
2531	case skcms_PixelFormat_RGB_161616BE >> `1`: ops++ = Op_store_161616BE; break*;
2532	case skcms_PixelFormat_RGBA_16161616BE >> `1`: ops++ = Op_store_16161616BE; break*;
2533	case skcms_PixelFormat_RGB_hhh_Norm >> `1`: ops++ = Op_store_hhh; break*;
2534	case skcms_PixelFormat_RGBA_hhhh_Norm >> `1`: ops++ = Op_store_hhhh; break*;
2535	case skcms_PixelFormat_RGB_hhh >> `1`: ops++ = Op_store_hhh; break*;
2536	case skcms_PixelFormat_RGBA_hhhh >> `1`: ops++ = Op_store_hhhh; break*;
2537	case skcms_PixelFormat_RGB_fff >> `1`: ops++ = Op_store_fff; break*;
2538	case skcms_PixelFormat_RGBA_ffff >> `1`: ops++ = Op_store_ffff; break*;
2539
2540	case skcms_PixelFormat_RGBA_8888_sRGB >> `1`:
2541	ops++ = Op_tf_r; args++ = skcms_sRGB_Inverse_TransferFunction();
2542	ops++ = Op_tf_g; args++ = skcms_sRGB_Inverse_TransferFunction();
2543	ops++ = Op_tf_b; args++ = skcms_sRGB_Inverse_TransferFunction();
2544	*ops++ = Op_store_8888;
2545	break;
2546	}
2547
2548	auto run = baseline::run_program;
2549	#if defined(TEST_FOR_HSW)
2550	switch (cpu_type()) {
2551	case CpuType::None: break;
2552	case CpuType::HSW: run = hsw::run_program; break;
2553	case CpuType::SKX: run = hsw::run_program; break;
2554	}
2555	#endif
2556	#if defined(TEST_FOR_SKX)
2557	switch (cpu_type()) {
2558	case CpuType::None: break;
2559	case CpuType::HSW: break;
2560	case CpuType::SKX: run = skx::run_program; break;
2561	}
2562	#endif
2563	run(program, arguments, (const char)src, (char**)dst, n, src_bpp,dst_bpp);
2564	return true;
2565	}
2566
2567	static void assert_usable_as_destination(const skcms_ICCProfile* profile) {
2568	#if defined(NDEBUG)
2569	(void)profile;
2570	#else
2571	skcms_Matrix3x3 fromXYZD50;
2572	skcms_TransferFunction invR, invG, invB;
2573	assert(prep_for_destination(profile, &fromXYZD50, &invR, &invG, &invB));
2574	#endif
2575	}
2576
2577	bool skcms_MakeUsableAsDestination(skcms_ICCProfile* profile) {
2578	skcms_Matrix3x3 fromXYZD50;
2579	if (!profile->has_trc \|\| !profile->has_toXYZD50
2580	\|\| !skcms_Matrix3x3_invert(&profile->toXYZD50, &fromXYZD50)) {
2581	return false;
2582	}
2583
2584	skcms_TransferFunction tf[`3`];
2585	for (int i = `0`; i < `3`; i++) {
2586	skcms_TransferFunction inv;
2587	if (profile->trc[i].table_entries == `0`
2588	&& skcms_TransferFunction_invert(&profile->trc[i].parametric, &inv)) {
2589	tf[i] = profile->trc[i].parametric;
2590	continue;
2591	}
2592
2593	float max_error;
2594	// Parametric curves from skcms_ApproximateCurve() are guaranteed to be invertible.
2595	if (!skcms_ApproximateCurve(&profile->trc[i], &tf[i], &max_error)) {
2596	return false;
2597	}
2598	}
2599
2600	for (int i = `0`; i < `3`; ++i) {
2601	profile->trc[i].table_entries = `0`;
2602	profile->trc[i].parametric = tf[i];
2603	}
2604
2605	assert_usable_as_destination(profile);
2606	return true;
2607	}
2608
2609	bool skcms_MakeUsableAsDestinationWithSingleCurve(skcms_ICCProfile* profile) {
2610	// Operate on a copy of profile, so we can choose the best TF for the original curves
2611	skcms_ICCProfile result = *profile;
2612	if (!skcms_MakeUsableAsDestination(&result)) {
2613	return false;
2614	}
2615
2616	int best_tf = `0`;
2617	float min_max_error = INFINITY_;
2618	for (int i = `0`; i < `3`; i++) {
2619	skcms_TransferFunction inv;
2620	if (!skcms_TransferFunction_invert(&result.trc[i].parametric, &inv)) {
2621	return false;
2622	}
2623
2624	float err = `0`;
2625	for (int j = `0`; j < `3`; ++j) {
2626	err = fmaxf_(err, skcms_MaxRoundtripError(&profile->trc[j], &inv));
2627	}
2628	if (min_max_error > err) {
2629	min_max_error = err;
2630	best_tf = i;
2631	}
2632	}
2633
2634	for (int i = `0`; i < `3`; i++) {
2635	result.trc[i].parametric = result.trc[best_tf].parametric;
2636	}
2637
2638	*profile = result;
2639	assert_usable_as_destination(profile);
2640	return true;
2641	}
2642

Browse the source code of Skia/third_party/skcms/skcms.cc