1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2018 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtGui module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | |
41 | #include "qcolortransform.h" |
42 | #include "qcolortransform_p.h" |
43 | |
44 | #include "qcolormatrix_p.h" |
45 | #include "qcolorspace_p.h" |
46 | #include "qcolortrc_p.h" |
47 | #include "qcolortrclut_p.h" |
48 | |
49 | #include <QtCore/qatomic.h> |
50 | #include <QtCore/qmath.h> |
51 | #include <QtGui/qcolor.h> |
52 | #include <QtGui/qtransform.h> |
53 | #include <QtCore/private/qsimd_p.h> |
54 | |
55 | #include <qdebug.h> |
56 | |
57 | QT_BEGIN_NAMESPACE |
58 | |
59 | QColorTrcLut *lutFromTrc(const QColorTrc &trc) |
60 | { |
61 | if (trc.m_type == QColorTrc::Type::Table) |
62 | return QColorTrcLut::fromTransferTable(trc.m_table); |
63 | if (trc.m_type == QColorTrc::Type::Function) |
64 | return QColorTrcLut::fromTransferFunction(trc.m_fun); |
65 | qWarning() << "TRC uninitialized" ; |
66 | return nullptr; |
67 | } |
68 | |
69 | void QColorTransformPrivate::updateLutsIn() const |
70 | { |
71 | if (colorSpaceIn->lut.generated.loadAcquire()) |
72 | return; |
73 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
74 | if (colorSpaceIn->lut.generated.loadRelaxed()) |
75 | return; |
76 | |
77 | for (int i = 0; i < 3; ++i) { |
78 | if (!colorSpaceIn->trc[i].isValid()) |
79 | return; |
80 | } |
81 | |
82 | if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) { |
83 | colorSpaceIn->lut[0].reset(lutFromTrc(colorSpaceIn->trc[0])); |
84 | colorSpaceIn->lut[1] = colorSpaceIn->lut[0]; |
85 | colorSpaceIn->lut[2] = colorSpaceIn->lut[0]; |
86 | } else { |
87 | for (int i = 0; i < 3; ++i) |
88 | colorSpaceIn->lut[i].reset(lutFromTrc(colorSpaceIn->trc[i])); |
89 | } |
90 | |
91 | colorSpaceIn->lut.generated.storeRelease(1); |
92 | } |
93 | |
94 | void QColorTransformPrivate::updateLutsOut() const |
95 | { |
96 | if (colorSpaceOut->lut.generated.loadAcquire()) |
97 | return; |
98 | QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock); |
99 | if (colorSpaceOut->lut.generated.loadRelaxed()) |
100 | return; |
101 | for (int i = 0; i < 3; ++i) { |
102 | if (!colorSpaceOut->trc[i].isValid()) |
103 | return; |
104 | } |
105 | |
106 | if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) { |
107 | colorSpaceOut->lut[0].reset(lutFromTrc(colorSpaceOut->trc[0])); |
108 | colorSpaceOut->lut[1] = colorSpaceOut->lut[0]; |
109 | colorSpaceOut->lut[2] = colorSpaceOut->lut[0]; |
110 | } else { |
111 | for (int i = 0; i < 3; ++i) |
112 | colorSpaceOut->lut[i].reset(lutFromTrc(colorSpaceOut->trc[i])); |
113 | } |
114 | |
115 | colorSpaceOut->lut.generated.storeRelease(1); |
116 | } |
117 | |
118 | /*! |
119 | \class QColorTransform |
120 | \brief The QColorTransform class is a transformation between color spaces. |
121 | \since 5.14 |
122 | |
123 | \ingroup painting |
124 | \ingroup appearance |
125 | \inmodule QtGui |
126 | |
127 | QColorTransform is an instantiation of a transformation between color spaces. |
    It can be applied to colors and pixels to convert them from one color space to
129 | another. |
130 | |
131 | Setting up a QColorTransform takes some preprocessing, so keeping around |
132 | QColorTransforms that you need often is recommended, instead of generating |
133 | them on the fly. |
134 | */ |
135 | |
136 | |
// Copy constructor: compiler-generated, declared noexcept.
QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept = default;

// Destructor: compiler-generated.
QColorTransform::~QColorTransform() = default;

// NOTE(review): presumably defines the out-of-line destructor of the shared
// data pointer specialization for QColorTransformPrivate - confirm against
// the macro definition.
QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QColorTransformPrivate)
142 | |
143 | /*! |
144 | Applies the color transformation on the QRgb value \a argb. |
145 | |
146 | The input should be opaque or unpremultiplied. |
147 | */ |
148 | QRgb QColorTransform::map(QRgb argb) const |
149 | { |
150 | if (!d) |
151 | return argb; |
152 | constexpr float f = 1.0f / 255.0f; |
153 | QColorVector c = { qRed(argb) * f, qGreen(argb) * f, qBlue(argb) * f }; |
154 | c.x = d->colorSpaceIn->trc[0].apply(c.x); |
155 | c.y = d->colorSpaceIn->trc[1].apply(c.y); |
156 | c.z = d->colorSpaceIn->trc[2].apply(c.z); |
157 | c = d->colorMatrix.map(c); |
158 | c.x = std::max(0.0f, std::min(1.0f, c.x)); |
159 | c.y = std::max(0.0f, std::min(1.0f, c.y)); |
160 | c.z = std::max(0.0f, std::min(1.0f, c.z)); |
161 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
162 | c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); |
163 | c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); |
164 | c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); |
165 | } else { |
166 | c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); |
167 | c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); |
168 | c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); |
169 | } |
170 | |
171 | return qRgba(c.x * 255 + 0.5f, c.y * 255 + 0.5f, c.z * 255 + 0.5f, qAlpha(argb)); |
172 | } |
173 | |
174 | /*! |
175 | Applies the color transformation on the QRgba64 value \a rgba64. |
176 | |
177 | The input should be opaque or unpremultiplied. |
178 | */ |
179 | QRgba64 QColorTransform::map(QRgba64 rgba64) const |
180 | { |
181 | if (!d) |
182 | return rgba64; |
183 | constexpr float f = 1.0f / 65535.0f; |
184 | QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f }; |
185 | c.x = d->colorSpaceIn->trc[0].apply(c.x); |
186 | c.y = d->colorSpaceIn->trc[1].apply(c.y); |
187 | c.z = d->colorSpaceIn->trc[2].apply(c.z); |
188 | c = d->colorMatrix.map(c); |
189 | c.x = std::max(0.0f, std::min(1.0f, c.x)); |
190 | c.y = std::max(0.0f, std::min(1.0f, c.y)); |
191 | c.z = std::max(0.0f, std::min(1.0f, c.z)); |
192 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
193 | c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); |
194 | c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); |
195 | c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); |
196 | } else { |
197 | c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); |
198 | c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); |
199 | c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); |
200 | } |
201 | |
202 | return QRgba64::fromRgba64(c.x * 65535, c.y * 65535, c.z * 65535, rgba64.alpha()); |
203 | } |
204 | |
205 | /*! |
206 | Applies the color transformation on the QColor value \a color. |
207 | |
208 | */ |
209 | QColor QColorTransform::map(const QColor &color) const |
210 | { |
211 | if (!d) |
212 | return color; |
213 | QColor clr = color; |
214 | if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb) |
215 | clr = clr.toRgb(); |
216 | |
217 | QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() }; |
218 | if (clr.spec() == QColor::ExtendedRgb) { |
219 | c.x = d->colorSpaceIn->trc[0].applyExtended(c.x); |
220 | c.y = d->colorSpaceIn->trc[1].applyExtended(c.y); |
221 | c.z = d->colorSpaceIn->trc[2].applyExtended(c.z); |
222 | } else { |
223 | c.x = d->colorSpaceIn->trc[0].apply(c.x); |
224 | c.y = d->colorSpaceIn->trc[1].apply(c.y); |
225 | c.z = d->colorSpaceIn->trc[2].apply(c.z); |
226 | } |
227 | c = d->colorMatrix.map(c); |
228 | bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f; |
229 | if (inGamut) { |
230 | if (d->colorSpaceOut->lut.generated.loadAcquire()) { |
231 | c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x); |
232 | c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y); |
233 | c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z); |
234 | } else { |
235 | c.x = d->colorSpaceOut->trc[0].applyInverse(c.x); |
236 | c.y = d->colorSpaceOut->trc[1].applyInverse(c.y); |
237 | c.z = d->colorSpaceOut->trc[2].applyInverse(c.z); |
238 | } |
239 | } else { |
240 | c.x = d->colorSpaceOut->trc[0].applyInverseExtended(c.x); |
241 | c.y = d->colorSpaceOut->trc[1].applyInverseExtended(c.y); |
242 | c.z = d->colorSpaceOut->trc[2].applyInverseExtended(c.z); |
243 | } |
244 | QColor out; |
245 | out.setRgbF(c.x, c.y, c.z, color.alphaF()); |
246 | return out; |
247 | } |
248 | |
249 | // Optimized sub-routines for fast block based conversion: |
250 | |
251 | static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix) |
252 | { |
253 | #if defined(__SSE2__) |
254 | const __m128 minV = _mm_set1_ps(0.0f); |
255 | const __m128 maxV = _mm_set1_ps(1.0f); |
256 | const __m128 xMat = _mm_loadu_ps(&colorMatrix.r.x); |
257 | const __m128 yMat = _mm_loadu_ps(&colorMatrix.g.x); |
258 | const __m128 zMat = _mm_loadu_ps(&colorMatrix.b.x); |
259 | for (qsizetype j = 0; j < len; ++j) { |
260 | __m128 c = _mm_loadu_ps(&buffer[j].x); |
261 | __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0)); |
262 | __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1)); |
263 | __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2)); |
264 | cx = _mm_mul_ps(cx, xMat); |
265 | cy = _mm_mul_ps(cy, yMat); |
266 | cz = _mm_mul_ps(cz, zMat); |
267 | cx = _mm_add_ps(cx, cy); |
268 | cx = _mm_add_ps(cx, cz); |
269 | // Clamp: |
270 | cx = _mm_min_ps(cx, maxV); |
271 | cx = _mm_max_ps(cx, minV); |
272 | _mm_storeu_ps(&buffer[j].x, cx); |
273 | } |
274 | #else |
275 | for (int j = 0; j < len; ++j) { |
276 | const QColorVector cv = colorMatrix.map(buffer[j]); |
277 | buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x)); |
278 | buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y)); |
279 | buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z)); |
280 | } |
281 | #endif |
282 | } |
283 | |
284 | template<typename T> |
285 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
286 | template<typename T> |
287 | static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr); |
288 | |
289 | #if defined(__SSE2__) |
290 | // Load to [0-alpha] in 4x32 SIMD |
291 | template<typename T> |
292 | static inline void loadP(const T &p, __m128i &v); |
293 | |
294 | template<> |
295 | inline void loadP<QRgb>(const QRgb &p, __m128i &v) |
296 | { |
297 | v = _mm_cvtsi32_si128(p); |
298 | #if defined(__SSE4_1__) |
299 | v = _mm_cvtepu8_epi32(v); |
300 | #else |
301 | v = _mm_unpacklo_epi8(v, _mm_setzero_si128()); |
302 | v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); |
303 | #endif |
304 | } |
305 | |
306 | template<> |
307 | inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v) |
308 | { |
309 | v = _mm_loadl_epi64((const __m128i *)&p); |
310 | #if defined(__SSE4_1__) |
311 | v = _mm_cvtepu16_epi32(v); |
312 | #else |
313 | v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); |
314 | #endif |
315 | // Shuffle to ARGB as the template below expects it |
316 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
317 | } |
318 | |
319 | template<typename T> |
320 | static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
321 | { |
322 | const __m128 v4080 = _mm_set1_ps(4080.f); |
323 | const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); |
324 | for (qsizetype i = 0; i < len; ++i) { |
325 | __m128i v; |
326 | loadP<T>(src[i], v); |
327 | __m128 vf = _mm_cvtepi32_ps(v); |
328 | // Approximate 1/a: |
329 | __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3)); |
330 | __m128 via = _mm_rcp_ps(va); |
331 | via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va))); |
332 | // v * (1/a) |
333 | vf = _mm_mul_ps(vf, via); |
334 | |
335 | // Handle zero alpha |
336 | __m128 vAlphaMask = _mm_cmpeq_ps(va, _mm_set1_ps(0.0f)); |
337 | vf = _mm_andnot_ps(vAlphaMask, vf); |
338 | |
339 | // LUT |
340 | v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); |
341 | const int ridx = _mm_extract_epi16(v, 4); |
342 | const int gidx = _mm_extract_epi16(v, 2); |
343 | const int bidx = _mm_extract_epi16(v, 0); |
344 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
345 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
346 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
347 | vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); |
348 | |
349 | _mm_storeu_ps(&buffer[i].x, vf); |
350 | } |
351 | } |
352 | |
353 | // Load to [0-4080] in 4x32 SIMD |
354 | template<typename T> |
355 | static inline void loadPU(const T &p, __m128i &v); |
356 | |
357 | template<> |
358 | inline void loadPU<QRgb>(const QRgb &p, __m128i &v) |
359 | { |
360 | v = _mm_cvtsi32_si128(p); |
361 | #if defined(__SSE4_1__) |
362 | v = _mm_cvtepu8_epi32(v); |
363 | #else |
364 | v = _mm_unpacklo_epi8(v, _mm_setzero_si128()); |
365 | v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); |
366 | #endif |
367 | v = _mm_slli_epi32(v, 4); |
368 | } |
369 | |
370 | template<> |
371 | inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v) |
372 | { |
373 | v = _mm_loadl_epi64((const __m128i *)&p); |
374 | v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8)); |
375 | #if defined(__SSE4_1__) |
376 | v = _mm_cvtepu16_epi32(v); |
377 | #else |
378 | v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); |
379 | #endif |
380 | v = _mm_srli_epi32(v, 4); |
381 | // Shuffle to ARGB as the template below expects it |
382 | v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2)); |
383 | } |
384 | |
385 | template<typename T> |
386 | void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
387 | { |
388 | const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); |
389 | for (qsizetype i = 0; i < len; ++i) { |
390 | __m128i v; |
391 | loadPU<T>(src[i], v); |
392 | const int ridx = _mm_extract_epi16(v, 4); |
393 | const int gidx = _mm_extract_epi16(v, 2); |
394 | const int bidx = _mm_extract_epi16(v, 0); |
395 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0); |
396 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2); |
397 | v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4); |
398 | __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00); |
399 | _mm_storeu_ps(&buffer[i].x, vf); |
400 | } |
401 | } |
402 | |
403 | #else |
404 | template<> |
405 | void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
406 | { |
407 | for (qsizetype i = 0; i < len; ++i) { |
408 | const uint p = src[i]; |
409 | const int a = qAlpha(p); |
410 | if (a) { |
411 | const float ia = 4080.0f / a; |
412 | const int ridx = int(qRed(p) * ia + 0.5f); |
413 | const int gidx = int(qGreen(p) * ia + 0.5f); |
414 | const int bidx = int(qBlue(p) * ia + 0.5f); |
415 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
416 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
417 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
418 | } else { |
419 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
420 | } |
421 | } |
422 | } |
423 | |
424 | template<> |
425 | void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
426 | { |
427 | for (qsizetype i = 0; i < len; ++i) { |
428 | const QRgba64 &p = src[i]; |
429 | const int a = p.alpha(); |
430 | if (a) { |
431 | const float ia = 4080.0f / a; |
432 | const int ridx = int(p.red() * ia + 0.5f); |
433 | const int gidx = int(p.green() * ia + 0.5f); |
434 | const int bidx = int(p.blue() * ia + 0.5f); |
435 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256)); |
436 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256)); |
437 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256)); |
438 | } else { |
439 | buffer[i].x = buffer[i].y = buffer[i].z = 0.0f; |
440 | } |
441 | } |
442 | } |
443 | |
444 | template<> |
445 | void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
446 | { |
447 | for (qsizetype i = 0; i < len; ++i) { |
448 | const uint p = src[i]; |
449 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(qRed(p)); |
450 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(qGreen(p)); |
451 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(qBlue(p)); |
452 | } |
453 | } |
454 | |
455 | template<> |
456 | void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr) |
457 | { |
458 | for (qsizetype i = 0; i < len; ++i) { |
459 | const QRgba64 &p = src[i]; |
460 | buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(p.red()); |
461 | buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(p.green()); |
462 | buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(p.blue()); |
463 | } |
464 | } |
465 | #endif |
466 | |
467 | static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
468 | const QColorTransformPrivate *d_ptr) |
469 | { |
470 | #if defined(__SSE2__) |
471 | const __m128 v4080 = _mm_set1_ps(4080.f); |
472 | const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256)); |
473 | for (qsizetype i = 0; i < len; ++i) { |
474 | const int a = qAlpha(src[i]); |
475 | __m128 vf = _mm_loadu_ps(&buffer[i].x); |
476 | __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); |
477 | __m128 va = _mm_set1_ps(a); |
478 | va = _mm_mul_ps(va, iFF00); |
479 | const int ridx = _mm_extract_epi16(v, 0); |
480 | const int gidx = _mm_extract_epi16(v, 2); |
481 | const int bidx = _mm_extract_epi16(v, 4); |
482 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 4); |
483 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2); |
484 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
485 | vf = _mm_cvtepi32_ps(v); |
486 | vf = _mm_mul_ps(vf, va); |
487 | v = _mm_cvtps_epi32(vf); |
488 | v = _mm_packs_epi32(v, v); |
489 | v = _mm_insert_epi16(v, a, 3); |
490 | v = _mm_packus_epi16(v, v); |
491 | dst[i] = _mm_cvtsi128_si32(v); |
492 | } |
493 | #else |
494 | for (qsizetype i = 0; i < len; ++i) { |
495 | const int a = qAlpha(src[i]); |
496 | const float fa = a / (255.0f * 256.0f); |
497 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
498 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
499 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
500 | dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
501 | } |
502 | #endif |
503 | } |
504 | |
505 | static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
506 | const QColorTransformPrivate *d_ptr) |
507 | { |
508 | #if defined(__SSE2__) |
509 | const __m128 v4080 = _mm_set1_ps(4080.f); |
510 | for (qsizetype i = 0; i < len; ++i) { |
511 | const int a = qAlpha(src[i]); |
512 | __m128 vf = _mm_loadu_ps(&buffer[i].x); |
513 | __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); |
514 | const int ridx = _mm_extract_epi16(v, 0); |
515 | const int gidx = _mm_extract_epi16(v, 2); |
516 | const int bidx = _mm_extract_epi16(v, 4); |
517 | v = _mm_setzero_si128(); |
518 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
519 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
520 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
521 | v = _mm_add_epi16(v, _mm_set1_epi16(0x80)); |
522 | v = _mm_srli_epi16(v, 8); |
523 | v = _mm_insert_epi16(v, a, 3); |
524 | v = _mm_packus_epi16(v, v); |
525 | dst[i] = _mm_cvtsi128_si32(v); |
526 | } |
527 | #else |
528 | for (qsizetype i = 0; i < len; ++i) { |
529 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
530 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
531 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
532 | dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0); |
533 | } |
534 | #endif |
535 | } |
536 | |
537 | static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len, |
538 | const QColorTransformPrivate *d_ptr) |
539 | { |
540 | Q_UNUSED(src); |
541 | #if defined(__SSE2__) |
542 | const __m128 v4080 = _mm_set1_ps(4080.f); |
543 | for (qsizetype i = 0; i < len; ++i) { |
544 | __m128 vf = _mm_loadu_ps(&buffer[i].x); |
545 | __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080)); |
546 | const int ridx = _mm_extract_epi16(v, 0); |
547 | const int gidx = _mm_extract_epi16(v, 2); |
548 | const int bidx = _mm_extract_epi16(v, 4); |
549 | v = _mm_setzero_si128(); |
550 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2); |
551 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1); |
552 | v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0); |
553 | v = _mm_add_epi16(v, _mm_set1_epi16(0x80)); |
554 | v = _mm_srli_epi16(v, 8); |
555 | v = _mm_insert_epi16(v, 255, 3); |
556 | v = _mm_packus_epi16(v, v); |
557 | dst[i] = _mm_cvtsi128_si32(v); |
558 | } |
559 | #else |
560 | for (qsizetype i = 0; i < len; ++i) { |
561 | const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x); |
562 | const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y); |
563 | const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z); |
564 | dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0); |
565 | } |
566 | #endif |
567 | } |
568 | |
569 | static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
570 | const QColorTransformPrivate *d_ptr) |
571 | { |
572 | for (qsizetype i = 0; i < len; ++i) { |
573 | const int a = src[i].alpha(); |
574 | const float fa = a / (255.0f * 256.0f); |
575 | const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)]; |
576 | const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)]; |
577 | const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)]; |
578 | dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a); |
579 | } |
580 | } |
581 | |
582 | static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
583 | const QColorTransformPrivate *d_ptr) |
584 | { |
585 | for (qsizetype i = 0; i < len; ++i) { |
586 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); |
587 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); |
588 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z); |
589 | dst[i] = qRgba64(r, g, b, src[i].alpha()); |
590 | } |
591 | } |
592 | |
593 | static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len, |
594 | const QColorTransformPrivate *d_ptr) |
595 | { |
596 | Q_UNUSED(src); |
597 | for (qsizetype i = 0; i < len; ++i) { |
598 | const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x); |
599 | const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y); |
600 | const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z); |
601 | dst[i] = qRgba64(r, g, b, 0xFFFF); |
602 | } |
603 | } |
604 | |
// Maximum number of pixels QColorTransformPrivate::apply() converts per pass;
// also the element count of that function's intermediate QColorVector buffer.
static constexpr qsizetype WorkBlockSize = 256;
606 | |
// Raw storage for Count objects of type T, with T's alignment.
//
// No T is constructed or destroyed by this class; it only provides the bytes
// and an implicit conversion to a T pointer to the start of the storage.
template <typename T, int Count = 1>
class QUninitialized
{
public:
    // The storage is the only data member, so it begins at the object's address.
    operator T*() { return reinterpret_cast<T *>(m_storage); }

private:
    alignas(T) char m_storage[sizeof(T) * Count];
};
615 | |
616 | template<typename T> |
617 | void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const |
618 | { |
619 | if (!colorMatrix.isValid()) |
620 | return; |
621 | |
622 | updateLutsIn(); |
623 | updateLutsOut(); |
624 | |
625 | bool doApplyMatrix = (colorMatrix != QColorMatrix::identity()); |
626 | |
627 | QUninitialized<QColorVector, WorkBlockSize> buffer; |
628 | |
629 | qsizetype i = 0; |
630 | while (i < count) { |
631 | const qsizetype len = qMin(count - i, WorkBlockSize); |
632 | if (flags & InputPremultiplied) |
633 | loadPremultiplied(buffer, src + i, len, this); |
634 | else |
635 | loadUnpremultiplied(buffer, src + i, len, this); |
636 | |
637 | if (doApplyMatrix) |
638 | applyMatrix(buffer, len, colorMatrix); |
639 | |
640 | if (flags & InputOpaque) |
641 | storeOpaque(dst + i, src + i, buffer, len, this); |
642 | else if (flags & OutputPremultiplied) |
643 | storePremultiplied(dst + i, src + i, buffer, len, this); |
644 | else |
645 | storeUnpremultiplied(dst + i, src + i, buffer, len, this); |
646 | |
647 | i += len; |
648 | } |
649 | } |
650 | |
651 | /*! |
652 | \internal |
653 | \enum QColorTransformPrivate::TransformFlag |
654 | |
655 | Defines how the transform is to be applied. |
656 | |
657 | \value Unpremultiplied The input and output should both be unpremultiplied. |
658 | \value InputOpaque The input is guaranteed to be opaque. |
659 | \value InputPremultiplied The input is premultiplied. |
660 | \value OutputPremultiplied The output should be premultiplied. |
    \value Premultiplied Both input and output should be premultiplied.
662 | */ |
663 | |
/*!
    \internal
    Prepares a color transformation for fast application by generating the
    lookup tables of the input and output color spaces. You do not need to
    call this explicitly, as apply() generates the tables itself on first use,
    but if you want predictable performance for the first transform you can
    call it in advance.

    \sa QColorTransform::map(), apply()
*/
void QColorTransformPrivate::prepare()
{
    // Both calls return early when the tables already exist.
    updateLutsIn();
    updateLutsOut();
}
678 | |
/*!
    \internal
    Applies the color transformation on \a count QRgb pixels starting from
    \a src and stores the result in \a dst.

    Thread-safe if prepare() has been called first.

    Assumes unpremultiplied data by default. Set \a flags to change defaults.

    \sa prepare()
*/
void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const
{
    // Forward to the shared template implementation, instantiated for QRgb.
    apply<QRgb>(dst, src, count, flags);
}
694 | |
/*!
    \internal
    Applies the color transformation on \a count QRgba64 pixels starting from
    \a src and stores the result in \a dst.

    Thread-safe if prepare() has been called first.

    Assumes unpremultiplied data by default. Set \a flags to change defaults.

    \sa prepare()
*/
void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const
{
    // Forward to the shared template implementation, instantiated for QRgba64.
    apply<QRgba64>(dst, src, count, flags);
}
710 | |
711 | |
712 | QT_END_NAMESPACE |
713 | |