1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtGui module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #ifndef QRGBA64_P_H |
41 | #define QRGBA64_P_H |
42 | |
43 | // |
44 | // W A R N I N G |
45 | // ------------- |
46 | // |
47 | // This file is not part of the Qt API. It exists purely as an |
48 | // implementation detail. This header file may change from version to |
49 | // version without notice, or even be removed. |
50 | // |
51 | // We mean it. |
52 | // |
53 | |
54 | #include "qrgba64.h" |
55 | #include "qdrawhelper_p.h" |
56 | |
57 | #include <QtCore/private/qsimd_p.h> |
58 | #include <QtGui/private/qtguiglobal_p.h> |
59 | |
60 | QT_BEGIN_NAMESPACE |
61 | |
62 | inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256) |
63 | { |
64 | return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> 8); |
65 | } |
66 | |
67 | inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535) |
68 | { |
69 | return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535), |
70 | qt_div_65535(rgba64.green() * alpha65535), |
71 | qt_div_65535(rgba64.blue() * alpha65535), |
72 | qt_div_65535(rgba64.alpha() * alpha65535)); |
73 | } |
74 | |
75 | #ifdef __SSE2__ |
76 | static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, __m128i va) |
77 | { |
78 | __m128i vs = rgba64; |
79 | vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); |
80 | vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, 16)); |
81 | vs = _mm_add_epi32(vs, _mm_set1_epi32(0x8000)); |
82 | vs = _mm_srai_epi32(vs, 16); |
83 | vs = _mm_packs_epi32(vs, _mm_setzero_si128()); |
84 | return vs; |
85 | } |
86 | static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, uint alpha65535) |
87 | { |
88 | const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0)); |
89 | return multiplyAlpha65535(rgba64, va); |
90 | } |
91 | #elif defined(__ARM_NEON__) |
92 | static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535) |
93 | { |
94 | uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha |
95 | vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16) |
96 | return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16 |
97 | } |
98 | static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535) |
99 | { |
100 | uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha |
101 | vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16) |
102 | return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16 |
103 | } |
104 | #endif |
105 | |
106 | template<typename T> |
107 | static inline T Q_DECL_VECTORCALL multiplyAlpha255(T rgba64, uint alpha255) |
108 | { |
109 | #if defined(__SSE2__) || defined(__ARM_NEON__) |
110 | return multiplyAlpha65535(rgba64, alpha255 * 257); |
111 | #else |
112 | return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255), |
113 | qt_div_255(rgba64.green() * alpha255), |
114 | qt_div_255(rgba64.blue() * alpha255), |
115 | qt_div_255(rgba64.alpha() * alpha255)); |
116 | #endif |
117 | } |
118 | |
119 | inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) |
120 | { |
121 | return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2)); |
122 | } |
123 | |
124 | #if defined __SSE2__ |
125 | static inline __m128i Q_DECL_VECTORCALL interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2) |
126 | { |
127 | return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2)); |
128 | } |
129 | #endif |
130 | |
131 | #if defined __ARM_NEON__ |
132 | inline uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2) |
133 | { |
134 | return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2)); |
135 | } |
136 | #endif |
137 | |
138 | inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2) |
139 | { |
140 | return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2)); |
141 | } |
142 | |
143 | #if defined __SSE2__ |
144 | static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2) |
145 | { |
146 | return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); |
147 | } |
148 | // alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument. |
149 | static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2) |
150 | { |
151 | return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); |
152 | } |
153 | #endif |
154 | |
155 | #if defined __ARM_NEON__ |
156 | inline uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2) |
157 | { |
158 | return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); |
159 | } |
160 | inline uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2) |
161 | { |
162 | return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2)); |
163 | } |
164 | #endif |
165 | |
166 | inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b) |
167 | { |
168 | return QRgba64::fromRgba64(qMin(a.red() + b.red(), 65535), |
169 | qMin(a.green() + b.green(), 65535), |
170 | qMin(a.blue() + b.blue(), 65535), |
171 | qMin(a.alpha() + b.alpha(), 65535)); |
172 | } |
173 | |
174 | #if QT_COMPILER_SUPPORTS_HERE(SSE2) |
175 | QT_FUNCTION_TARGET(SSE2) |
176 | static inline uint Q_DECL_VECTORCALL toArgb32(__m128i v) |
177 | { |
178 | v = _mm_unpacklo_epi16(v, _mm_setzero_si128()); |
179 | v = _mm_add_epi32(v, _mm_set1_epi32(128)); |
180 | v = _mm_sub_epi32(v, _mm_srli_epi32(v, 8)); |
181 | v = _mm_srli_epi32(v, 8); |
182 | v = _mm_packs_epi32(v, v); |
183 | v = _mm_packus_epi16(v, v); |
184 | return _mm_cvtsi128_si32(v); |
185 | } |
186 | #elif defined __ARM_NEON__ |
187 | static inline uint toArgb32(uint16x4_t v) |
188 | { |
189 | v = vsub_u16(v, vrshr_n_u16(v, 8)); |
190 | v = vrshr_n_u16(v, 8); |
191 | uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v)); |
192 | return vget_lane_u32(vreinterpret_u32_u8(v8), 0); |
193 | } |
194 | #endif |
195 | |
196 | static inline uint toArgb32(QRgba64 rgba64) |
197 | { |
198 | #if defined __SSE2__ |
199 | __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); |
200 | v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2)); |
201 | return toArgb32(v); |
202 | #elif defined __ARM_NEON__ |
203 | uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); |
204 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
205 | const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 }; |
206 | v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask)); |
207 | #else |
208 | v = vext_u16(v, v, 3); |
209 | #endif |
210 | return toArgb32(v); |
211 | #else |
212 | return rgba64.toArgb32(); |
213 | #endif |
214 | } |
215 | |
216 | static inline uint toRgba8888(QRgba64 rgba64) |
217 | { |
218 | #if defined __SSE2__ |
219 | __m128i v = _mm_loadl_epi64((const __m128i *)&rgba64); |
220 | return toArgb32(v); |
221 | #elif defined __ARM_NEON__ |
222 | uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64))); |
223 | return toArgb32(v); |
224 | #else |
225 | return ARGB2RGBA(toArgb32(rgba64)); |
226 | #endif |
227 | } |
228 | |
229 | static inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha) |
230 | { |
231 | QRgba64 blend; |
232 | #if defined(__SSE2__) |
233 | __m128i vd = _mm_loadl_epi64((const __m128i *)&d); |
234 | __m128i vs = _mm_loadl_epi64((const __m128i *)&s); |
235 | __m128i va = _mm_cvtsi32_si128(rgbAlpha); |
236 | va = _mm_unpacklo_epi8(va, va); |
237 | va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2)); |
238 | __m128i vb = _mm_xor_si128(_mm_set1_epi16(-1), va); |
239 | |
240 | vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va)); |
241 | vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb)); |
242 | vd = _mm_add_epi32(vd, vs); |
243 | vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, 16)); |
244 | vd = _mm_add_epi32(vd, _mm_set1_epi32(0x8000)); |
245 | vd = _mm_srai_epi32(vd, 16); |
246 | vd = _mm_packs_epi32(vd, _mm_setzero_si128()); |
247 | |
248 | _mm_storel_epi64((__m128i *)&blend, vd); |
249 | #elif defined(__ARM_NEON__) |
250 | uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d)); |
251 | uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s)); |
252 | uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha))); |
253 | uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]); |
254 | uint16x4_t vb = vdup_n_u16(0xffff); |
255 | vb = vsub_u16(vb, va); |
256 | |
257 | uint32x4_t vs32 = vmull_u16(vs, va); |
258 | uint32x4_t vd32 = vmull_u16(vd, vb); |
259 | vd32 = vaddq_u32(vd32, vs32); |
260 | vd32 = vsraq_n_u32(vd32, vd32, 16); |
261 | vd = vrshrn_n_u32(vd32, 16); |
262 | vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd)); |
263 | #else |
264 | const int mr = qRed(rgbAlpha); |
265 | const int mg = qGreen(rgbAlpha); |
266 | const int mb = qBlue(rgbAlpha); |
267 | blend = qRgba64(qt_div_255(s.red() * mr + d.red() * (255 - mr)), |
268 | qt_div_255(s.green() * mg + d.green() * (255 - mg)), |
269 | qt_div_255(s.blue() * mb + d.blue() * (255 - mb)), |
270 | s.alpha()); |
271 | #endif |
272 | return blend; |
273 | } |
274 | |
275 | static inline void blend_pixel(QRgba64 &dst, QRgba64 src) |
276 | { |
277 | if (src.isOpaque()) |
278 | dst = src; |
279 | else if (!src.isTransparent()) |
280 | dst = src + multiplyAlpha65535(dst, 65535 - src.alpha()); |
281 | } |
282 | |
283 | static inline void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha) |
284 | { |
285 | if (const_alpha == 255) |
286 | return blend_pixel(dst, src); |
287 | if (!src.isTransparent()) { |
288 | src = multiplyAlpha255(src, const_alpha); |
289 | dst = src + multiplyAlpha65535(dst, 65535 - src.alpha()); |
290 | } |
291 | } |
292 | |
293 | QT_END_NAMESPACE |
294 | |
295 | #endif // QRGBA64_P_H |
296 | |