qrgba64_p.h source code [Qt/src/gui/painting/qrgba64_p.h]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 The Qt Company Ltd.
4	** Contact: https://www.qt.io/licensing/
5	**
6	** This file is part of the QtGui module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and The Qt Company. For licensing terms
14	** and conditions see https://www.qt.io/terms-conditions. For further
15	** information use the contact form at https://www.qt.io/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 3 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL3 included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 3 requirements
23	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24	**
25	** GNU General Public License Usage
26	** Alternatively, this file may be used under the terms of the GNU
27	** General Public License version 2.0 or (at your option) the GNU General
28	** Public license version 3 or any later version approved by the KDE Free
29	** Qt Foundation. The licenses are as published by the Free Software
30	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31	** included in the packaging of this file. Please review the following
32	** information to ensure the GNU General Public License requirements will
33	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34	** https://www.gnu.org/licenses/gpl-3.0.html.
35	**
36	** $QT_END_LICENSE$
37	**
38	****************************************************************************/
39
40	#ifndef QRGBA64_P_H
41	#define QRGBA64_P_H
42
43	//
44	// W A R N I N G
45	// -------------
46	//
47	// This file is not part of the Qt API. It exists purely as an
48	// implementation detail. This header file may change from version to
49	// version without notice, or even be removed.
50	//
51	// We mean it.
52	//
53
54	#include "qrgba64.h"
55	#include "qdrawhelper_p.h"
56
57	#include <QtCore/private/qsimd_p.h>
58	#include <QtGui/private/qtguiglobal_p.h>
59
60	QT_BEGIN_NAMESPACE
61
62	inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256)
63	{
64	return QRgba64::fromRgba64(rgba64.red(), rgba64.green(), rgba64.blue(), (rgba64.alpha() * alpha256) >> `8`);
65	}
66
67	inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
68	{
69	return QRgba64::fromRgba64(qt_div_65535(rgba64.red() * alpha65535),
70	qt_div_65535(rgba64.green() * alpha65535),
71	qt_div_65535(rgba64.blue() * alpha65535),
72	qt_div_65535(rgba64.alpha() * alpha65535));
73	}
74
75	#ifdef __SSE2__
76	static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, __m128i va)
77	{
78	__m128i vs = rgba64;
79	vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
80	vs = _mm_add_epi32(vs, _mm_srli_epi32(vs, `16`));
81	vs = _mm_add_epi32(vs, _mm_set1_epi32(`0x8000`));
82	vs = _mm_srai_epi32(vs, `16`);
83	vs = _mm_packs_epi32(vs, _mm_setzero_si128());
84	return vs;
85	}
86	static inline __m128i Q_DECL_VECTORCALL multiplyAlpha65535(__m128i rgba64, uint alpha65535)
87	{
88	const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
89	return multiplyAlpha65535(rgba64, va);
90	}
91	#elif defined(__ARM_NEON__)
92	static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
93	{
94	uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs alpha*
95	vs32 = vsraq_n_u32(vs32, vs32, `16`); // vs = vs + (vs >> 16)
96	return vrshrn_n_u32(vs32, `16`); // vs = (vs + 0x8000) >> 16
97	}
98	static inline uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
99	{
100	uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs alpha*
101	vs32 = vsraq_n_u32(vs32, vs32, `16`); // vs = vs + (vs >> 16)
102	return vrshrn_n_u32(vs32, `16`); // vs = (vs + 0x8000) >> 16
103	}
104	#endif
105
106	template<typename T>
107	static inline T Q_DECL_VECTORCALL multiplyAlpha255(T rgba64, uint alpha255)
108	{
109	#if defined(__SSE2__) \|\| defined(__ARM_NEON__)
110	return multiplyAlpha65535(rgba64, alpha255 * `257`);
111	#else
112	return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
113	qt_div_255(rgba64.green() * alpha255),
114	qt_div_255(rgba64.blue() * alpha255),
115	qt_div_255(rgba64.alpha() * alpha255));
116	#endif
117	}
118
119	inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
120	{
121	return QRgba64::fromRgba64(multiplyAlpha255(x, alpha1) + multiplyAlpha255(y, alpha2));
122	}
123
124	#if defined __SSE2__
125	static inline __m128i Q_DECL_VECTORCALL interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
126	{
127	return _mm_add_epi32(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
128	}
129	#endif
130
131	#if defined __ARM_NEON__
132	inline uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
133	{
134	return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
135	}
136	#endif
137
138	inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
139	{
140	return QRgba64::fromRgba64(multiplyAlpha65535(x, alpha1) + multiplyAlpha65535(y, alpha2));
141	}
142
143	#if defined __SSE2__
144	static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
145	{
146	return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
147	}
148	// alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument.
149	static inline __m128i Q_DECL_VECTORCALL interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
150	{
151	return _mm_add_epi32(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
152	}
153	#endif
154
155	#if defined __ARM_NEON__
156	inline uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
157	{
158	return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
159	}
160	inline uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
161	{
162	return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
163	}
164	#endif
165
166	inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
167	{
168	return QRgba64::fromRgba64(qMin(a.red() + b.red(), `65535`),
169	qMin(a.green() + b.green(), `65535`),
170	qMin(a.blue() + b.blue(), `65535`),
171	qMin(a.alpha() + b.alpha(), `65535`));
172	}
173
174	#if QT_COMPILER_SUPPORTS_HERE(SSE2)
175	QT_FUNCTION_TARGET(SSE2)
176	static inline uint Q_DECL_VECTORCALL toArgb32(__m128i v)
177	{
178	v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
179	v = _mm_add_epi32(v, _mm_set1_epi32(`128`));
180	v = _mm_sub_epi32(v, _mm_srli_epi32(v, `8`));
181	v = _mm_srli_epi32(v, `8`);
182	v = _mm_packs_epi32(v, v);
183	v = _mm_packus_epi16(v, v);
184	return _mm_cvtsi128_si32(v);
185	}
186	#elif defined __ARM_NEON__
187	static inline uint toArgb32(uint16x4_t v)
188	{
189	v = vsub_u16(v, vrshr_n_u16(v, `8`));
190	v = vrshr_n_u16(v, `8`);
191	uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
192	return vget_lane_u32(vreinterpret_u32_u8(v8), `0`);
193	}
194	#endif
195
196	static inline uint toArgb32(QRgba64 rgba64)
197	{
198	#if defined __SSE2__
199	__m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
200	v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
201	return toArgb32(v);
202	#elif defined __ARM_NEON__
203	uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
204	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
205	const uint8x8_t shuffleMask = { `4`, `5`, `2`, `3`, `0`, `1`, `6`, `7` };
206	v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
207	#else
208	v = vext_u16(v, v, `3`);
209	#endif
210	return toArgb32(v);
211	#else
212	return rgba64.toArgb32();
213	#endif
214	}
215
216	static inline uint toRgba8888(QRgba64 rgba64)
217	{
218	#if defined __SSE2__
219	__m128i v = _mm_loadl_epi64((const __m128i *)&rgba64);
220	return toArgb32(v);
221	#elif defined __ARM_NEON__
222	uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
223	return toArgb32(v);
224	#else
225	return ARGB2RGBA(toArgb32(rgba64));
226	#endif
227	}
228
229	static inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
230	{
231	QRgba64 blend;
232	#if defined(__SSE2__)
233	__m128i vd = _mm_loadl_epi64((const __m128i *)&d);
234	__m128i vs = _mm_loadl_epi64((const __m128i *)&s);
235	__m128i va = _mm_cvtsi32_si128(rgbAlpha);
236	va = _mm_unpacklo_epi8(va, va);
237	va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(`3`, `0`, `1`, `2`));
238	__m128i vb = _mm_xor_si128(_mm_set1_epi16(-`1`), va);
239
240	vs = _mm_unpacklo_epi16(_mm_mullo_epi16(vs, va), _mm_mulhi_epu16(vs, va));
241	vd = _mm_unpacklo_epi16(_mm_mullo_epi16(vd, vb), _mm_mulhi_epu16(vd, vb));
242	vd = _mm_add_epi32(vd, vs);
243	vd = _mm_add_epi32(vd, _mm_srli_epi32(vd, `16`));
244	vd = _mm_add_epi32(vd, _mm_set1_epi32(`0x8000`));
245	vd = _mm_srai_epi32(vd, `16`);
246	vd = _mm_packs_epi32(vd, _mm_setzero_si128());
247
248	_mm_storel_epi64((__m128i *)&blend, vd);
249	#elif defined(__ARM_NEON__)
250	uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
251	uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
252	uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha)));
253	uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[`0`]);
254	uint16x4_t vb = vdup_n_u16(`0xffff`);
255	vb = vsub_u16(vb, va);
256
257	uint32x4_t vs32 = vmull_u16(vs, va);
258	uint32x4_t vd32 = vmull_u16(vd, vb);
259	vd32 = vaddq_u32(vd32, vs32);
260	vd32 = vsraq_n_u32(vd32, vd32, `16`);
261	vd = vrshrn_n_u32(vd32, `16`);
262	vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
263	#else
264	const int mr = qRed(rgbAlpha);
265	const int mg = qGreen(rgbAlpha);
266	const int mb = qBlue(rgbAlpha);
267	blend = qRgba64(qt_div_255(s.red() * mr + d.red() * (`255` - mr)),
268	qt_div_255(s.green() * mg + d.green() * (`255` - mg)),
269	qt_div_255(s.blue() * mb + d.blue() * (`255` - mb)),
270	s.alpha());
271	#endif
272	return blend;
273	}
274
275	static inline void blend_pixel(QRgba64 &dst, QRgba64 src)
276	{
277	if (src.isOpaque())
278	dst = src;
279	else if (!src.isTransparent())
280	dst = src + multiplyAlpha65535(dst, `65535` - src.alpha());
281	}
282
283	static inline void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
284	{
285	if (const_alpha == `255`)
286	return blend_pixel(dst, src);
287	if (!src.isTransparent()) {
288	src = multiplyAlpha255(src, const_alpha);
289	dst = src + multiplyAlpha65535(dst, `65535` - src.alpha());
290	}
291	}
292
293	QT_END_NAMESPACE
294
295	#endif // QRGBA64_P_H
296

Browse the source code of Qt/src/gui/painting/qrgba64_p.h