qdrawhelper.cpp source code [Qt/src/gui/painting/qdrawhelper.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2018 The Qt Company Ltd.
4	** Copyright (C) 2018 Intel Corporation.
5	** Contact: https://www.qt.io/licensing/
6	**
7	** This file is part of the QtGui module of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:LGPL$
10	** Commercial License Usage
11	** Licensees holding valid commercial Qt licenses may use this file in
12	** accordance with the commercial license agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and The Qt Company. For licensing terms
15	** and conditions see https://www.qt.io/terms-conditions. For further
16	** information use the contact form at https://www.qt.io/contact-us.
17	**
18	** GNU Lesser General Public License Usage
19	** Alternatively, this file may be used under the terms of the GNU Lesser
20	** General Public License version 3 as published by the Free Software
21	** Foundation and appearing in the file LICENSE.LGPL3 included in the
22	** packaging of this file. Please review the following information to
23	** ensure the GNU Lesser General Public License version 3 requirements
24	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25	**
26	** GNU General Public License Usage
27	** Alternatively, this file may be used under the terms of the GNU
28	** General Public License version 2.0 or (at your option) the GNU General
29	** Public license version 3 or any later version approved by the KDE Free
30	** Qt Foundation. The licenses are as published by the Free Software
31	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32	** included in the packaging of this file. Please review the following
33	** information to ensure the GNU General Public License requirements will
34	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35	** https://www.gnu.org/licenses/gpl-3.0.html.
36	**
37	** $QT_END_LICENSE$
38	**
39	****************************************************************************/
40
41	#include <qglobal.h>
42
43	#include <qstylehints.h>
44	#include <qguiapplication.h>
45	#include <qatomic.h>
46	#include <private/qcolortrclut_p.h>
47	#include <private/qdrawhelper_p.h>
48	#include <private/qdrawhelper_x86_p.h>
49	#include <private/qdrawingprimitive_sse2_p.h>
50	#include <private/qdrawhelper_neon_p.h>
51	#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) \|\| defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
52	#include <private/qdrawhelper_mips_dsp_p.h>
53	#endif
54	#include <private/qguiapplication_p.h>
55	#include <private/qpaintengine_raster_p.h>
56	#include <private/qpainter_p.h>
57	#include <private/qpixellayout_p.h>
58	#include <private/qrgba64_p.h>
59	#include <qendian.h>
60	#include <qloggingcategory.h>
61	#include <qmath.h>
62
63	QT_BEGIN_NAMESPACE
64
65	Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
66
67	#define MASK(src, a) src = BYTE_MUL(src, a)
68
69	/*
70	constants and structures
71	*/
72
// Constants for the 16.16 fixed-point coordinates used by the transformed
// fetchers in this file (integer part in the upper 16 bits, as shown by the
// "fx >> 16" extractions further down).
enum {
    fixed_scale = 1 << 16, // 1.0 in 16.16 fixed point
    half_point = 1 << 15   // 0.5 in 16.16 fixed point
};
77
78	#if QT_CONFIG(raster_64bit)
79	static void convertRGBA64ToRGBA64PM(QRgba64 buffer, int* count)
80	{
81	for (int i = `0`; i < count; ++i)
82	buffer[i] = buffer[i].premultiplied();
83	}
84
85	static void convertRGBA64PMToRGBA64PM(QRgba64 , int*)
86	{
87	}
88	#endif
89
90	/*
91	Destination fetch. This is simple as we don't have to do bounds checks or
92	transformations
93	*/
94
95	static uint * QT_FASTCALL destFetchMono(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
96	{
97	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
98	uint *start = buffer;
99	const uint *end = buffer + length;
100	while (buffer < end) {
101	*buffer = data[x>>`3`] & (`0x80` >> (x & `7`)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
102	++buffer;
103	++x;
104	}
105	return start;
106	}
107
108	static uint * QT_FASTCALL destFetchMonoLsb(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
109	{
110	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
111	uint *start = buffer;
112	const uint *end = buffer + length;
113	while (buffer < end) {
114	*buffer = data[x>>`3`] & (`0x1` << (x & `7`)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
115	++buffer;
116	++x;
117	}
118	return start;
119	}
120
121	static uint * QT_FASTCALL destFetchARGB32P(uint , QRasterBuffer rasterBuffer, int x, int y, int)
122	{
123	return (uint *)rasterBuffer->scanLine(y) + x;
124	}
125
126	static uint * QT_FASTCALL destFetchRGB16(uint buffer, QRasterBuffer rasterBuffer, int x, int y, int length)
127	{
128	const ushort Q_DECL_RESTRICT data = (const* ushort *)rasterBuffer->scanLine(y) + x;
129	for (int i = `0`; i < length; ++i)
130	buffer[i] = qConvertRgb16To32(data[i]);
131	return buffer;
132	}
133
134	static uint QT_FASTCALL destFetch(uint buffer, QRasterBuffer rasterBuffer, int* x, int y, int length)
135	{
136	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
137	return const_cast<uint >(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr*));
138	}
139
140	static uint QT_FASTCALL destFetchUndefined(uint buffer, QRasterBuffer , int, int, int*)
141	{
142	return buffer;
143	}
144
// 32-bit destination fetch routine for each image format. The initializers
// are positional, so their order must follow the QImage::Format values named
// in the trailing comments. nullptr marks formats that have no fetch routine
// in this table.
static DestFetchProc destFetchProc[QImage::NImageFormats] =
{
    nullptr, // Format_Invalid
    destFetchMono, // Format_Mono,
    destFetchMonoLsb, // Format_MonoLSB
    nullptr, // Format_Indexed8
    destFetchARGB32P, // Format_RGB32
    destFetch, // Format_ARGB32,
    destFetchARGB32P, // Format_ARGB32_Premultiplied
    destFetchRGB16, // Format_RGB16
    destFetch, // Format_ARGB8565_Premultiplied
    destFetch, // Format_RGB666
    destFetch, // Format_ARGB6666_Premultiplied
    destFetch, // Format_RGB555
    destFetch, // Format_ARGB8555_Premultiplied
    destFetch, // Format_RGB888
    destFetch, // Format_RGB444
    destFetch, // Format_ARGB4444_Premultiplied
    destFetch, // Format_RGBX8888
    destFetch, // Format_RGBA8888
    destFetch, // Format_RGBA8888_Premultiplied
    destFetch, // Format_BGR30
    destFetch, // Format_A2BGR30_Premultiplied
    destFetch, // Format_RGB30
    destFetch, // Format_A2RGB30_Premultiplied
    destFetch, // Format_Alpha8
    destFetch, // Format_Grayscale8
    destFetch, // Format_RGBX64
    destFetch, // Format_RGBA64
    destFetch, // Format_RGBA64_Premultiplied
    destFetch, // Format_Grayscale16
    destFetch, // Format_BGR888
};
178
179	#if QT_CONFIG(raster_64bit)
180	static QRgba64 QT_FASTCALL destFetch64(QRgba64 buffer, QRasterBuffer rasterBuffer, int* x, int y, int length)
181	{
182	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
183	return const_cast<QRgba64 >(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr*));
184	}
185
186	static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 , QRasterBuffer rasterBuffer, int x, int y, int)
187	{
188	return (QRgba64 *)rasterBuffer->scanLine(y) + x;
189	}
190
191	static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 buffer, QRasterBuffer , int, int, int)
192	{
193	return buffer;
194	}
195
// 64-bit (QRgba64) destination fetch routine for each image format. The
// initializers are positional, so their order must follow the QImage::Format
// values named in the trailing comments. nullptr marks formats without a
// 64-bit fetch routine; the RGBX64 and RGBA64_Premultiplied entries use
// destFetchRGB64, which returns a pointer into the scanline itself.
static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
{
    nullptr, // Format_Invalid
    nullptr, // Format_Mono,
    nullptr, // Format_MonoLSB
    nullptr, // Format_Indexed8
    destFetch64, // Format_RGB32
    destFetch64, // Format_ARGB32,
    destFetch64, // Format_ARGB32_Premultiplied
    destFetch64, // Format_RGB16
    destFetch64, // Format_ARGB8565_Premultiplied
    destFetch64, // Format_RGB666
    destFetch64, // Format_ARGB6666_Premultiplied
    destFetch64, // Format_RGB555
    destFetch64, // Format_ARGB8555_Premultiplied
    destFetch64, // Format_RGB888
    destFetch64, // Format_RGB444
    destFetch64, // Format_ARGB4444_Premultiplied
    destFetch64, // Format_RGBX8888
    destFetch64, // Format_RGBA8888
    destFetch64, // Format_RGBA8888_Premultiplied
    destFetch64, // Format_BGR30
    destFetch64, // Format_A2BGR30_Premultiplied
    destFetch64, // Format_RGB30
    destFetch64, // Format_A2RGB30_Premultiplied
    destFetch64, // Format_Alpha8
    destFetch64, // Format_Grayscale8
    destFetchRGB64, // Format_RGBX64
    destFetch64, // Format_RGBA64
    destFetchRGB64, // Format_RGBA64_Premultiplied
    destFetch64, // Format_Grayscale16
    destFetch64, // Format_BGR888
};
229	#endif
230
231	/*
232	Returns the color in the mono destination color table
233	that is the "nearest" to /color/.
234	*/
235	static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
236	{
237	QRgb color_0 = qPremultiply(rbuf->destColor0);
238	QRgb color_1 = qPremultiply(rbuf->destColor1);
239	color = qPremultiply(color);
240
241	int r = qRed(color);
242	int g = qGreen(color);
243	int b = qBlue(color);
244	int rx, gx, bx;
245	int dist_0, dist_1;
246
247	rx = r - qRed(color_0);
248	gx = g - qGreen(color_0);
249	bx = b - qBlue(color_0);
250	dist_0 = rxrx + gxgx + bx*bx;
251
252	rx = r - qRed(color_1);
253	gx = g - qGreen(color_1);
254	bx = b - qBlue(color_1);
255	dist_1 = rxrx + gxgx + bx*bx;
256
257	if (dist_0 < dist_1)
258	return color_0;
259	return color_1;
260	}
261
262	/*
263	Destination store.
264	*/
265
266	static void QT_FASTCALL destStoreMono(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
267	{
268	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
269	if (rasterBuffer->monoDestinationWithClut) {
270	for (int i = `0`; i < length; ++i) {
271	if (buffer[i] == rasterBuffer->destColor0) {
272	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
273	} else if (buffer[i] == rasterBuffer->destColor1) {
274	data[x >> `3`] \|= `0x80` >> (x & `7`);
275	} else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
276	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
277	} else {
278	data[x >> `3`] \|= `0x80` >> (x & `7`);
279	}
280	++x;
281	}
282	} else {
283	for (int i = `0`; i < length; ++i) {
284	if (qGray(buffer[i]) < int(qt_bayer_matrix[y & `15`][x & `15`]))
285	data[x >> `3`] \|= `0x80` >> (x & `7`);
286	else
287	data[x >> `3`] &= ~(`0x80` >> (x & `7`));
288	++x;
289	}
290	}
291	}
292
293	static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
294	{
295	uchar Q_DECL_RESTRICT data = (uchar )rasterBuffer->scanLine(y);
296	if (rasterBuffer->monoDestinationWithClut) {
297	for (int i = `0`; i < length; ++i) {
298	if (buffer[i] == rasterBuffer->destColor0) {
299	data[x >> `3`] &= ~(`1` << (x & `7`));
300	} else if (buffer[i] == rasterBuffer->destColor1) {
301	data[x >> `3`] \|= `1` << (x & `7`);
302	} else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
303	data[x >> `3`] &= ~(`1` << (x & `7`));
304	} else {
305	data[x >> `3`] \|= `1` << (x & `7`);
306	}
307	++x;
308	}
309	} else {
310	for (int i = `0`; i < length; ++i) {
311	if (qGray(buffer[i]) < int(qt_bayer_matrix[y & `15`][x & `15`]))
312	data[x >> `3`] \|= `1` << (x & `7`);
313	else
314	data[x >> `3`] &= ~(`1` << (x & `7`));
315	++x;
316	}
317	}
318	}
319
320	static void QT_FASTCALL destStoreRGB16(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
321	{
322	quint16 data = (quint16)rasterBuffer->scanLine(y) + x;
323	for (int i = `0`; i < length; ++i)
324	data[i] = qConvertRgb32To16(buffer[i]);
325	}
326
327	static void QT_FASTCALL destStore(QRasterBuffer rasterBuffer, int* x, int y, const uint buffer, int* length)
328	{
329	const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
330	ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
331	if (!layout->premultiplied && !layout->hasAlphaChannel)
332	store = layout->storeFromRGB32;
333	uchar *dest = rasterBuffer->scanLine(y);
334	store(dest, buffer, x, length, nullptr, nullptr);
335	}
336
// 32-bit destination store routine for each image format. The initializers
// are positional, so their order must follow the QImage::Format values named
// in the trailing comments. nullptr marks formats with no store routine here.
// NOTE(review): RGB32 and ARGB32_Premultiplied are nullptr presumably because
// their fetch routine (destFetchARGB32P) returns a pointer into the scanline,
// so results are already in place - confirm against the callers.
static DestStoreProc destStoreProc[QImage::NImageFormats] =
{
    nullptr, // Format_Invalid
    destStoreMono, // Format_Mono,
    destStoreMonoLsb, // Format_MonoLSB
    nullptr, // Format_Indexed8
    nullptr, // Format_RGB32
    destStore, // Format_ARGB32,
    nullptr, // Format_ARGB32_Premultiplied
    destStoreRGB16, // Format_RGB16
    destStore, // Format_ARGB8565_Premultiplied
    destStore, // Format_RGB666
    destStore, // Format_ARGB6666_Premultiplied
    destStore, // Format_RGB555
    destStore, // Format_ARGB8555_Premultiplied
    destStore, // Format_RGB888
    destStore, // Format_RGB444
    destStore, // Format_ARGB4444_Premultiplied
    destStore, // Format_RGBX8888
    destStore, // Format_RGBA8888
    destStore, // Format_RGBA8888_Premultiplied
    destStore, // Format_BGR30
    destStore, // Format_A2BGR30_Premultiplied
    destStore, // Format_RGB30
    destStore, // Format_A2RGB30_Premultiplied
    destStore, // Format_Alpha8
    destStore, // Format_Grayscale8
    destStore, // Format_RGBX64
    destStore, // Format_RGBA64
    destStore, // Format_RGBA64_Premultiplied
    destStore, // Format_Grayscale16
    destStore, // Format_BGR888
};
370
371	#if QT_CONFIG(raster_64bit)
372	static void QT_FASTCALL destStore64(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length)
373	{
374	auto store = qStoreFromRGBA64PM[rasterBuffer->format];
375	uchar *dest = rasterBuffer->scanLine(y);
376	store(dest, buffer, x, length, nullptr, nullptr);
377	}
378
379	static void QT_FASTCALL destStore64RGBA64(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length)
380	{
381	QRgba64 dest = reinterpret_cast<QRgba64>(rasterBuffer->scanLine(y)) + x;
382	for (int i = `0`; i < length; ++i) {
383	dest[i] = buffer[i].unpremultiplied();
384	}
385	}
386
// 64-bit (QRgba64) destination store routine for each image format. The
// initializers are positional, so their order must follow the QImage::Format
// values named in the trailing comments. nullptr marks formats with no 64-bit
// store routine here.
// NOTE(review): RGBX64 and RGBA64_Premultiplied are nullptr presumably
// because their fetch routine (destFetchRGB64) returns a pointer into the
// scanline, so results are already in place - confirm against the callers.
static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
{
    nullptr, // Format_Invalid
    nullptr, // Format_Mono,
    nullptr, // Format_MonoLSB
    nullptr, // Format_Indexed8
    destStore64, // Format_RGB32
    destStore64, // Format_ARGB32,
    destStore64, // Format_ARGB32_Premultiplied
    destStore64, // Format_RGB16
    destStore64, // Format_ARGB8565_Premultiplied
    destStore64, // Format_RGB666
    destStore64, // Format_ARGB6666_Premultiplied
    destStore64, // Format_RGB555
    destStore64, // Format_ARGB8555_Premultiplied
    destStore64, // Format_RGB888
    destStore64, // Format_RGB444
    destStore64, // Format_ARGB4444_Premultiplied
    destStore64, // Format_RGBX8888
    destStore64, // Format_RGBA8888
    destStore64, // Format_RGBA8888_Premultiplied
    destStore64, // Format_BGR30
    destStore64, // Format_A2BGR30_Premultiplied
    destStore64, // Format_RGB30
    destStore64, // Format_A2RGB30_Premultiplied
    destStore64, // Format_Alpha8
    destStore64, // Format_Grayscale8
    nullptr, // Format_RGBX64
    destStore64RGBA64, // Format_RGBA64
    nullptr, // Format_RGBA64_Premultiplied
    destStore64, // Format_Grayscale16
    destStore64, // Format_BGR888
};
420	#endif
421
422	/*
423	Source fetches
424
425	This is a bit more complicated, as we need several fetch routines for every surface type
426
427	We need 5 fetch methods per surface type:
428	untransformed
429	transformed (tiled and not tiled)
430	transformed bilinear (tiled and not tiled)
431
432	We don't need bounds checks for untransformed, but we need them for the other ones.
433
434	The generic implementation does pixel by pixel fetches
435	*/
436
// Kinds of texture fetch. Used in this file as the template argument that
// selects tiled versus clamped coordinate handling in the transformed
// fetchers.
enum TextureBlendType {
    BlendUntransformed,
    BlendTiled,
    BlendTransformed,
    BlendTransformedTiled,
    BlendTransformedBilinear,
    BlendTransformedBilinearTiled,
    NBlendTypes // number of blend types, not a blend type itself
};
446
447	static const uint QT_FASTCALL fetchUntransformed(uint buffer, const Operator *,
448	const QSpanData data, int* y, int x, int length)
449	{
450	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
451	return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
452	}
453
454	static const uint QT_FASTCALL fetchUntransformedARGB32PM(uint , const Operator *,
455	const QSpanData data, int* y, int x, int)
456	{
457	const uchar *scanLine = data->texture.scanLine(y);
458	return reinterpret_cast<const uint *>(scanLine) + x;
459	}
460
461	static const uint QT_FASTCALL fetchUntransformedRGB16(uint buffer, const Operator *,
462	const QSpanData data, int* y, int x,
463	int length)
464	{
465	const quint16 scanLine = (const* quint16 *)data->texture.scanLine(y) + x;
466	for (int i = `0`; i < length; ++i)
467	buffer[i] = qConvertRgb16To32(scanLine[i]);
468	return buffer;
469	}
470
471	#if QT_CONFIG(raster_64bit)
472	static const QRgba64 QT_FASTCALL fetchUntransformed64(QRgba64 buffer, const Operator *,
473	const QSpanData data, int* y, int x, int length)
474	{
475	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
476	return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
477	}
478
479	static const QRgba64 QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 , const Operator *,
480	const QSpanData data, int* y, int x, int)
481	{
482	const uchar *scanLine = data->texture.scanLine(y);
483	return reinterpret_cast<const QRgba64 *>(scanLine) + x;
484	}
485	#endif
486
487	template<TextureBlendType blendType>
488	inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
489	{
490	static_assert(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
491	if (blendType == BlendTransformedTiled) {
492	if (v < `0` \|\| v >= max) {
493	v %= max;
494	if (v < `0`) v += max;
495	}
496	} else {
497	v = qBound(l1, v, l2);
498	}
499	}
500
501	static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
502	{
503	if (Q_UNLIKELY(!data->fast_matrix))
504	return false;
505
506	qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
507	qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
508	qreal minc = std::min(fx, fy);
509	qreal maxc = std::max(fx, fy);
510	fx += std::trunc(data->m11 * fixed_scale) * length;
511	fy += std::trunc(data->m12 * fixed_scale) * length;
512	minc = std::min(minc, std::min(fx, fy));
513	maxc = std::max(maxc, std::max(fx, fy));
514
515	return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
516	}
517
518	template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
519	static void QT_FASTCALL fetchTransformed_fetcher(T buffer, const* QSpanData *data,
520	int y, int x, int length)
521	{
522	static_assert(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
523	const QTextureData &image = data->texture;
524
525	const qreal cx = x + qreal(`0.5`);
526	const qreal cy = y + qreal(`0.5`);
527
528	constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
529	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
530	if (!useFetch)
531	Q_ASSERT(layout->bpp == bpp);
532	// When templated 'fetch' should be inlined at compile time:
533	const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixelTable[layout->bpp] : FetchPixelFunc(qFetchPixel<bpp>);
534
535	if (canUseFastMatrixPath(cx, cy, length, data)) {
536	// The increment pr x in the scanline
537	int fdx = (int)(data->m11 * fixed_scale);
538	int fdy = (int)(data->m12 * fixed_scale);
539
540	int fx = int((data->m21 * cy
541	+ data->m11 * cx + data->dx) * fixed_scale);
542	int fy = int((data->m22 * cy
543	+ data->m12 * cx + data->dy) * fixed_scale);
544
545	if (fdy == `0`) { // simple scale, no rotation or shear
546	int py = (fy >> `16`);
547	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
548	const uchar *src = image.scanLine(py);
549
550	int i = `0`;
551	if (blendType == BlendTransformed) {
552	int fastLen = length;
553	if (fdx > `0`)
554	fastLen = qMin(fastLen, int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
555	else if (fdx < `0`)
556	fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
557
558	for (; i < fastLen; ++i) {
559	int x1 = (fx >> `16`);
560	int x2 = x1;
561	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1);
562	if (x1 == x2)
563	break;
564	if (useFetch)
565	buffer[i] = fetch(src, x1);
566	else
567	buffer[i] = reinterpret_cast<const T*>(src)[x1];
568	fx += fdx;
569	}
570
571	for (; i < fastLen; ++i) {
572	int px = (fx >> `16`);
573	if (useFetch)
574	buffer[i] = fetch(src, px);
575	else
576	buffer[i] = reinterpret_cast<const T*>(src)[px];
577	fx += fdx;
578	}
579	}
580
581	for (; i < length; ++i) {
582	int px = (fx >> `16`);
583	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
584	if (useFetch)
585	buffer[i] = fetch(src, px);
586	else
587	buffer[i] = reinterpret_cast<const T*>(src)[px];
588	fx += fdx;
589	}
590	} else { // rotation or shear
591	int i = `0`;
592	if (blendType == BlendTransformed) {
593	int fastLen = length;
594	if (fdx > `0`)
595	fastLen = qMin(fastLen, int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
596	else if (fdx < `0`)
597	fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
598	if (fdy > `0`)
599	fastLen = qMin(fastLen, int((qint64(image.y2 - `1`) * fixed_scale - fy) / fdy));
600	else if (fdy < `0`)
601	fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
602
603	for (; i < fastLen; ++i) {
604	int x1 = (fx >> `16`);
605	int y1 = (fy >> `16`);
606	int x2 = x1;
607	int y2 = y1;
608	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1);
609	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1);
610	if (x1 == x2 && y1 == y2)
611	break;
612	if (useFetch)
613	buffer[i] = fetch(image.scanLine(y1), x1);
614	else
615	buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1];
616	fx += fdx;
617	fy += fdy;
618	}
619
620	for (; i < fastLen; ++i) {
621	int px = (fx >> `16`);
622	int py = (fy >> `16`);
623	if (useFetch)
624	buffer[i] = fetch(image.scanLine(py), px);
625	else
626	buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
627	fx += fdx;
628	fy += fdy;
629	}
630	}
631
632	for (; i < length; ++i) {
633	int px = (fx >> `16`);
634	int py = (fy >> `16`);
635	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
636	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
637	if (useFetch)
638	buffer[i] = fetch(image.scanLine(py), px);
639	else
640	buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
641	fx += fdx;
642	fy += fdy;
643	}
644	}
645	} else {
646	const qreal fdx = data->m11;
647	const qreal fdy = data->m12;
648	const qreal fdw = data->m13;
649
650	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
651	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
652	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
653
654	T *const end = buffer + length;
655	T *b = buffer;
656	while (b < end) {
657	const qreal iw = fw == `0` ? `1` : `1` / fw;
658	const qreal tx = fx * iw;
659	const qreal ty = fy * iw;
660	int px = qFloor(tx);
661	int py = qFloor(ty);
662
663	fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, py);
664	fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, px);
665	if (useFetch)
666	*b = fetch(image.scanLine(py), px);
667	else
668	b = reinterpret_cast<const* T*>(image.scanLine(py))[px];
669
670	fx += fdx;
671	fy += fdy;
672	fw += fdw;
673	//force increment to avoid /0
674	if (!fw) {
675	fw += fdw;
676	}
677	++b;
678	}
679	}
680	}
681
682	template<TextureBlendType blendType, QPixelLayout::BPP bpp>
683	static const uint QT_FASTCALL fetchTransformed(uint buffer, const Operator , const* QSpanData *data,
684	int y, int x, int length)
685	{
686	static_assert(blendType == BlendTransformed \|\| blendType == BlendTransformedTiled);
687	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
688	fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
689	layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
690	return buffer;
691	}
692
693	#if QT_CONFIG(raster_64bit)
694	template<TextureBlendType blendType> / either BlendTransformed or BlendTransformedTiled /
695	static const QRgba64 QT_FASTCALL fetchTransformed64(QRgba64 buffer, const Operator , const* QSpanData *data,
696	int y, int x, int length)
697	{
698	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
699	if (layout->bpp != QPixelLayout::BPP64) {
700	uint buffer32[BufferSize];
701	Q_ASSERT(length <= BufferSize);
702	if (layout->bpp == QPixelLayout::BPP32)
703	fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
704	else
705	fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
706	return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
707	}
708
709	fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
710	if (data->texture.format == QImage::Format_RGBA64)
711	convertRGBA64ToRGBA64PM(buffer, length);
712	return buffer;
713	}
714	#endif
715
/** \internal
  Interpolates 4 ARGB pixels (top-left, top-right, bottom-left, bottom-right)
  with the distx and disty factors.
  distx and disty must be between 0 and 16; 0 selects the left/top pixel and
  16 the right/bottom one. The four weights always sum to 16*16 = 256.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    const uint distxy = distx * disty;
    // Weights, with idistx = 16 - distx and idisty = 16 - disty:
    //   idistx * idisty = 16*16 - 16*distx - 16*disty + distxy
    //    distx * idisty = 16*distx - distxy
    //   idistx *  disty = 16*disty - distxy
    //    distx *  disty = distxy
    const uint wtl = 16*16 - 16*distx - 16*disty + distxy;
    const uint wtr = distx*16 - distxy;
    const uint wbl = disty*16 - distxy;
    const uint wbr = distxy;

    // Red/blue and alpha/green are processed as pairs, two channels per
    // multiplication. Alpha/green is shifted down by 8 first so the weighted
    // sum lands back in its original byte positions.
    const uint rb = (tl & 0x00ff00ff) * wtl
                  + (tr & 0x00ff00ff) * wtr
                  + (bl & 0x00ff00ff) * wbl
                  + (br & 0x00ff00ff) * wbr;
    const uint ag = ((tl & 0xff00ff00) >> 8) * wtl
                  + ((tr & 0xff00ff00) >> 8) * wtr
                  + ((bl & 0xff00ff00) >> 8) * wbl
                  + ((br & 0xff00ff00) >> 8) * wbr;
    return ((rb >> 8) & 0x00ff00ff) | (ag & 0xff00ff00);
}
735
736	#if defined(__SSE2__)
// SSE2 version of the 4-pixel interpolation. tl/tr/bl/br, distx and disty are
// __m128i values; the four weights are formed in 16-bit lanes from distx,
// disty and v_256, each pixel is split into alpha/green (shifted down by 8)
// and red/blue (masked with colorMask) halves, the weighted halves are
// summed, and the combined 128-bit result is written to 'b' with an unaligned
// store.
// NOTE(review): presumably distx/disty hold values in 0..16 per 16-bit lane
// and v_256 holds 256 per lane, mirroring interpolate_4_pixels_16 - confirm
// at the call sites.
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
771	#endif
772
773	#if defined(__ARM_NEON__)
// NEON version of the 4-pixel interpolation. tl/tr/bl/br, distx and disty are
// int16x8_t values; unlike the SSE2 macro, the caller passes disty already
// shifted left by 4 as disty_. Each pixel is split into alpha/green (shifted
// down by 8) and red/blue (masked with colorMask) halves, the weighted halves
// are accumulated with multiply-add, and the combined result is stored to 'b'.
// NOTE(review): presumably distx/disty hold values in 0..16 per 16-bit lane
// and v_256 holds 256 per lane - confirm at the call sites.
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
804	#endif
805
// Computes the pair of neighbouring source coordinates (v1, v2) used for
// bilinear sampling along one axis. Declared only; behavior is supplied by
// the explicit specializations for the bilinear blend types.
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
808
809	template<>
810	inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
811	{
812	v1 %= max;
813	if (v1 < `0`)
814	v1 += max;
815	v2 = v1 + `1`;
816	if (v2 == max)
817	v2 = `0`;
818	Q_ASSERT(v1 >= `0` && v1 < max);
819	Q_ASSERT(v2 >= `0` && v2 < max);
820	}
821
822	template<>
823	inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
824	{
825	if (v1 < l1)
826	v2 = v1 = l1;
827	else if (v1 >= l2)
828	v2 = v1 = l2;
829	else
830	v2 = v1 + `1`;
831	Q_ASSERT(v1 >= l1 && v1 <= l2);
832	Q_ASSERT(v2 >= l1 && v2 <= l2);
833	}
834
// Categories of transform for the fast bilinear helpers.
// NOTE(review): presumably used to index per-transform helper tables later in
// the file - confirm against the table definitions.
enum FastTransformTypes {
    SimpleScaleTransform,
    UpscaleTransform,
    DownscaleTransform,
    RotateTransform,
    FastRotateTransform,
    NFastTransformTypes // number of transform types, not a type itself
};
843
844	// Completes the partial interpolation stored in IntermediateBuffer.
845	// by performing the x-axis interpolation and joining the RB and AG buffers.
846	static void QT_FASTCALL intermediate_adder(uint b, uint end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
847	{
848	#if defined(QT_COMPILER_SUPPORTS_AVX2)
849	extern void QT_FASTCALL intermediate_adder_avx2(uint b, uint end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
850	if (qCpuHasFeature(ArchHaswell))
851	return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
852	#endif
853
854	// Switch to intermediate buffer coordinates
855	fx -= offset * fixed_scale;
856
857	while (b < end) {
858	const int x = (fx >> `16`);
859
860	const uint distx = (fx & `0x0000ffff`) >> `8`;
861	const uint idistx = `256` - distx;
862	const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + `1`] * distx) & `0xff00ff00`;
863	const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + `1`] * distx) & `0xff00ff00`;
864	*b = (rb >> `8`) \| ag;
865	b++;
866	fx += fdx;
867	}
868	fx += offset * fixed_scale;
869	}
870
871	typedef void (QT_FASTCALL BilinearFastTransformHelper)(uint b, uint end, const* QTextureData &image, int &fx, int &fy, int fdx, int fdy);
872
873	template<TextureBlendType blendType>
874	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint b, uint end, const QTextureData &image,
875	int &fx, int &fy, int fdx, int /fdy/)
876	{
877	int y1 = (fy >> `16`);
878	int y2;
879	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
880	const uint s1 = (const* uint *)image.scanLine(y1);
881	const uint s2 = (const* uint *)image.scanLine(y2);
882
883	const int disty = (fy & `0x0000ffff`) >> `8`;
884	const int idisty = `256` - disty;
885	const int length = end - b;
886
887	// The intermediate buffer is generated in the positive direction
888	const int adjust = (fdx < `0`) ? fdx * length : `0`;
889	const int offset = (fx + adjust) >> `16`;
890	int x = offset;
891
892	IntermediateBuffer intermediate;
893	// count is the size used in the intermediate.buffer.
894	int count = (qint64(length) * qAbs(fdx) + fixed_scale - `1`) / fixed_scale + `2`;
895	// length is supposed to be <= BufferSize either because data->m11 < 1 or
896	// data->m11 < 2, and any larger buffers split
897	Q_ASSERT(count <= BufferSize + `2`);
898	int f = `0`;
899	int lim = count;
900	if (blendType == BlendTransformedBilinearTiled) {
901	x %= image.width;
902	if (x < `0`) x += image.width;
903	} else {
904	lim = qMin(count, image.x2 - x);
905	if (x < image.x1) {
906	Q_ASSERT(x < image.x2);
907	uint t = s1[image.x1];
908	uint b = s2[image.x1];
909	quint32 rb = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
910	quint32 ag = ((((t>>`8`) & `0xff00ff`) * idisty + ((b>>`8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
911	do {
912	intermediate.buffer_rb[f] = rb;
913	intermediate.buffer_ag[f] = ag;
914	f++;
915	x++;
916	} while (x < image.x1 && f < lim);
917	}
918	}
919
920	if (blendType != BlendTransformedBilinearTiled) {
921	#if defined(__SSE2__)
922	const __m128i disty_ = _mm_set1_epi16(disty);
923	const __m128i idisty_ = _mm_set1_epi16(idisty);
924	const __m128i colorMask = _mm_set1_epi32(`0x00ff00ff`);
925
926	lim -= `3`;
927	for (; f < lim; x += `4`, f += `4`) {
928	// Load 4 pixels from s1, and split the alpha-green and red-blue component
929	__m128i top = _mm_loadu_si128((const __m128i)((const* uint *)(s1)+x));
930	__m128i topAG = _mm_srli_epi16(top, `8`);
931	__m128i topRB = _mm_and_si128(top, colorMask);
932	// Multiplies each color component by idisty
933	topAG = _mm_mullo_epi16 (topAG, idisty_);
934	topRB = _mm_mullo_epi16 (topRB, idisty_);
935
936	// Same for the s2 vector
937	__m128i bottom = _mm_loadu_si128((const __m128i)((const* uint *)(s2)+x));
938	__m128i bottomAG = _mm_srli_epi16(bottom, `8`);
939	__m128i bottomRB = _mm_and_si128(bottom, colorMask);
940	bottomAG = _mm_mullo_epi16 (bottomAG, disty_);
941	bottomRB = _mm_mullo_epi16 (bottomRB, disty_);
942
943	// Add the values, and shift to only keep 8 significant bits per colors
944	__m128i rAG =_mm_add_epi16(topAG, bottomAG);
945	rAG = _mm_srli_epi16(rAG, `8`);
946	_mm_storeu_si128((__m128i*)(&intermediate.buffer_ag[f]), rAG);
947	__m128i rRB =_mm_add_epi16(topRB, bottomRB);
948	rRB = _mm_srli_epi16(rRB, `8`);
949	_mm_storeu_si128((__m128i*)(&intermediate.buffer_rb[f]), rRB);
950	}
951	#elif defined(__ARM_NEON__)
952	const int16x8_t disty_ = vdupq_n_s16(disty);
953	const int16x8_t idisty_ = vdupq_n_s16(idisty);
954	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
955
956	lim -= `3`;
957	for (; f < lim; x += `4`, f += `4`) {
958	// Load 4 pixels from s1, and split the alpha-green and red-blue component
959	int16x8_t top = vld1q_s16((int16_t)((const* uint *)(s1)+x));
960	int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), `8`));
961	int16x8_t topRB = vandq_s16(top, colorMask);
962	// Multiplies each color component by idisty
963	topAG = vmulq_s16(topAG, idisty_);
964	topRB = vmulq_s16(topRB, idisty_);
965
966	// Same for the s2 vector
967	int16x8_t bottom = vld1q_s16((int16_t)((const* uint *)(s2)+x));
968	int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), `8`));
969	int16x8_t bottomRB = vandq_s16(bottom, colorMask);
970	bottomAG = vmulq_s16(bottomAG, disty_);
971	bottomRB = vmulq_s16(bottomRB, disty_);
972
973	// Add the values, and shift to only keep 8 significant bits per colors
974	int16x8_t rAG = vaddq_s16(topAG, bottomAG);
975	rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), `8`));
976	vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
977	int16x8_t rRB = vaddq_s16(topRB, bottomRB);
978	rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), `8`));
979	vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
980	}
981	#endif
982	}
983	for (; f < count; f++) { // Same as above but without simd
984	if (blendType == BlendTransformedBilinearTiled) {
985	if (x >= image.width) x -= image.width;
986	} else {
987	x = qMin(x, image.x2 - `1`);
988	}
989
990	uint t = s1[x];
991	uint b = s2[x];
992
993	intermediate.buffer_rb[f] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
994	intermediate.buffer_ag[f] = ((((t>>`8`) & `0xff00ff`) * idisty + ((b>>`8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
995	x++;
996	}
997
998	// Now interpolate the values from the intermediate.buffer to get the final result.
999	intermediate_adder(b, end, intermediate, offset, fx, fdx);
1000	}
1001
1002	template<TextureBlendType blendType>
1003	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint b, uint end, const QTextureData &image,
1004	int &fx, int &fy, int fdx, int /fdy/)
1005	{
1006	int y1 = (fy >> `16`);
1007	int y2;
1008	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1009	const uint s1 = (const* uint *)image.scanLine(y1);
1010	const uint s2 = (const* uint *)image.scanLine(y2);
1011	const int disty = (fy & `0x0000ffff`) >> `8`;
1012
1013	if (blendType != BlendTransformedBilinearTiled) {
1014	const qint64 min_fx = qint64(image.x1) * fixed_scale;
1015	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
1016	while (b < end) {
1017	int x1 = (fx >> `16`);
1018	int x2;
1019	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1020	if (x1 != x2)
1021	break;
1022	uint top = s1[x1];
1023	uint bot = s2[x1];
1024	*b = INTERPOLATE_PIXEL_256(top, `256` - disty, bot, disty);
1025	fx += fdx;
1026	++b;
1027	}
1028	uint *boundedEnd = end;
1029	if (fdx > `0`)
1030	boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
1031	else if (fdx < `0`)
1032	boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
1033
1034	// A fast middle part without boundary checks
1035	while (b < boundedEnd) {
1036	int x = (fx >> `16`);
1037	int distx = (fx & `0x0000ffff`) >> `8`;
1038	*b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
1039	fx += fdx;
1040	++b;
1041	}
1042	}
1043
1044	while (b < end) {
1045	int x1 = (fx >> `16`);
1046	int x2;
1047	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1` , x1, x2);
1048	uint tl = s1[x1];
1049	uint tr = s1[x2];
1050	uint bl = s2[x1];
1051	uint br = s2[x2];
1052	int distx = (fx & `0x0000ffff`) >> `8`;
1053	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1054
1055	fx += fdx;
1056	++b;
1057	}
1058	}
1059
1060	template<TextureBlendType blendType>
1061	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint b, uint end, const QTextureData &image,
1062	int &fx, int &fy, int fdx, int /fdy/)
1063	{
1064	int y1 = (fy >> `16`);
1065	int y2;
1066	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1067	const uint s1 = (const* uint *)image.scanLine(y1);
1068	const uint s2 = (const* uint *)image.scanLine(y2);
1069	const int disty8 = (fy & `0x0000ffff`) >> `8`;
1070	const int disty4 = (disty8 + `0x08`) >> `4`;
1071
1072	if (blendType != BlendTransformedBilinearTiled) {
1073	const qint64 min_fx = qint64(image.x1) * fixed_scale;
1074	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
1075	while (b < end) {
1076	int x1 = (fx >> `16`);
1077	int x2;
1078	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1079	if (x1 != x2)
1080	break;
1081	uint top = s1[x1];
1082	uint bot = s2[x1];
1083	*b = INTERPOLATE_PIXEL_256(top, `256` - disty8, bot, disty8);
1084	fx += fdx;
1085	++b;
1086	}
1087	uint *boundedEnd = end;
1088	if (fdx > `0`)
1089	boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
1090	else if (fdx < `0`)
1091	boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
1092	// A fast middle part without boundary checks
1093	#if defined(__SSE2__)
1094	const __m128i colorMask = _mm_set1_epi32(`0x00ff00ff`);
1095	const __m128i v_256 = _mm_set1_epi16(`256`);
1096	const __m128i v_disty = _mm_set1_epi16(disty4);
1097	const __m128i v_fdx = _mm_set1_epi32(fdx*`4`);
1098	const __m128i v_fx_r = _mm_set1_epi32(`0x8`);
1099	__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
1100
1101	while (b < boundedEnd - `3`) {
1102	__m128i offset = _mm_srli_epi32(v_fx, `16`);
1103	const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1104	const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1105	const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1106	const int offset3 = _mm_cvtsi128_si32(offset);
1107	const __m128i tl = _mm_setr_epi32(s1[offset0], s1[offset1], s1[offset2], s1[offset3]);
1108	const __m128i tr = _mm_setr_epi32(s1[offset0 + `1`], s1[offset1 + `1`], s1[offset2 + `1`], s1[offset3 + `1`]);
1109	const __m128i bl = _mm_setr_epi32(s2[offset0], s2[offset1], s2[offset2], s2[offset3]);
1110	const __m128i br = _mm_setr_epi32(s2[offset0 + `1`], s2[offset1 + `1`], s2[offset2 + `1`], s2[offset3 + `1`]);
1111
1112	__m128i v_distx = _mm_srli_epi16(v_fx, `8`);
1113	v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fx_r), `4`);
1114	v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1115	v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1116
1117	interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1118	b += `4`;
1119	v_fx = _mm_add_epi32(v_fx, v_fdx);
1120	}
1121	fx = _mm_cvtsi128_si32(v_fx);
1122	#elif defined(__ARM_NEON__)
1123	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
1124	const int16x8_t invColorMask = vmvnq_s16(colorMask);
1125	const int16x8_t v_256 = vdupq_n_s16(`256`);
1126	const int16x8_t v_disty = vdupq_n_s16(disty4);
1127	const int16x8_t v_disty_ = vshlq_n_s16(v_disty, `4`);
1128	int32x4_t v_fdx = vdupq_n_s32(fdx*`4`);
1129
1130	int32x4_t v_fx = vmovq_n_s32(fx);
1131	v_fx = vsetq_lane_s32(fx + fdx, v_fx, `1`);
1132	v_fx = vsetq_lane_s32(fx + fdx * `2`, v_fx, `2`);
1133	v_fx = vsetq_lane_s32(fx + fdx * `3`, v_fx, `3`);
1134
1135	const int32x4_t v_ffff_mask = vdupq_n_s32(`0x0000ffff`);
1136	const int32x4_t v_fx_r = vdupq_n_s32(`0x0800`);
1137
1138	while (b < boundedEnd - `3`) {
1139	uint32x4x2_t v_top, v_bot;
1140
1141	int x1 = (fx >> `16`);
1142	fx += fdx;
1143	v_top = vld2q_lane_u32(s1 + x1, v_top, `0`);
1144	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `0`);
1145	x1 = (fx >> `16`);
1146	fx += fdx;
1147	v_top = vld2q_lane_u32(s1 + x1, v_top, `1`);
1148	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `1`);
1149	x1 = (fx >> `16`);
1150	fx += fdx;
1151	v_top = vld2q_lane_u32(s1 + x1, v_top, `2`);
1152	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `2`);
1153	x1 = (fx >> `16`);
1154	fx += fdx;
1155	v_top = vld2q_lane_u32(s1 + x1, v_top, `3`);
1156	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `3`);
1157
1158	int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), `12`);
1159	v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, `16`));
1160
1161	interpolate_4_pixels_16_neon(
1162	vreinterpretq_s16_u32(v_top.val[`0`]), vreinterpretq_s16_u32(v_top.val[`1`]),
1163	vreinterpretq_s16_u32(v_bot.val[`0`]), vreinterpretq_s16_u32(v_bot.val[`1`]),
1164	vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
1165	colorMask, invColorMask, v_256, b);
1166	b+=`4`;
1167	v_fx = vaddq_s32(v_fx, v_fdx);
1168	}
1169	#endif
1170	while (b < boundedEnd) {
1171	int x = (fx >> `16`);
1172	if (hasFastInterpolate4()) {
1173	int distx8 = (fx & `0x0000ffff`) >> `8`;
1174	*b = interpolate_4_pixels(s1 + x, s2 + x, distx8, disty8);
1175	} else {
1176	int distx4 = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1177	*b = interpolate_4_pixels_16(s1[x], s1[x + `1`], s2[x], s2[x + `1`], distx4, disty4);
1178	}
1179	fx += fdx;
1180	++b;
1181	}
1182	}
1183
1184	while (b < end) {
1185	int x1 = (fx >> `16`);
1186	int x2;
1187	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1188	uint tl = s1[x1];
1189	uint tr = s1[x2];
1190	uint bl = s2[x1];
1191	uint br = s2[x2];
1192	if (hasFastInterpolate4()) {
1193	int distx8 = (fx & `0x0000ffff`) >> `8`;
1194	*b = interpolate_4_pixels(tl, tr, bl, br, distx8, disty8);
1195	} else {
1196	int distx4 = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1197	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx4, disty4);
1198	}
1199	fx += fdx;
1200	++b;
1201	}
1202	}
1203
1204	template<TextureBlendType blendType>
1205	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint b, uint end, const QTextureData &image,
1206	int &fx, int &fy, int fdx, int fdy)
1207	{
1208	// if we are zooming more than 8 times, we use 8bit precision for the position.
1209	while (b < end) {
1210	int x1 = (fx >> `16`);
1211	int x2;
1212	int y1 = (fy >> `16`);
1213	int y2;
1214
1215	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1216	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1217
1218	const uint s1 = (const* uint *)image.scanLine(y1);
1219	const uint s2 = (const* uint *)image.scanLine(y2);
1220
1221	uint tl = s1[x1];
1222	uint tr = s1[x2];
1223	uint bl = s2[x1];
1224	uint br = s2[x2];
1225
1226	int distx = (fx & `0x0000ffff`) >> `8`;
1227	int disty = (fy & `0x0000ffff`) >> `8`;
1228
1229	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1230
1231	fx += fdx;
1232	fy += fdy;
1233	++b;
1234	}
1235	}
1236
1237	template<TextureBlendType blendType>
1238	static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint b, uint end, const QTextureData &image,
1239	int &fx, int &fy, int fdx, int fdy)
1240	{
1241	//we are zooming less than 8x, use 4bit precision
1242	if (blendType != BlendTransformedBilinearTiled) {
1243	const qint64 min_fx = qint64(image.x1) * fixed_scale;
1244	const qint64 max_fx = qint64(image.x2 - `1`) * fixed_scale;
1245	const qint64 min_fy = qint64(image.y1) * fixed_scale;
1246	const qint64 max_fy = qint64(image.y2 - `1`) * fixed_scale;
1247	// first handle the possibly bounded part in the beginning
1248	while (b < end) {
1249	int x1 = (fx >> `16`);
1250	int x2;
1251	int y1 = (fy >> `16`);
1252	int y2;
1253	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1254	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1255	if (x1 != x2 && y1 != y2)
1256	break;
1257	const uint s1 = (const* uint *)image.scanLine(y1);
1258	const uint s2 = (const* uint *)image.scanLine(y2);
1259	uint tl = s1[x1];
1260	uint tr = s1[x2];
1261	uint bl = s2[x1];
1262	uint br = s2[x2];
1263	if (hasFastInterpolate4()) {
1264	int distx = (fx & `0x0000ffff`) >> `8`;
1265	int disty = (fy & `0x0000ffff`) >> `8`;
1266	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1267	} else {
1268	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1269	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
1270	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1271	}
1272	fx += fdx;
1273	fy += fdy;
1274	++b;
1275	}
1276	uint *boundedEnd = end;
1277	if (fdx > `0`)
1278	boundedEnd = qMin(boundedEnd, b + (max_fx - fx) / fdx);
1279	else if (fdx < `0`)
1280	boundedEnd = qMin(boundedEnd, b + (min_fx - fx) / fdx);
1281	if (fdy > `0`)
1282	boundedEnd = qMin(boundedEnd, b + (max_fy - fy) / fdy);
1283	else if (fdy < `0`)
1284	boundedEnd = qMin(boundedEnd, b + (min_fy - fy) / fdy);
1285
1286	// until boundedEnd we can now have a fast middle part without boundary checks
1287	#if defined(__SSE2__)
1288	const __m128i colorMask = _mm_set1_epi32(`0x00ff00ff`);
1289	const __m128i v_256 = _mm_set1_epi16(`256`);
1290	const __m128i v_fdx = _mm_set1_epi32(fdx*`4`);
1291	const __m128i v_fdy = _mm_set1_epi32(fdy*`4`);
1292	const __m128i v_fxy_r = _mm_set1_epi32(`0x8`);
1293	__m128i v_fx = _mm_setr_epi32(fx, fx + fdx, fx + fdx + fdx, fx + fdx + fdx + fdx);
1294	__m128i v_fy = _mm_setr_epi32(fy, fy + fdy, fy + fdy + fdy, fy + fdy + fdy + fdy);
1295
1296	const uchar *textureData = image.imageData;
1297	const qsizetype bytesPerLine = image.bytesPerLine;
1298	const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/`4`), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
1299
1300	while (b < boundedEnd - `3`) {
1301	const __m128i vy = _mm_packs_epi32(_mm_srli_epi32(v_fy, `16`), _mm_setzero_si128());
1302	// 4x16bit 4x16bit -> 4x32bit*
1303	__m128i offset = _mm_unpacklo_epi16(_mm_mullo_epi16(vy, vbpl), _mm_mulhi_epi16(vy, vbpl));
1304	offset = _mm_add_epi32(offset, _mm_srli_epi32(v_fx, `16`));
1305	const int offset0 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1306	const int offset1 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1307	const int offset2 = _mm_cvtsi128_si32(offset); offset = _mm_srli_si128(offset, `4`);
1308	const int offset3 = _mm_cvtsi128_si32(offset);
1309	const uint topData = (const* uint *)(textureData);
1310	const __m128i tl = _mm_setr_epi32(topData[offset0], topData[offset1], topData[offset2], topData[offset3]);
1311	const __m128i tr = _mm_setr_epi32(topData[offset0 + `1`], topData[offset1 + `1`], topData[offset2 + `1`], topData[offset3 + `1`]);
1312	const uint bottomData = (const* uint *)(textureData + bytesPerLine);
1313	const __m128i bl = _mm_setr_epi32(bottomData[offset0], bottomData[offset1], bottomData[offset2], bottomData[offset3]);
1314	const __m128i br = _mm_setr_epi32(bottomData[offset0 + `1`], bottomData[offset1 + `1`], bottomData[offset2 + `1`], bottomData[offset3 + `1`]);
1315
1316	__m128i v_distx = _mm_srli_epi16(v_fx, `8`);
1317	__m128i v_disty = _mm_srli_epi16(v_fy, `8`);
1318	v_distx = _mm_srli_epi16(_mm_add_epi32(v_distx, v_fxy_r), `4`);
1319	v_disty = _mm_srli_epi16(_mm_add_epi32(v_disty, v_fxy_r), `4`);
1320	v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1321	v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1322	v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1323	v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(`2`,`2`,`0`,`0`));
1324
1325	interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1326	b += `4`;
1327	v_fx = _mm_add_epi32(v_fx, v_fdx);
1328	v_fy = _mm_add_epi32(v_fy, v_fdy);
1329	}
1330	fx = _mm_cvtsi128_si32(v_fx);
1331	fy = _mm_cvtsi128_si32(v_fy);
1332	#elif defined(__ARM_NEON__)
1333	const int16x8_t colorMask = vdupq_n_s16(`0x00ff`);
1334	const int16x8_t invColorMask = vmvnq_s16(colorMask);
1335	const int16x8_t v_256 = vdupq_n_s16(`256`);
1336	int32x4_t v_fdx = vdupq_n_s32(fdx * `4`);
1337	int32x4_t v_fdy = vdupq_n_s32(fdy * `4`);
1338
1339	const uchar *textureData = image.imageData;
1340	const qsizetype bytesPerLine = image.bytesPerLine;
1341
1342	int32x4_t v_fx = vmovq_n_s32(fx);
1343	int32x4_t v_fy = vmovq_n_s32(fy);
1344	v_fx = vsetq_lane_s32(fx + fdx, v_fx, `1`);
1345	v_fy = vsetq_lane_s32(fy + fdy, v_fy, `1`);
1346	v_fx = vsetq_lane_s32(fx + fdx * `2`, v_fx, `2`);
1347	v_fy = vsetq_lane_s32(fy + fdy * `2`, v_fy, `2`);
1348	v_fx = vsetq_lane_s32(fx + fdx * `3`, v_fx, `3`);
1349	v_fy = vsetq_lane_s32(fy + fdy * `3`, v_fy, `3`);
1350
1351	const int32x4_t v_ffff_mask = vdupq_n_s32(`0x0000ffff`);
1352	const int32x4_t v_round = vdupq_n_s32(`0x0800`);
1353
1354	while (b < boundedEnd - `3`) {
1355	uint32x4x2_t v_top, v_bot;
1356
1357	int x1 = (fx >> `16`);
1358	int y1 = (fy >> `16`);
1359	fx += fdx; fy += fdy;
1360	const uchar sl = textureData + bytesPerLine y1;
1361	const uint s1 = reinterpret_cast<const* uint *>(sl);
1362	const uint s2 = reinterpret_cast<const* uint *>(sl + bytesPerLine);
1363	v_top = vld2q_lane_u32(s1 + x1, v_top, `0`);
1364	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `0`);
1365	x1 = (fx >> `16`);
1366	y1 = (fy >> `16`);
1367	fx += fdx; fy += fdy;
1368	sl = textureData + bytesPerLine * y1;
1369	s1 = reinterpret_cast<const uint *>(sl);
1370	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1371	v_top = vld2q_lane_u32(s1 + x1, v_top, `1`);
1372	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `1`);
1373	x1 = (fx >> `16`);
1374	y1 = (fy >> `16`);
1375	fx += fdx; fy += fdy;
1376	sl = textureData + bytesPerLine * y1;
1377	s1 = reinterpret_cast<const uint *>(sl);
1378	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1379	v_top = vld2q_lane_u32(s1 + x1, v_top, `2`);
1380	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `2`);
1381	x1 = (fx >> `16`);
1382	y1 = (fy >> `16`);
1383	fx += fdx; fy += fdy;
1384	sl = textureData + bytesPerLine * y1;
1385	s1 = reinterpret_cast<const uint *>(sl);
1386	s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1387	v_top = vld2q_lane_u32(s1 + x1, v_top, `3`);
1388	v_bot = vld2q_lane_u32(s2 + x1, v_bot, `3`);
1389
1390	int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), `12`);
1391	int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), `12`);
1392	v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, `16`));
1393	v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, `16`));
1394	int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), `4`);
1395
1396	interpolate_4_pixels_16_neon(
1397	vreinterpretq_s16_u32(v_top.val[`0`]), vreinterpretq_s16_u32(v_top.val[`1`]),
1398	vreinterpretq_s16_u32(v_bot.val[`0`]), vreinterpretq_s16_u32(v_bot.val[`1`]),
1399	vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
1400	v_disty_, colorMask, invColorMask, v_256, b);
1401	b += `4`;
1402	v_fx = vaddq_s32(v_fx, v_fdx);
1403	v_fy = vaddq_s32(v_fy, v_fdy);
1404	}
1405	#endif
1406	while (b < boundedEnd) {
1407	int x = (fx >> `16`);
1408	int y = (fy >> `16`);
1409
1410	const uint s1 = (const* uint *)image.scanLine(y);
1411	const uint s2 = (const* uint *)image.scanLine(y + `1`);
1412
1413	if (hasFastInterpolate4()) {
1414	int distx = (fx & `0x0000ffff`) >> `8`;
1415	int disty = (fy & `0x0000ffff`) >> `8`;
1416	*b = interpolate_4_pixels(s1 + x, s2 + x, distx, disty);
1417	} else {
1418	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1419	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
1420	*b = interpolate_4_pixels_16(s1[x], s1[x + `1`], s2[x], s2[x + `1`], distx, disty);
1421	}
1422
1423	fx += fdx;
1424	fy += fdy;
1425	++b;
1426	}
1427	}
1428
1429	while (b < end) {
1430	int x1 = (fx >> `16`);
1431	int x2;
1432	int y1 = (fy >> `16`);
1433	int y2;
1434
1435	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1436	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1437
1438	const uint s1 = (const* uint *)image.scanLine(y1);
1439	const uint s2 = (const* uint *)image.scanLine(y2);
1440
1441	uint tl = s1[x1];
1442	uint tr = s1[x2];
1443	uint bl = s2[x1];
1444	uint br = s2[x2];
1445
1446	if (hasFastInterpolate4()) {
1447	int distx = (fx & `0x0000ffff`) >> `8`;
1448	int disty = (fy & `0x0000ffff`) >> `8`;
1449	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1450	} else {
1451	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1452	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
1453	*b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1454	}
1455
1456	fx += fdx;
1457	fy += fdy;
1458	++b;
1459	}
1460	}
1461
1462
1463	static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[`2`][NFastTransformTypes] = {
1464	{
1465	fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
1466	fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
1467	fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
1468	fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
1469	fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
1470	},
1471	{
1472	fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
1473	fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
1474	fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
1475	fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
1476	fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
1477	}
1478	};
1479
1480	template<TextureBlendType blendType> / blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled /
1481	static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint buffer, const* Operator *,
1482	const QSpanData data, int* y, int x,
1483	int length)
1484	{
1485	const qreal cx = x + qreal(`0.5`);
1486	const qreal cy = y + qreal(`0.5`);
1487	constexpr int tiled = (blendType == BlendTransformedBilinearTiled) ? `1` : `0`;
1488
1489	uint *end = buffer + length;
1490	uint *b = buffer;
1491	if (canUseFastMatrixPath(cx, cy, length, data)) {
1492	// The increment pr x in the scanline
1493	int fdx = (int)(data->m11 * fixed_scale);
1494	int fdy = (int)(data->m12 * fixed_scale);
1495
1496	int fx = int((data->m21 * cy
1497	+ data->m11 * cx + data->dx) * fixed_scale);
1498	int fy = int((data->m22 * cy
1499	+ data->m12 * cx + data->dy) * fixed_scale);
1500
1501	fx -= half_point;
1502	fy -= half_point;
1503
1504	if (fdy == `0`) { // simple scale, no rotation or shear
1505	if (qAbs(fdx) <= fixed_scale) {
1506	// simple scale up on X
1507	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1508	} else if (qAbs(fdx) <= `2` * fixed_scale) {
1509	// simple scale down on X, less than 2x
1510	const int mid = (length * `2` < BufferSize) ? length : ((length + `1`) / `2`);
1511	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
1512	if (mid != length)
1513	bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
1514	} else if (qAbs(data->m22) < qreal(`1.`/`8.`)) {
1515	// scale up more than 8x (on Y)
1516	bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1517	} else {
1518	// scale down on X
1519	bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1520	}
1521	} else { // rotation or shear
1522	if (qAbs(data->m11) < qreal(`1.`/`8.`) \|\| qAbs(data->m22) < qreal(`1.`/`8.`) ) {
1523	// if we are zooming more than 8 times, we use 8bit precision for the position.
1524	bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1525	} else {
1526	// we are zooming less than 8x, use 4bit precision
1527	bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1528	}
1529	}
1530	} else {
1531	const QTextureData &image = data->texture;
1532
1533	const qreal fdx = data->m11;
1534	const qreal fdy = data->m12;
1535	const qreal fdw = data->m13;
1536
1537	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
1538	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
1539	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
1540
1541	while (b < end) {
1542	const qreal iw = fw == `0` ? `1` : `1` / fw;
1543	const qreal px = fx * iw - qreal(`0.5`);
1544	const qreal py = fy * iw - qreal(`0.5`);
1545
1546	int x1 = int(px) - (px < `0`);
1547	int x2;
1548	int y1 = int(py) - (py < `0`);
1549	int y2;
1550
1551	int distx = int((px - x1) * `256`);
1552	int disty = int((py - y1) * `256`);
1553
1554	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1555	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1556
1557	const uint s1 = (const* uint *)data->texture.scanLine(y1);
1558	const uint s2 = (const* uint *)data->texture.scanLine(y2);
1559
1560	uint tl = s1[x1];
1561	uint tr = s1[x2];
1562	uint bl = s2[x1];
1563	uint br = s2[x2];
1564
1565	*b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1566
1567	fx += fdx;
1568	fy += fdy;
1569	fw += fdw;
1570	//force increment to avoid /0
1571	if (!fw) {
1572	fw += fdw;
1573	}
1574	++b;
1575	}
1576	}
1577
1578	return buffer;
1579	}
1580
1581	template<TextureBlendType blendType>
1582	static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint b, uint end, const QTextureData &image,
1583	int &fx, int &fy, int fdx, int /fdy/)
1584	{
1585	const QPixelLayout *layout = &qPixelLayouts[image.format];
1586	const QList<QRgb> *clut = image.colorTable;
1587	const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
1588
1589	int y1 = (fy >> `16`);
1590	int y2;
1591	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1592	const uchar *s1 = image.scanLine(y1);
1593	const uchar *s2 = image.scanLine(y2);
1594
1595	const int disty = (fy & `0x0000ffff`) >> `8`;
1596	const int idisty = `256` - disty;
1597	const int length = end - b;
1598
1599	// The intermediate buffer is generated in the positive direction
1600	const int adjust = (fdx < `0`) ? fdx * length : `0`;
1601	const int offset = (fx + adjust) >> `16`;
1602	int x = offset;
1603
1604	IntermediateBuffer intermediate;
1605	uint *buf1 = intermediate.buffer_rb;
1606	uint *buf2 = intermediate.buffer_ag;
1607	const uint *ptr1;
1608	const uint *ptr2;
1609
1610	int count = (qint64(length) * qAbs(fdx) + fixed_scale - `1`) / fixed_scale + `2`;
1611	Q_ASSERT(count <= BufferSize + `2`);
1612
1613	if (blendType == BlendTransformedBilinearTiled) {
1614	x %= image.width;
1615	if (x < `0`)
1616	x += image.width;
1617	int len1 = qMin(count, image.width - x);
1618	int len2 = qMin(x, count - len1);
1619
1620	ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
1621	ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
1622	for (int i = `0`; i < len1; ++i) {
1623	uint t = ptr1[i];
1624	uint b = ptr2[i];
1625	buf1[i] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1626	buf2[i] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1627	}
1628
1629	if (len2) {
1630	ptr1 = fetch(buf1 + len1, s1, `0`, len2, clut, nullptr);
1631	ptr2 = fetch(buf2 + len1, s2, `0`, len2, clut, nullptr);
1632	for (int i = `0`; i < len2; ++i) {
1633	uint t = ptr1[i];
1634	uint b = ptr2[i];
1635	buf1[i + len1] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1636	buf2[i + len1] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1637	}
1638	}
1639	// Generate the rest by repeatedly repeating the previous set of pixels
1640	for (int i = image.width; i < count; ++i) {
1641	buf1[i] = buf1[i - image.width];
1642	buf2[i] = buf2[i - image.width];
1643	}
1644	} else {
1645	int start = qMax(x, image.x1);
1646	int end = qMin(x + count, image.x2);
1647	int len = qMax(`1`, end - start);
1648	int leading = start - x;
1649
1650	ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
1651	ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
1652
1653	for (int i = `0`; i < len; ++i) {
1654	uint t = ptr1[i];
1655	uint b = ptr2[i];
1656	buf1[i + leading] = (((t & `0xff00ff`) * idisty + (b & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1657	buf2[i + leading] = ((((t >> `8`) & `0xff00ff`) * idisty + ((b >> `8`) & `0xff00ff`) * disty) >> `8`) & `0xff00ff`;
1658	}
1659
1660	for (int i = `0`; i < leading; ++i) {
1661	buf1[i] = buf1[leading];
1662	buf2[i] = buf2[leading];
1663	}
1664	for (int i = leading + len; i < count; ++i) {
1665	buf1[i] = buf1[i - `1`];
1666	buf2[i] = buf2[i - `1`];
1667	}
1668	}
1669
1670	// Now interpolate the values from the intermediate.buffer to get the final result.
1671	intermediate_adder(b, end, intermediate, offset, fx, fdx);
1672	}
1673
1674
1675	template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
1676	static void QT_FASTCALL fetchTransformedBilinear_fetcher(T buf1, T buf2, const int len, const QTextureData &image,
1677	int fx, int fy, const int fdx, const int fdy)
1678	{
1679	const QPixelLayout &layout = qPixelLayouts[image.format];
1680	constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
1681	if (useFetch)
1682	Q_ASSERT(sizeof(T) == sizeof(uint));
1683	else
1684	Q_ASSERT(layout.bpp == bpp);
1685	const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixelTable[layout.bpp] : qFetchPixel<bpp>;
1686	if (fdy == `0`) {
1687	int y1 = (fy >> `16`);
1688	int y2;
1689	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1690	const uchar *s1 = image.scanLine(y1);
1691	const uchar *s2 = image.scanLine(y2);
1692
1693	int i = `0`;
1694	if (blendType == BlendTransformedBilinear) {
1695	for (; i < len; ++i) {
1696	int x1 = (fx >> `16`);
1697	int x2;
1698	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1699	if (x1 != x2)
1700	break;
1701	if (useFetch) {
1702	buf1[i * `2` + `0`] = buf1[i * `2` + `1`] = fetch1(s1, x1);
1703	buf2[i * `2` + `0`] = buf2[i * `2` + `1`] = fetch1(s2, x1);
1704	} else {
1705	buf1[i * `2` + `0`] = buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x1];
1706	buf2[i * `2` + `0`] = buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x1];
1707	}
1708	fx += fdx;
1709	}
1710	int fastLen = len;
1711	if (fdx > `0`)
1712	fastLen = qMin(fastLen, int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
1713	else if (fdx < `0`)
1714	fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
1715
1716	for (; i < fastLen; ++i) {
1717	int x = (fx >> `16`);
1718	if (useFetch) {
1719	buf1[i * `2` + `0`] = fetch1(s1, x);
1720	buf1[i * `2` + `1`] = fetch1(s1, x + `1`);
1721	buf2[i * `2` + `0`] = fetch1(s2, x);
1722	buf2[i * `2` + `1`] = fetch1(s2, x + `1`);
1723	} else {
1724	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x];
1725	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x + `1`];
1726	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x];
1727	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x + `1`];
1728	}
1729	fx += fdx;
1730	}
1731	}
1732
1733	for (; i < len; ++i) {
1734	int x1 = (fx >> `16`);
1735	int x2;
1736	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1737	if (useFetch) {
1738	buf1[i * `2` + `0`] = fetch1(s1, x1);
1739	buf1[i * `2` + `1`] = fetch1(s1, x2);
1740	buf2[i * `2` + `0`] = fetch1(s2, x1);
1741	buf2[i * `2` + `1`] = fetch1(s2, x2);
1742	} else {
1743	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
1744	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
1745	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
1746	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
1747	}
1748	fx += fdx;
1749	}
1750	} else {
1751	int i = `0`;
1752	if (blendType == BlendTransformedBilinear) {
1753	for (; i < len; ++i) {
1754	int x1 = (fx >> `16`);
1755	int x2;
1756	int y1 = (fy >> `16`);
1757	int y2;
1758	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1759	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1760	if (x1 != x2 && y1 != y2)
1761	break;
1762	const uchar *s1 = image.scanLine(y1);
1763	const uchar *s2 = image.scanLine(y2);
1764	if (useFetch) {
1765	buf1[i * `2` + `0`] = fetch1(s1, x1);
1766	buf1[i * `2` + `1`] = fetch1(s1, x2);
1767	buf2[i * `2` + `0`] = fetch1(s2, x1);
1768	buf2[i * `2` + `1`] = fetch1(s2, x2);
1769	} else {
1770	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
1771	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
1772	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
1773	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
1774	}
1775	fx += fdx;
1776	fy += fdy;
1777	}
1778	int fastLen = len;
1779	if (fdx > `0`)
1780	fastLen = qMin(fastLen, int((qint64(image.x2 - `1`) * fixed_scale - fx) / fdx));
1781	else if (fdx < `0`)
1782	fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
1783	if (fdy > `0`)
1784	fastLen = qMin(fastLen, int((qint64(image.y2 - `1`) * fixed_scale - fy) / fdy));
1785	else if (fdy < `0`)
1786	fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
1787
1788	for (; i < fastLen; ++i) {
1789	int x = (fx >> `16`);
1790	int y = (fy >> `16`);
1791	const uchar *s1 = image.scanLine(y);
1792	const uchar *s2 = s1 + image.bytesPerLine;
1793	if (useFetch) {
1794	buf1[i * `2` + `0`] = fetch1(s1, x);
1795	buf1[i * `2` + `1`] = fetch1(s1, x + `1`);
1796	buf2[i * `2` + `0`] = fetch1(s2, x);
1797	buf2[i * `2` + `1`] = fetch1(s2, x + `1`);
1798	} else {
1799	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x];
1800	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x + `1`];
1801	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x];
1802	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x + `1`];
1803	}
1804	fx += fdx;
1805	fy += fdy;
1806	}
1807	}
1808
1809	for (; i < len; ++i) {
1810	int x1 = (fx >> `16`);
1811	int x2;
1812	int y1 = (fy >> `16`);
1813	int y2;
1814	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1815	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1816
1817	const uchar *s1 = image.scanLine(y1);
1818	const uchar *s2 = image.scanLine(y2);
1819	if (useFetch) {
1820	buf1[i * `2` + `0`] = fetch1(s1, x1);
1821	buf1[i * `2` + `1`] = fetch1(s1, x2);
1822	buf2[i * `2` + `0`] = fetch1(s2, x1);
1823	buf2[i * `2` + `1`] = fetch1(s2, x2);
1824	} else {
1825	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
1826	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
1827	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
1828	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
1829	}
1830	fx += fdx;
1831	fy += fdy;
1832	}
1833	}
1834	}
1835
1836	template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
1837	static void QT_FASTCALL fetchTransformedBilinear_slow_fetcher(T buf1, T buf2, ushort distxs, ushort distys,
1838	const int len, const QTextureData &image,
1839	qreal &fx, qreal &fy, qreal &fw,
1840	const qreal fdx, const qreal fdy, const qreal fdw)
1841	{
1842	const QPixelLayout &layout = qPixelLayouts[image.format];
1843	constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
1844	if (useFetch)
1845	Q_ASSERT(sizeof(T) == sizeof(uint));
1846	else
1847	Q_ASSERT(layout.bpp == bpp);
1848
1849	const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixelTable[layout.bpp] : qFetchPixel<bpp>;
1850
1851	for (int i = `0`; i < len; ++i) {
1852	const qreal iw = fw == `0` ? `16384` : `1` / fw;
1853	const qreal px = fx * iw - qreal(`0.5`);
1854	const qreal py = fy * iw - qreal(`0.5`);
1855
1856	int x1 = qFloor(px);
1857	int x2;
1858	int y1 = qFloor(py);
1859	int y2;
1860
1861	distxs[i] = ushort((px - x1) * (`1`<<`16`));
1862	distys[i] = ushort((py - y1) * (`1`<<`16`));
1863
1864	fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - `1`, x1, x2);
1865	fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - `1`, y1, y2);
1866
1867	const uchar *s1 = image.scanLine(y1);
1868	const uchar *s2 = image.scanLine(y2);
1869	if (useFetch) {
1870	buf1[i * `2` + `0`] = fetch1(s1, x1);
1871	buf1[i * `2` + `1`] = fetch1(s1, x2);
1872	buf2[i * `2` + `0`] = fetch1(s2, x1);
1873	buf2[i * `2` + `1`] = fetch1(s2, x2);
1874	} else {
1875	buf1[i * `2` + `0`] = reinterpret_cast<const T *>(s1)[x1];
1876	buf1[i * `2` + `1`] = reinterpret_cast<const T *>(s1)[x2];
1877	buf2[i * `2` + `0`] = reinterpret_cast<const T *>(s2)[x1];
1878	buf2[i * `2` + `1`] = reinterpret_cast<const T *>(s2)[x2];
1879	}
1880
1881	fx += fdx;
1882	fy += fdy;
1883	fw += fdw;
1884	}
1885	}
1886
1887	// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
1888	template<TextureBlendType blendType, QPixelLayout::BPP bpp>
1889	static const uint QT_FASTCALL fetchTransformedBilinear(uint buffer, const Operator *,
1890	const QSpanData data, int* y, int x, int length)
1891	{
1892	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1893	const QList<QRgb> *clut = data->texture.colorTable;
1894	Q_ASSERT(bpp == QPixelLayout::BPPNone \|\| layout->bpp == bpp);
1895
1896	const qreal cx = x + qreal(`0.5`);
1897	const qreal cy = y + qreal(`0.5`);
1898
1899	if (canUseFastMatrixPath(cx, cy, length, data)) {
1900	// The increment pr x in the scanline
1901	int fdx = (int)(data->m11 * fixed_scale);
1902	int fdy = (int)(data->m12 * fixed_scale);
1903
1904	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
1905	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
1906
1907	fx -= half_point;
1908	fy -= half_point;
1909
1910	if (fdy == `0`) { // simple scale, no rotation or shear
1911	if (qAbs(fdx) <= fixed_scale) { // scale up on X
1912	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
1913	} else if (qAbs(fdx) <= `2` * fixed_scale) { // scale down on X less than 2x
1914	const int mid = (length * `2` < BufferSize) ? length : ((length + `1`) / `2`);
1915	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
1916	if (mid != length)
1917	fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
1918	} else {
1919	const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
1920
1921	uint buf1[BufferSize];
1922	uint buf2[BufferSize];
1923	uint *b = buffer;
1924	while (length) {
1925	int len = qMin(length, BufferSize / `2`);
1926	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, `0`);
1927	layout->convertToARGB32PM(buf1, len * `2`, clut);
1928	layout->convertToARGB32PM(buf2, len * `2`, clut);
1929
1930	if (hasFastInterpolate4() \|\| qAbs(data->m22) < qreal(`1.`/`8.`)) { // scale up more than 8x (on Y)
1931	int disty = (fy & `0x0000ffff`) >> `8`;
1932	for (int i = `0`; i < len; ++i) {
1933	int distx = (fx & `0x0000ffff`) >> `8`;
1934	b[i] = interpolate_4_pixels(buf1 + i * `2`, buf2 + i * `2`, distx, disty);
1935	fx += fdx;
1936	}
1937	} else {
1938	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
1939	for (int i = `0`; i < len; ++i) {
1940	uint tl = buf1[i * `2` + `0`];
1941	uint tr = buf1[i * `2` + `1`];
1942	uint bl = buf2[i * `2` + `0`];
1943	uint br = buf2[i * `2` + `1`];
1944	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1945	b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1946	fx += fdx;
1947	}
1948	}
1949	length -= len;
1950	b += len;
1951	}
1952	}
1953	} else { // rotation or shear
1954	const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
1955
1956	uint buf1[BufferSize];
1957	uint buf2[BufferSize];
1958	uint *b = buffer;
1959	while (length) {
1960	int len = qMin(length, BufferSize / `2`);
1961	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
1962	layout->convertToARGB32PM(buf1, len * `2`, clut);
1963	layout->convertToARGB32PM(buf2, len * `2`, clut);
1964
1965	if (hasFastInterpolate4() \|\| qAbs(data->m11) < qreal(`1.`/`8.`) \|\| qAbs(data->m22) < qreal(`1.`/`8.`)) {
1966	// If we are zooming more than 8 times, we use 8bit precision for the position.
1967	for (int i = `0`; i < len; ++i) {
1968	int distx = (fx & `0x0000ffff`) >> `8`;
1969	int disty = (fy & `0x0000ffff`) >> `8`;
1970
1971	b[i] = interpolate_4_pixels(buf1 + i * `2`, buf2 + i * `2`, distx, disty);
1972	fx += fdx;
1973	fy += fdy;
1974	}
1975	} else {
1976	// We are zooming less than 8x, use 4bit precision
1977	for (int i = `0`; i < len; ++i) {
1978	uint tl = buf1[i * `2` + `0`];
1979	uint tr = buf1[i * `2` + `1`];
1980	uint bl = buf2[i * `2` + `0`];
1981	uint br = buf2[i * `2` + `1`];
1982
1983	int distx = ((fx & `0x0000ffff`) + `0x0800`) >> `12`;
1984	int disty = ((fy & `0x0000ffff`) + `0x0800`) >> `12`;
1985
1986	b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1987	fx += fdx;
1988	fy += fdy;
1989	}
1990	}
1991
1992	length -= len;
1993	b += len;
1994	}
1995	}
1996	} else {
1997	const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType,bpp,uint>;
1998
1999	const qreal fdx = data->m11;
2000	const qreal fdy = data->m12;
2001	const qreal fdw = data->m13;
2002
2003	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2004	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2005	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2006
2007	uint buf1[BufferSize];
2008	uint buf2[BufferSize];
2009	uint *b = buffer;
2010
2011	ushort distxs[BufferSize / `2`];
2012	ushort distys[BufferSize / `2`];
2013
2014	while (length) {
2015	const int len = qMin(length, BufferSize / `2`);
2016	fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2017
2018	layout->convertToARGB32PM(buf1, len * `2`, clut);
2019	layout->convertToARGB32PM(buf2, len * `2`, clut);
2020
2021	for (int i = `0`; i < len; ++i) {
2022	const int distx = distxs[i] >> `8`;
2023	const int disty = distys[i] >> `8`;
2024
2025	b[i] = interpolate_4_pixels(buf1 + i * `2`, buf2 + i * `2`, distx, disty);
2026	}
2027	length -= len;
2028	b += len;
2029	}
2030	}
2031
2032	return buffer;
2033	}
2034
2035	#if QT_CONFIG(raster_64bit)
2036	template<TextureBlendType blendType>
2037	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 buffer, const QSpanData *data,
2038	int y, int x, int length)
2039	{
2040	const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2041	const QList<QRgb> *clut = data->texture.colorTable;
2042
2043	const qreal cx = x + qreal(`0.5`);
2044	const qreal cy = y + qreal(`0.5`);
2045
2046	uint sbuf1[BufferSize];
2047	uint sbuf2[BufferSize];
2048	alignas(`8`) QRgba64 buf1[BufferSize];
2049	alignas(`8`) QRgba64 buf2[BufferSize];
2050	QRgba64 *end = buffer + length;
2051	QRgba64 *b = buffer;
2052
2053	if (canUseFastMatrixPath(cx, cy, length, data)) {
2054	// The increment pr x in the scanline
2055	const int fdx = (int)(data->m11 * fixed_scale);
2056	const int fdy = (int)(data->m12 * fixed_scale);
2057
2058	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2059	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2060
2061	fx -= half_point;
2062	fy -= half_point;
2063
2064	const auto fetcher =
2065	(layout->bpp == QPixelLayout::BPP32)
2066	? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
2067	: fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2068
2069	if (fdy == `0`) { //simple scale, no rotation
2070	while (length) {
2071	int len = qMin(length, BufferSize / `2`);
2072	int disty = (fy & `0x0000ffff`);
2073	#if defined(__SSE2__)
2074	const __m128i vdy = _mm_set1_epi16(disty);
2075	const __m128i vidy = _mm_set1_epi16(`0x10000` - disty);
2076	#endif
2077	fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2078
2079	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
2080	if (disty)
2081	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
2082
2083	for (int i = `0`; i < len; ++i) {
2084	int distx = (fx & `0x0000ffff`);
2085	#if defined(__SSE2__)
2086	__m128i vt = _mm_loadu_si128((const __m128i)(buf1 + i`2`));
2087	if (disty) {
2088	__m128i vb = _mm_loadu_si128((const __m128i)(buf2 + i`2`));
2089	vt = _mm_mulhi_epu16(vt, vidy);
2090	vb = _mm_mulhi_epu16(vb, vdy);
2091	vt = _mm_add_epi16(vt, vb);
2092	}
2093	if (distx) {
2094	const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
2095	const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(`0x10000` - distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
2096	vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
2097	vt = _mm_add_epi16(vt, _mm_srli_si128(vt, `8`));
2098	}
2099	_mm_storel_epi64((__m128i*)(b+i), vt);
2100	#else
2101	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2102	#endif
2103	fx += fdx;
2104	}
2105	length -= len;
2106	b += len;
2107	}
2108	} else { // rotation or shear
2109	while (b < end) {
2110	int len = qMin(length, BufferSize / `2`);
2111
2112	fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2113
2114	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
2115	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
2116
2117	for (int i = `0`; i < len; ++i) {
2118	int distx = (fx & `0x0000ffff`);
2119	int disty = (fy & `0x0000ffff`);
2120	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2121	fx += fdx;
2122	fy += fdy;
2123	}
2124
2125	length -= len;
2126	b += len;
2127	}
2128	}
2129	} else { // !(data->fast_matrix)
2130	const auto fetcher =
2131	(layout->bpp == QPixelLayout::BPP32)
2132	? fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32, uint>
2133	: fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2134
2135	const qreal fdx = data->m11;
2136	const qreal fdy = data->m12;
2137	const qreal fdw = data->m13;
2138
2139	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2140	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2141	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2142
2143	ushort distxs[BufferSize / `2`];
2144	ushort distys[BufferSize / `2`];
2145
2146	while (length) {
2147	const int len = qMin(length, BufferSize / `2`);
2148	fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2149
2150	layout->convertToRGBA64PM(buf1, sbuf1, len * `2`, clut, nullptr);
2151	layout->convertToRGBA64PM(buf2, sbuf2, len * `2`, clut, nullptr);
2152
2153	for (int i = `0`; i < len; ++i) {
2154	const int distx = distxs[i];
2155	const int disty = distys[i];
2156	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2157	}
2158
2159	length -= len;
2160	b += len;
2161	}
2162	}
2163	return buffer;
2164	}
2165
2166	template<TextureBlendType blendType>
2167	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 buffer, const QSpanData *data,
2168	int y, int x, int length)
2169	{
2170	Q_ASSERT(qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64);
2171	const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
2172
2173	const qreal cx = x + qreal(`0.5`);
2174	const qreal cy = y + qreal(`0.5`);
2175
2176	alignas(`8`) QRgba64 buf1[BufferSize];
2177	alignas(`8`) QRgba64 buf2[BufferSize];
2178	QRgba64 *end = buffer + length;
2179	QRgba64 *b = buffer;
2180
2181	if (canUseFastMatrixPath(cx, cy, length, data)) {
2182	// The increment pr x in the scanline
2183	const int fdx = (int)(data->m11 * fixed_scale);
2184	const int fdy = (int)(data->m12 * fixed_scale);
2185
2186	int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2187	int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2188
2189	fx -= half_point;
2190	fy -= half_point;
2191	const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2192
2193	if (fdy == `0`) { //simple scale, no rotation
2194	while (length) {
2195	int len = qMin(length, BufferSize / `2`);
2196	int disty = (fy & `0x0000ffff`);
2197	#if defined(__SSE2__)
2198	const __m128i vdy = _mm_set1_epi16(disty);
2199	const __m128i vidy = _mm_set1_epi16(`0x10000` - disty);
2200	#endif
2201	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2202
2203	convert(buf1, len * `2`);
2204	if (disty)
2205	convert(buf2, len * `2`);
2206
2207	for (int i = `0`; i < len; ++i) {
2208	int distx = (fx & `0x0000ffff`);
2209	#if defined(__SSE2__)
2210	__m128i vt = _mm_loadu_si128((const __m128i)(buf1 + i`2`));
2211	if (disty) {
2212	__m128i vb = _mm_loadu_si128((const __m128i)(buf2 + i`2`));
2213	vt = _mm_mulhi_epu16(vt, vidy);
2214	vb = _mm_mulhi_epu16(vb, vdy);
2215	vt = _mm_add_epi16(vt, vb);
2216	}
2217	if (distx) {
2218	const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
2219	const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(`0x10000` - distx), _MM_SHUFFLE(`0`, `0`, `0`, `0`));
2220	vt = _mm_mulhi_epu16(vt, _mm_unpacklo_epi64(vidistx, vdistx));
2221	vt = _mm_add_epi16(vt, _mm_srli_si128(vt, `8`));
2222	}
2223	_mm_storel_epi64((__m128i*)(b+i), vt);
2224	#else
2225	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2226	#endif
2227	fx += fdx;
2228	}
2229	length -= len;
2230	b += len;
2231	}
2232	} else { // rotation or shear
2233	while (b < end) {
2234	int len = qMin(length, BufferSize / `2`);
2235
2236	fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2237
2238	convert(buf1, len * `2`);
2239	convert(buf2, len * `2`);
2240
2241	for (int i = `0`; i < len; ++i) {
2242	int distx = (fx & `0x0000ffff`);
2243	int disty = (fy & `0x0000ffff`);
2244	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2245	fx += fdx;
2246	fy += fdy;
2247	}
2248
2249	length -= len;
2250	b += len;
2251	}
2252	}
2253	} else { // !(data->fast_matrix)
2254	const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2255
2256	const qreal fdx = data->m11;
2257	const qreal fdy = data->m12;
2258	const qreal fdw = data->m13;
2259
2260	qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2261	qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2262	qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2263
2264	ushort distxs[BufferSize / `2`];
2265	ushort distys[BufferSize / `2`];
2266
2267	while (length) {
2268	const int len = qMin(length, BufferSize / `2`);
2269	fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2270
2271	convert(buf1, len * `2`);
2272	convert(buf2, len * `2`);
2273
2274	for (int i = `0`; i < len; ++i) {
2275	const int distx = distxs[i];
2276	const int disty = distys[i];
2277	b[i] = interpolate_4_pixels_rgb64(buf1 + i`2`, buf2 + i`2`, distx, disty);
2278	}
2279
2280	length -= len;
2281	b += len;
2282	}
2283	}
2284	return buffer;
2285	}
2286
2287	template<TextureBlendType blendType>
2288	static const QRgba64 QT_FASTCALL fetchTransformedBilinear64(QRgba64 buffer, const Operator *,
2289	const QSpanData data, int* y, int x, int length)
2290	{
2291	if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
2292	return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
2293	return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
2294	}
2295	#endif
2296
2297	// FetchUntransformed can have more specialized methods added depending on SIMD features.
2298	static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
2299	nullptr, // Invalid
2300	fetchUntransformed, // Mono
2301	fetchUntransformed, // MonoLsb
2302	fetchUntransformed, // Indexed8
2303	fetchUntransformedARGB32PM, // RGB32
2304	fetchUntransformed, // ARGB32
2305	fetchUntransformedARGB32PM, // ARGB32_Premultiplied
2306	fetchUntransformedRGB16, // RGB16
2307	fetchUntransformed, // ARGB8565_Premultiplied
2308	fetchUntransformed, // RGB666
2309	fetchUntransformed, // ARGB6666_Premultiplied
2310	fetchUntransformed, // RGB555
2311	fetchUntransformed, // ARGB8555_Premultiplied
2312	fetchUntransformed, // RGB888
2313	fetchUntransformed, // RGB444
2314	fetchUntransformed, // ARGB4444_Premultiplied
2315	fetchUntransformed, // RGBX8888
2316	fetchUntransformed, // RGBA8888
2317	fetchUntransformed, // RGBA8888_Premultiplied
2318	fetchUntransformed, // Format_BGR30
2319	fetchUntransformed, // Format_A2BGR30_Premultiplied
2320	fetchUntransformed, // Format_RGB30
2321	fetchUntransformed, // Format_A2RGB30_Premultiplied
2322	fetchUntransformed, // Alpha8
2323	fetchUntransformed, // Grayscale8
2324	fetchUntransformed, // RGBX64
2325	fetchUntransformed, // RGBA64
2326	fetchUntransformed, // RGBA64_Premultiplied
2327	fetchUntransformed, // Grayscale16
2328	fetchUntransformed, // BGR888
2329	};
2330
2331	static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
2332	fetchUntransformed, // Untransformed
2333	fetchUntransformed, // Tiled
2334	fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>, // Transformed
2335	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>, // TransformedTiled
2336	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>, // TransformedBilinear
2337	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone> // TransformedBilinearTiled
2338	};
2339
2340	static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
2341	fetchUntransformedARGB32PM, // Untransformed
2342	fetchUntransformedARGB32PM, // Tiled
2343	fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
2344	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
2345	fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>, // Bilinear
2346	fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
2347	};
2348
2349	static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
2350	fetchUntransformed, // Untransformed
2351	fetchUntransformed, // Tiled
2352	fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed
2353	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled
2354	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear
2355	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled
2356	};
2357
2358	static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
2359	fetchUntransformed, // Untransformed
2360	fetchUntransformed, // Tiled
2361	fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
2362	fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
2363	fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>, // TransformedBilinear
2364	fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32> // TransformedBilinearTiled
2365	};
2366
2367	static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
2368	{
2369	if (format == QImage::Format_RGB32 \|\| format == QImage::Format_ARGB32_Premultiplied)
2370	return sourceFetchARGB32PM[blendType];
2371	if (blendType == BlendUntransformed \|\| blendType == BlendTiled)
2372	return sourceFetchUntransformed[format];
2373	if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
2374	return sourceFetchAny16[blendType];
2375	if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
2376	return sourceFetchAny32[blendType];
2377	return sourceFetchGeneric[blendType];
2378	}
2379
2380	#if QT_CONFIG(raster_64bit)
2381	static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
2382	fetchUntransformed64, // Untransformed
2383	fetchUntransformed64, // Tiled
2384	fetchTransformed64<BlendTransformed>, // Transformed
2385	fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
2386	fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
2387	fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
2388	};
2389
2390	static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
2391	fetchUntransformedRGBA64PM, // Untransformed
2392	fetchUntransformedRGBA64PM, // Tiled
2393	fetchTransformed64<BlendTransformed>, // Transformed
2394	fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
2395	fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
2396	fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
2397	};
2398
2399	static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
2400	{
2401	if (format == QImage::Format_RGBX64 \|\| format == QImage::Format_RGBA64_Premultiplied)
2402	return sourceFetchRGBA64PM[blendType];
2403	return sourceFetchGeneric64[blendType];
2404	}
2405	#endif
2406
2407
// Fixed-point format used for gradient table positions: FIXPT_BITS fractional
// bits, so FIXPT_SIZE represents 1.0.
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
2410
2411	static uint qt_gradient_pixel_fixed(const QGradientData data, int* fixed_pos)
2412	{
2413	int ipos = (fixed_pos + (FIXPT_SIZE / `2`)) >> FIXPT_BITS;
2414	return data->colorTable32[qt_gradient_clamp(data, ipos)];
2415	}
2416
2417	#if QT_CONFIG(raster_64bit)
2418	static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData data, int* fixed_pos)
2419	{
2420	int ipos = (fixed_pos + (FIXPT_SIZE / `2`)) >> FIXPT_BITS;
2421	return data->colorTable64[qt_gradient_clamp(data, ipos)];
2422	}
2423	#endif
2424
2425	static void QT_FASTCALL getLinearGradientValues(LinearGradientValues v, const* QSpanData *data)
2426	{
2427	v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
2428	v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
2429	v->l = v->dx * v->dx + v->dy * v->dy;
2430	v->off = `0`;
2431	if (v->l != `0`) {
2432	v->dx /= v->l;
2433	v->dy /= v->l;
2434	v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
2435	}
2436	}
2437
2438	class GradientBase32
2439	{
2440	public:
2441	typedef uint Type;
2442	static Type null() { return `0`; }
2443	static Type fetchSingle(const QGradientData& gradient, qreal v)
2444	{
2445	return qt_gradient_pixel(&gradient, v);
2446	}
2447	static Type fetchSingle(const QGradientData& gradient, int v)
2448	{
2449	return qt_gradient_pixel_fixed(&gradient, v);
2450	}
2451	static void memfill(Type buffer, Type fill, int* length)
2452	{
2453	qt_memfill32(buffer, fill, length);
2454	}
2455	};
2456
2457	#if QT_CONFIG(raster_64bit)
2458	class GradientBase64
2459	{
2460	public:
2461	typedef QRgba64 Type;
2462	static Type null() { return QRgba64::fromRgba64(`0`); }
2463	static Type fetchSingle(const QGradientData& gradient, qreal v)
2464	{
2465	return qt_gradient_pixel64(&gradient, v);
2466	}
2467	static Type fetchSingle(const QGradientData& gradient, int v)
2468	{
2469	return qt_gradient_pixel64_fixed(&gradient, v);
2470	}
2471	static void memfill(Type buffer, Type fill, int* length)
2472	{
2473	qt_memfill64((quint64*)buffer, fill, length);
2474	}
2475	};
2476	#endif
2477
2478	template<class GradientBase, typename BlendType>
2479	static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
2480	BlendType buffer, const* Operator op, const* QSpanData *data,
2481	int y, int x, int length)
2482	{
2483	const BlendType *b = buffer;
2484	qreal t, inc;
2485
2486	bool affine = true;
2487	qreal rx=`0`, ry=`0`;
2488	if (op->linear.l == `0`) {
2489	t = inc = `0`;
2490	} else {
2491	rx = data->m21 * (y + qreal(`0.5`)) + data->m11 * (x + qreal(`0.5`)) + data->dx;
2492	ry = data->m22 * (y + qreal(`0.5`)) + data->m12 * (x + qreal(`0.5`)) + data->dy;
2493	t = op->linear.dxrx + op->linear.dyry + op->linear.off;
2494	inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
2495	affine = !data->m13 && !data->m23;
2496
2497	if (affine) {
2498	t *= (GRADIENT_STOPTABLE_SIZE - `1`);
2499	inc *= (GRADIENT_STOPTABLE_SIZE - `1`);
2500	}
2501	}
2502
2503	const BlendType *end = buffer + length;
2504	if (affine) {
2505	if (inc > qreal(-`1e-5`) && inc < qreal(`1e-5`)) {
2506	GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
2507	} else {
2508	if (t+inc*length < qreal(INT_MAX >> (FIXPT_BITS + `1`)) &&
2509	t+inc*length > qreal(INT_MIN >> (FIXPT_BITS + `1`))) {
2510	// we can use fixed point math
2511	int t_fixed = int(t * FIXPT_SIZE);
2512	int inc_fixed = int(inc * FIXPT_SIZE);
2513	while (buffer < end) {
2514	*buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
2515	t_fixed += inc_fixed;
2516	++buffer;
2517	}
2518	} else {
2519	// we have to fall back to float math
2520	while (buffer < end) {
2521	*buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
2522	t += inc;
2523	++buffer;
2524	}
2525	}
2526	}
2527	} else { // fall back to float math here as well
2528	qreal rw = data->m23 * (y + qreal(`0.5`)) + data->m13 * (x + qreal(`0.5`)) + data->m33;
2529	while (buffer < end) {
2530	qreal x = rx/rw;
2531	qreal y = ry/rw;
2532	t = (op->linear.dxx + op->linear.dy y) + op->linear.off;
2533
2534	*buffer = GradientBase::fetchSingle(data->gradient, t);
2535	rx += data->m11;
2536	ry += data->m12;
2537	rw += data->m13;
2538	if (!rw) {
2539	rw += data->m13;
2540	}
2541	++buffer;
2542	}
2543	}
2544
2545	return b;
2546	}
2547
2548	static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint buffer, const* Operator op, const* QSpanData *data,
2549	int y, int x, int length)
2550	{
2551	return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
2552	}
2553
2554	#if QT_CONFIG(raster_64bit)
2555	static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 buffer, const* Operator op, const* QSpanData *data,
2556	int y, int x, int length)
2557	{
2558	return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
2559	}
2560	#endif
2561
2562	static void QT_FASTCALL getRadialGradientValues(RadialGradientValues v, const* QSpanData *data)
2563	{
2564	v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
2565	v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
2566
2567	v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
2568	v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
2569
2570	v->a = v->dr * v->dr - v->dxv->dx - v->dyv->dy;
2571	v->inv2a = `1` / (`2` * v->a);
2572
2573	v->extended = !qFuzzyIsNull(data->gradient.radial.focal.radius) \|\| v->a <= `0`;
2574	}
2575
2576	template <class GradientBase>
2577	class RadialFetchPlain : public GradientBase
2578	{
2579	public:
2580	typedef typename GradientBase::Type BlendType;
2581	static void fetch(BlendType buffer, BlendType end,
2582	const Operator op, const* QSpanData *data, qreal det,
2583	qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
2584	{
2585	if (op->radial.extended) {
2586	while (buffer < end) {
2587	BlendType result = GradientBase::null();
2588	if (det >= `0`) {
2589	qreal w = qSqrt(det) - b;
2590	if (data->gradient.radial.focal.radius + op->radial.dr * w >= `0`)
2591	result = GradientBase::fetchSingle(data->gradient, w);
2592	}
2593
2594	*buffer = result;
2595
2596	det += delta_det;
2597	delta_det += delta_delta_det;
2598	b += delta_b;
2599
2600	++buffer;
2601	}
2602	} else {
2603	while (buffer < end) {
2604	*buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(det) - b);
2605
2606	det += delta_det;
2607	delta_det += delta_delta_det;
2608	b += delta_b;
2609	}
2610	}
2611	}
2612	};
2613
2614	const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint buffer, const* Operator op, const* QSpanData *data,
2615	int y, int x, int length)
2616	{
2617	return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
2618	}
2619
2620	static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
2621
2622	#if QT_CONFIG(raster_64bit)
2623	const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 buffer, const* Operator op, const* QSpanData *data,
2624	int y, int x, int length)
2625	{
2626	return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
2627	}
2628	#endif
2629
2630	template <class GradientBase, typename BlendType>
2631	static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
2632	BlendType buffer, const* QSpanData *data,
2633	int y, int x, int length)
2634	{
2635	const BlendType *b = buffer;
2636	qreal rx = data->m21 * (y + qreal(`0.5`))
2637	+ data->dx + data->m11 * (x + qreal(`0.5`));
2638	qreal ry = data->m22 * (y + qreal(`0.5`))
2639	+ data->dy + data->m12 * (x + qreal(`0.5`));
2640	bool affine = !data->m13 && !data->m23;
2641
2642	const qreal inv2pi = M_1_PI / `2.0`;
2643
2644	const BlendType *end = buffer + length;
2645	if (affine) {
2646	rx -= data->gradient.conical.center.x;
2647	ry -= data->gradient.conical.center.y;
2648	while (buffer < end) {
2649	qreal angle = qAtan2(ry, rx) + data->gradient.conical.angle;
2650
2651	buffer = GradientBase::fetchSingle(data->gradient, `1` - angle inv2pi);
2652
2653	rx += data->m11;
2654	ry += data->m12;
2655	++buffer;
2656	}
2657	} else {
2658	qreal rw = data->m23 * (y + qreal(`0.5`))
2659	+ data->m33 + data->m13 * (x + qreal(`0.5`));
2660	if (!rw)
2661	rw = `1`;
2662	while (buffer < end) {
2663	qreal angle = qAtan2(ry/rw - data->gradient.conical.center.x,
2664	rx/rw - data->gradient.conical.center.y)
2665	+ data->gradient.conical.angle;
2666
2667	buffer = GradientBase::fetchSingle(data->gradient, `1` - angle inv2pi);
2668
2669	rx += data->m11;
2670	ry += data->m12;
2671	rw += data->m13;
2672	if (!rw) {
2673	rw += data->m13;
2674	}
2675	++buffer;
2676	}
2677	}
2678	return b;
2679	}
2680
2681	static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint buffer, const* Operator , const* QSpanData *data,
2682	int y, int x, int length)
2683	{
2684	return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
2685	}
2686
2687	#if QT_CONFIG(raster_64bit)
2688	static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 buffer, const* Operator , const* QSpanData *data,
2689	int y, int x, int length)
2690	{
2691	return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
2692	}
2693	#endif
2694
2695	extern CompositionFunctionSolid qt_functionForModeSolid_C[];
2696	extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];
2697
2698	static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
2699	#if QT_CONFIG(raster_64bit)
2700	static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
2701	#endif
2702
2703	extern CompositionFunction qt_functionForMode_C[];
2704	extern CompositionFunction64 qt_functionForMode64_C[];
2705
2706	static const CompositionFunction *functionForMode = qt_functionForMode_C;
2707	#if QT_CONFIG(raster_64bit)
2708	static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
2709	#endif
2710
2711	static TextureBlendType getBlendType(const QSpanData *data)
2712	{
2713	TextureBlendType ft;
2714	if (data->txop <= QTransform::TxTranslate)
2715	if (data->texture.type == QTextureData::Tiled)
2716	ft = BlendTiled;
2717	else
2718	ft = BlendUntransformed;
2719	else if (data->bilinear)
2720	if (data->texture.type == QTextureData::Tiled)
2721	ft = BlendTransformedBilinearTiled;
2722	else
2723	ft = BlendTransformedBilinear;
2724	else
2725	if (data->texture.type == QTextureData::Tiled)
2726	ft = BlendTransformedTiled;
2727	else
2728	ft = BlendTransformed;
2729	return ft;
2730	}
2731
2732	static inline Operator getOperator(const QSpanData data, const* QSpan spans, int* spanCount)
2733	{
2734	Operator op;
2735	bool solidSource = false;
2736
2737	switch(data->type) {
2738	case QSpanData::Solid:
2739	solidSource = data->solidColor.isOpaque();
2740	op.srcFetch = nullptr;
2741	#if QT_CONFIG(raster_64bit)
2742	op.srcFetch64 = nullptr;
2743	#endif
2744	break;
2745	case QSpanData::LinearGradient:
2746	solidSource = !data->gradient.alphaColor;
2747	getLinearGradientValues(&op.linear, data);
2748	op.srcFetch = qt_fetch_linear_gradient;
2749	#if QT_CONFIG(raster_64bit)
2750	op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
2751	#endif
2752	break;
2753	case QSpanData::RadialGradient:
2754	solidSource = !data->gradient.alphaColor;
2755	getRadialGradientValues(&op.radial, data);
2756	op.srcFetch = qt_fetch_radial_gradient;
2757	#if QT_CONFIG(raster_64bit)
2758	op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
2759	#endif
2760	break;
2761	case QSpanData::ConicalGradient:
2762	solidSource = !data->gradient.alphaColor;
2763	op.srcFetch = qt_fetch_conical_gradient;
2764	#if QT_CONFIG(raster_64bit)
2765	op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
2766	#endif
2767	break;
2768	case QSpanData::Texture:
2769	solidSource = !data->texture.hasAlpha;
2770	op.srcFetch = getSourceFetch(getBlendType(data), data->texture.format);
2771	#if QT_CONFIG(raster_64bit)
2772	op.srcFetch64 = getSourceFetch64(getBlendType(data), data->texture.format);;
2773	#endif
2774	break;
2775	default:
2776	Q_UNREACHABLE();
2777	break;
2778	}
2779	#if !QT_CONFIG(raster_64bit)
2780	op.srcFetch64 = `0`;
2781	#endif
2782
2783	op.mode = data->rasterBuffer->compositionMode;
2784	if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
2785	op.mode = QPainter::CompositionMode_Source;
2786
2787	op.destFetch = destFetchProc[data->rasterBuffer->format];
2788	#if QT_CONFIG(raster_64bit)
2789	op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
2790	#else
2791	op.destFetch64 = `0`;
2792	#endif
2793	if (op.mode == QPainter::CompositionMode_Source &&
2794	(data->type != QSpanData::Texture \|\| data->texture.const_alpha == `256`)) {
2795	const QSpan *lastSpan = spans + spanCount;
2796	bool alphaSpans = false;
2797	while (spans < lastSpan) {
2798	if (spans->coverage != `255`) {
2799	alphaSpans = true;
2800	break;
2801	}
2802	++spans;
2803	}
2804	if (!alphaSpans && spanCount > `0`) {
2805	// If all spans are opaque we do not need to fetch dest.
2806	// But don't clear passthrough destFetch as they are just as fast and save destStore.
2807	if (op.destFetch != destFetchARGB32P)
2808	op.destFetch = destFetchUndefined;
2809	#if QT_CONFIG(raster_64bit)
2810	if (op.destFetch64 != destFetchRGB64)
2811	op.destFetch64 = destFetch64Undefined;
2812	#endif
2813	}
2814	}
2815
2816	op.destStore = destStoreProc[data->rasterBuffer->format];
2817	op.funcSolid = functionForModeSolid[op.mode];
2818	op.func = functionForMode[op.mode];
2819	#if QT_CONFIG(raster_64bit)
2820	op.destStore64 = destStoreProc64[data->rasterBuffer->format];
2821	op.funcSolid64 = functionForModeSolid64[op.mode];
2822	op.func64 = functionForMode64[op.mode];
2823	#else
2824	op.destStore64 = `0`;
2825	op.funcSolid64 = `0`;
2826	op.func64 = `0`;
2827	#endif
2828
2829	return op;
2830	}
2831
2832	static void spanfill_from_first(QRasterBuffer rasterBuffer, QPixelLayout::BPP bpp, int* x, int y, int length)
2833	{
2834	switch (bpp) {
2835	case QPixelLayout::BPP64: {
2836	quint64 dest = reinterpret_cast<quint64 >(rasterBuffer->scanLine(y)) + x;
2837	qt_memfill_template(dest + `1`, dest[`0`], length - `1`);
2838	break;
2839	}
2840	case QPixelLayout::BPP32: {
2841	quint32 dest = reinterpret_cast<quint32 >(rasterBuffer->scanLine(y)) + x;
2842	qt_memfill_template(dest + `1`, dest[`0`], length - `1`);
2843	break;
2844	}
2845	case QPixelLayout::BPP24: {
2846	quint24 dest = reinterpret_cast<quint24 >(rasterBuffer->scanLine(y)) + x;
2847	qt_memfill_template(dest + `1`, dest[`0`], length - `1`);
2848	break;
2849	}
2850	case QPixelLayout::BPP16: {
2851	quint16 dest = reinterpret_cast<quint16 >(rasterBuffer->scanLine(y)) + x;
2852	qt_memfill_template(dest + `1`, dest[`0`], length - `1`);
2853	break;
2854	}
2855	case QPixelLayout::BPP8: {
2856	uchar *dest = rasterBuffer->scanLine(y) + x;
2857	memset(dest + `1`, dest[`0`], length - `1`);
2858	break;
2859	}
2860	default:
2861	Q_UNREACHABLE();
2862	}
2863	}
2864
2865
2866	// -------------------- blend methods ---------------------
2867
2868	static void blend_color_generic(int count, const QSpan spans, void* *userData)
2869	{
2870	QSpanData data = reinterpret_cast<QSpanData >(userData);
2871	uint buffer[BufferSize];
2872	Operator op = getOperator(data, nullptr, `0`);
2873	const uint color = data->solidColor.toArgb32();
2874	const bool solidFill = op.mode == QPainter::CompositionMode_Source;
2875	const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
2876
2877	while (count--) {
2878	int x = spans->x;
2879	int length = spans->len;
2880	if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == `255` && length) {
2881	// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
2882	op.destStore(data->rasterBuffer, x, spans->y, &color, `1`);
2883	spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
2884	length = `0`;
2885	}
2886
2887	while (length) {
2888	int l = qMin(BufferSize, length);
2889	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
2890	op.funcSolid(dest, l, color, spans->coverage);
2891	if (op.destStore)
2892	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
2893	length -= l;
2894	x += l;
2895	}
2896	++spans;
2897	}
2898	}
2899
2900	static void blend_color_argb(int count, const QSpan spans, void* *userData)
2901	{
2902	QSpanData data = reinterpret_cast<QSpanData >(userData);
2903
2904	const Operator op = getOperator(data, nullptr, `0`);
2905	const uint color = data->solidColor.toArgb32();
2906
2907	if (op.mode == QPainter::CompositionMode_Source) {
2908	// inline for performance
2909	while (count--) {
2910	uint target = ((uint )data->rasterBuffer->scanLine(spans->y)) + spans->x;
2911	if (spans->coverage == `255`) {
2912	qt_memfill(target, color, spans->len);
2913	#ifdef __SSE2__
2914	} else if (spans->len > `16`) {
2915	op.funcSolid(target, spans->len, color, spans->coverage);
2916	#endif
2917	} else {
2918	uint c = BYTE_MUL(color, spans->coverage);
2919	int ialpha = `255` - spans->coverage;
2920	for (int i = `0`; i < spans->len; ++i)
2921	target[i] = c + BYTE_MUL(target[i], ialpha);
2922	}
2923	++spans;
2924	}
2925	return;
2926	}
2927
2928	while (count--) {
2929	uint target = ((uint )data->rasterBuffer->scanLine(spans->y)) + spans->x;
2930	op.funcSolid(target, spans->len, color, spans->coverage);
2931	++spans;
2932	}
2933	}
2934
2935	void blend_color_generic_rgb64(int count, const QSpan spans, void* *userData)
2936	{
2937	#if QT_CONFIG(raster_64bit)
2938	QSpanData data = reinterpret_cast<QSpanData >(userData);
2939	Operator op = getOperator(data, nullptr, `0`);
2940	if (!op.funcSolid64) {
2941	qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
2942	return blend_color_generic(count, spans, userData);
2943	}
2944
2945	alignas(`8`) QRgba64 buffer[BufferSize];
2946	const QRgba64 color = data->solidColor;
2947	const bool solidFill = op.mode == QPainter::CompositionMode_Source;
2948	const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
2949
2950	while (count--) {
2951	int x = spans->x;
2952	int length = spans->len;
2953	if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == `255` && length && op.destStore64) {
2954	// If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
2955	op.destStore64(data->rasterBuffer, x, spans->y, &color, `1`);
2956	spanfill_from_first(data->rasterBuffer, bpp, x, spans->y, length);
2957	length = `0`;
2958	}
2959
2960	while (length) {
2961	int l = qMin(BufferSize, length);
2962	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
2963	op.funcSolid64(dest, l, color, spans->coverage);
2964	if (op.destStore64)
2965	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
2966	length -= l;
2967	x += l;
2968	}
2969	++spans;
2970	}
2971	#else
2972	blend_color_generic(count, spans, userData);
2973	#endif
2974	}
2975
2976	static void blend_color_rgb16(int count, const QSpan spans, void* *userData)
2977	{
2978	QSpanData data = reinterpret_cast<QSpanData >(userData);
2979
2980	/*
2981	We duplicate a little logic from getOperator() and calculate the
2982	composition mode directly. This allows blend_color_rgb16 to be used
2983	from qt_gradient_quint16 with minimal overhead.
2984	*/
2985	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
2986	if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque())
2987	mode = QPainter::CompositionMode_Source;
2988
2989	if (mode == QPainter::CompositionMode_Source) {
2990	// inline for performance
2991	ushort c = data->solidColor.toRgb16();
2992	for (; count--; spans++) {
2993	if (!spans->len)
2994	continue;
2995	ushort target = ((ushort )data->rasterBuffer->scanLine(spans->y)) + spans->x;
2996	if (spans->coverage == `255`) {
2997	qt_memfill(target, c, spans->len);
2998	} else {
2999	ushort color = BYTE_MUL_RGB16(c, spans->coverage);
3000	int ialpha = `255` - spans->coverage;
3001	const ushort *end = target + spans->len;
3002	while (target < end) {
3003	target = color + BYTE_MUL_RGB16(target, ialpha);
3004	++target;
3005	}
3006	}
3007	}
3008	return;
3009	}
3010
3011	if (mode == QPainter::CompositionMode_SourceOver) {
3012	for (; count--; spans++) {
3013	if (!spans->len)
3014	continue;
3015	uint color = BYTE_MUL(data->solidColor.toArgb32(), spans->coverage);
3016	int ialpha = qAlpha(~color);
3017	ushort c = qConvertRgb32To16(color);
3018	ushort target = ((ushort )data->rasterBuffer->scanLine(spans->y)) + spans->x;
3019	int len = spans->len;
3020	bool pre = (((quintptr)target) & `0x3`) != `0`;
3021	bool post = false;
3022	if (pre) {
3023	// skip to word boundary
3024	target = c + BYTE_MUL_RGB16(target, ialpha);
3025	++target;
3026	--len;
3027	}
3028	if (len & `0x1`) {
3029	post = true;
3030	--len;
3031	}
3032	uint target32 = (uint)target;
3033	uint c32 = c \| (c<<`16`);
3034	len >>= `1`;
3035	uint salpha = (ialpha+`1`) >> `3`; // calculate here rather than in loop
3036	while (len--) {
3037	// blend full words
3038	target32 = c32 + BYTE_MUL_RGB16_32(target32, salpha);
3039	++target32;
3040	target += `2`;
3041	}
3042	if (post) {
3043	// one last pixel beyond a full word
3044	target = c + BYTE_MUL_RGB16(target, ialpha);
3045	}
3046	}
3047	return;
3048	}
3049
3050	blend_color_generic(count, spans, userData);
3051	}
3052
3053	template <typename T>
3054	void handleSpans(int count, const QSpan spans, const* QSpanData *data, T &handler)
3055	{
3056	uint const_alpha = `256`;
3057	if (data->type == QSpanData::Texture)
3058	const_alpha = data->texture.const_alpha;
3059
3060	int coverage = `0`;
3061	while (count) {
3062	if (!spans->len) {
3063	++spans;
3064	--count;
3065	continue;
3066	}
3067	int x = spans->x;
3068	const int y = spans->y;
3069	int right = x + spans->len;
3070
3071	// compute length of adjacent spans
3072	for (int i = `1`; i < count && spans[i].y == y && spans[i].x == right; ++i)
3073	right += spans[i].len;
3074	int length = right - x;
3075
3076	while (length) {
3077	int l = qMin(BufferSize, length);
3078	length -= l;
3079
3080	int process_length = l;
3081	int process_x = x;
3082
3083	const typename T::BlendType *src = handler.fetch(process_x, y, process_length);
3084	int offset = `0`;
3085	while (l > `0`) {
3086	if (x == spans->x) // new span?
3087	coverage = (spans->coverage * const_alpha) >> `8`;
3088
3089	int right = spans->x + spans->len;
3090	int len = qMin(l, right - x);
3091
3092	handler.process(x, y, len, coverage, src, offset);
3093
3094	l -= len;
3095	x += len;
3096	offset += len;
3097
3098	if (x == right) { // done with current span?
3099	++spans;
3100	--count;
3101	}
3102	}
3103	handler.store(process_x, y, process_length);
3104	}
3105	}
3106	}
3107
3108	template<typename T>
3109	struct QBlendBase
3110	{
3111	typedef T BlendType;
3112	QBlendBase(QSpanData d, const* Operator &o)
3113	: data(d)
3114	, op (o)
3115	, dest(nullptr)
3116	{
3117	}
3118
3119	QSpanData *data;
3120	Operator op;
3121
3122	BlendType *dest;
3123
3124	alignas(`8`) BlendType buffer[BufferSize];
3125	alignas(`8`) BlendType src_buffer[BufferSize];
3126	};
3127
3128	class BlendSrcGeneric : public QBlendBase<uint>
3129	{
3130	public:
3131	BlendSrcGeneric(QSpanData d, const* Operator &o)
3132	: QBlendBase<uint>(d, o)
3133	{
3134	}
3135
3136	const uint fetch(int* x, int y, int len)
3137	{
3138	dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
3139	return op.srcFetch(src_buffer, &op, data, y, x, len);
3140	}
3141
3142	void process(int, int, int len, int coverage, const uint src, int* offset)
3143	{
3144	op.func(dest + offset, src + offset, len, coverage);
3145	}
3146
3147	void store(int x, int y, int len)
3148	{
3149	if (op.destStore)
3150	op.destStore(data->rasterBuffer, x, y, dest, len);
3151	}
3152	};
3153
3154	#if QT_CONFIG(raster_64bit)
3155	class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
3156	{
3157	public:
3158	BlendSrcGenericRGB64(QSpanData d, const* Operator &o)
3159	: QBlendBase<QRgba64>(d, o)
3160	{
3161	}
3162
3163	bool isSupported() const
3164	{
3165	return op.func64 && op.destFetch64;
3166	}
3167
3168	const QRgba64 fetch(int* x, int y, int len)
3169	{
3170	dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
3171	return op.srcFetch64(src_buffer, &op, data, y, x, len);
3172	}
3173
3174	void process(int, int, int len, int coverage, const QRgba64 src, int* offset)
3175	{
3176	op.func64(dest + offset, src + offset, len, coverage);
3177	}
3178
3179	void store(int x, int y, int len)
3180	{
3181	if (op.destStore64)
3182	op.destStore64(data->rasterBuffer, x, y, dest, len);
3183	}
3184	};
3185	#endif
3186
3187	static void blend_src_generic(int count, const QSpan spans, void* *userData)
3188	{
3189	QSpanData data = reinterpret_cast<QSpanData >(userData);
3190	BlendSrcGeneric blend(data, getOperator(data, spans, count));
3191	handleSpans(count, spans, data, blend);
3192	}
3193
3194	#if QT_CONFIG(raster_64bit)
3195	static void blend_src_generic_rgb64(int count, const QSpan spans, void* *userData)
3196	{
3197	QSpanData data = reinterpret_cast<QSpanData >(userData);
3198	Operator op = getOperator(data, spans, count);
3199	BlendSrcGenericRGB64 blend64(data, op);
3200	if (blend64.isSupported())
3201	handleSpans(count, spans, data, blend64);
3202	else {
3203	qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
3204	BlendSrcGeneric blend32(data, op);
3205	handleSpans(count, spans, data, blend32);
3206	}
3207	}
3208	#endif
3209
3210	static void blend_untransformed_generic(int count, const QSpan spans, void* *userData)
3211	{
3212	QSpanData data = reinterpret_cast<QSpanData >(userData);
3213
3214	uint buffer[BufferSize];
3215	uint src_buffer[BufferSize];
3216	Operator op = getOperator(data, spans, count);
3217
3218	const int image_width = data->texture.width;
3219	const int image_height = data->texture.height;
3220	int xoff = -qRound(-data->dx);
3221	int yoff = -qRound(-data->dy);
3222
3223	for (; count--; spans++) {
3224	if (!spans->len)
3225	continue;
3226	int x = spans->x;
3227	int length = spans->len;
3228	int sx = xoff + x;
3229	int sy = yoff + spans->y;
3230	if (sy >= `0` && sy < image_height && sx < image_width) {
3231	if (sx < `0`) {
3232	x -= sx;
3233	length += sx;
3234	sx = `0`;
3235	}
3236	if (sx + length > image_width)
3237	length = image_width - sx;
3238	if (length > `0`) {
3239	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3240	while (length) {
3241	int l = qMin(BufferSize, length);
3242	const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
3243	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
3244	op.func(dest, src, l, coverage);
3245	if (op.destStore)
3246	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
3247	x += l;
3248	sx += l;
3249	length -= l;
3250	}
3251	}
3252	}
3253	}
3254	}
3255
3256	#if QT_CONFIG(raster_64bit)
3257	static void blend_untransformed_generic_rgb64(int count, const QSpan spans, void* *userData)
3258	{
3259	QSpanData data = reinterpret_cast<QSpanData >(userData);
3260
3261	Operator op = getOperator(data, spans, count);
3262	if (!op.func64) {
3263	qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
3264	return blend_untransformed_generic(count, spans, userData);
3265	}
3266	alignas(`8`) QRgba64 buffer[BufferSize];
3267	alignas(`8`) QRgba64 src_buffer[BufferSize];
3268
3269	const int image_width = data->texture.width;
3270	const int image_height = data->texture.height;
3271	int xoff = -qRound(-data->dx);
3272	int yoff = -qRound(-data->dy);
3273
3274	for (; count--; spans++) {
3275	if (!spans->len)
3276	continue;
3277	int x = spans->x;
3278	int length = spans->len;
3279	int sx = xoff + x;
3280	int sy = yoff + spans->y;
3281	if (sy >= `0` && sy < image_height && sx < image_width) {
3282	if (sx < `0`) {
3283	x -= sx;
3284	length += sx;
3285	sx = `0`;
3286	}
3287	if (sx + length > image_width)
3288	length = image_width - sx;
3289	if (length > `0`) {
3290	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3291	while (length) {
3292	int l = qMin(BufferSize, length);
3293	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
3294	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
3295	op.func64(dest, src, l, coverage);
3296	if (op.destStore64)
3297	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
3298	x += l;
3299	sx += l;
3300	length -= l;
3301	}
3302	}
3303	}
3304	}
3305	}
3306	#endif
3307
3308	static void blend_untransformed_argb(int count, const QSpan spans, void* *userData)
3309	{
3310	QSpanData data = reinterpret_cast<QSpanData >(userData);
3311	if (data->texture.format != QImage::Format_ARGB32_Premultiplied
3312	&& data->texture.format != QImage::Format_RGB32) {
3313	blend_untransformed_generic(count, spans, userData);
3314	return;
3315	}
3316
3317	Operator op = getOperator(data, spans, count);
3318
3319	const int image_width = data->texture.width;
3320	const int image_height = data->texture.height;
3321	int xoff = -qRound(-data->dx);
3322	int yoff = -qRound(-data->dy);
3323
3324	for (; count--; spans++) {
3325	if (!spans->len)
3326	continue;
3327	int x = spans->x;
3328	int length = spans->len;
3329	int sx = xoff + x;
3330	int sy = yoff + spans->y;
3331	if (sy >= `0` && sy < image_height && sx < image_width) {
3332	if (sx < `0`) {
3333	x -= sx;
3334	length += sx;
3335	sx = `0`;
3336	}
3337	if (sx + length > image_width)
3338	length = image_width - sx;
3339	if (length > `0`) {
3340	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3341	const uint src = (const* uint *)data->texture.scanLine(sy) + sx;
3342	uint dest = ((uint )data->rasterBuffer->scanLine(spans->y)) + x;
3343	op.func(dest, src, length, coverage);
3344	}
3345	}
3346	}
3347	}
3348
3349	static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
3350	quint16 y, quint8 b)
3351	{
3352	quint16 t = ((((x & `0x07e0`) * a) + ((y & `0x07e0`) * b)) >> `5`) & `0x07e0`;
3353	t \|= ((((x & `0xf81f`) * a) + ((y & `0xf81f`) * b)) >> `5`) & `0xf81f`;
3354
3355	return t;
3356	}
3357
3358	static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
3359	quint32 y, quint8 b)
3360	{
3361	uint t;
3362	t = ((((x & `0xf81f07e0`) >> `5`) * a) + (((y & `0xf81f07e0`) >> `5`) * b)) & `0xf81f07e0`;
3363	t \|= ((((x & `0x07e0f81f`) * a) + ((y & `0x07e0f81f`) * b)) >> `5`) & `0x07e0f81f`;
3364	return t;
3365	}
3366
3367	static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
3368	const quint16 *Q_DECL_RESTRICT src,
3369	int length,
3370	const quint8 alpha,
3371	const quint8 ialpha)
3372	{
3373	const int dstAlign = ((quintptr)dest) & `0x3`;
3374	if (dstAlign) {
3375	dest = interpolate_pixel_rgb16_255(src, alpha, *dest, ialpha);
3376	++dest;
3377	++src;
3378	--length;
3379	}
3380	const int srcAlign = ((quintptr)src) & `0x3`;
3381	int length32 = length >> `1`;
3382	if (length32 && srcAlign == `0`) {
3383	while (length32--) {
3384	const quint32 src32 = reinterpret_cast<const* quint32*>(src);
3385	quint32 dest32 = reinterpret_cast<quint32>(dest);
3386	dest32 = interpolate_pixel_rgb16x2_255(src32, alpha,
3387	*dest32, ialpha);
3388	dest += `2`;
3389	src += `2`;
3390	}
3391	length &= `0x1`;
3392	}
3393	while (length--) {
3394	dest = interpolate_pixel_rgb16_255(src, alpha, *dest, ialpha);
3395	++dest;
3396	++src;
3397	}
3398	}
3399
3400	static void blend_untransformed_rgb565(int count, const QSpan spans, void* *userData)
3401	{
3402	QSpanData data = reinterpret_cast<QSpanData>(userData);
3403	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
3404
3405	if (data->texture.format != QImage::Format_RGB16
3406	\|\| (mode != QPainter::CompositionMode_SourceOver
3407	&& mode != QPainter::CompositionMode_Source))
3408	{
3409	blend_untransformed_generic(count, spans, userData);
3410	return;
3411	}
3412
3413	const int image_width = data->texture.width;
3414	const int image_height = data->texture.height;
3415	int xoff = -qRound(-data->dx);
3416	int yoff = -qRound(-data->dy);
3417
3418	const QSpan *end = spans + count;
3419	while (spans < end) {
3420	if (!spans->len) {
3421	++spans;
3422	continue;
3423	}
3424	const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> `8`;
3425	if (coverage == `0`) {
3426	++spans;
3427	continue;
3428	}
3429
3430	int x = spans->x;
3431	int length = spans->len;
3432	int sx = xoff + x;
3433	int sy = yoff + spans->y;
3434	if (sy >= `0` && sy < image_height && sx < image_width) {
3435	if (sx < `0`) {
3436	x -= sx;
3437	length += sx;
3438	sx = `0`;
3439	}
3440	if (sx + length > image_width)
3441	length = image_width - sx;
3442	if (length > `0`) {
3443	quint16 dest = (quint16 )data->rasterBuffer->scanLine(spans->y) + x;
3444	const quint16 src = (const* quint16 *)data->texture.scanLine(sy) + sx;
3445	if (coverage == `255`) {
3446	memcpy(dest, src, length * sizeof(quint16));
3447	} else {
3448	const quint8 alpha = (coverage + `1`) >> `3`;
3449	const quint8 ialpha = `0x20` - alpha;
3450	if (alpha > `0`)
3451	blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
3452	}
3453	}
3454	}
3455	++spans;
3456	}
3457	}
3458
3459	static void blend_tiled_generic(int count, const QSpan spans, void* *userData)
3460	{
3461	QSpanData data = reinterpret_cast<QSpanData >(userData);
3462
3463	uint buffer[BufferSize];
3464	uint src_buffer[BufferSize];
3465	Operator op = getOperator(data, spans, count);
3466
3467	const int image_width = data->texture.width;
3468	const int image_height = data->texture.height;
3469	int xoff = -qRound(-data->dx) % image_width;
3470	int yoff = -qRound(-data->dy) % image_height;
3471
3472	if (xoff < `0`)
3473	xoff += image_width;
3474	if (yoff < `0`)
3475	yoff += image_height;
3476
3477	while (count--) {
3478	int x = spans->x;
3479	int length = spans->len;
3480	int sx = (xoff + spans->x) % image_width;
3481	int sy = (spans->y + yoff) % image_height;
3482	if (sx < `0`)
3483	sx += image_width;
3484	if (sy < `0`)
3485	sy += image_height;
3486
3487	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3488	while (length) {
3489	int l = qMin(image_width - sx, length);
3490	if (BufferSize < l)
3491	l = BufferSize;
3492	const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
3493	uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
3494	op.func(dest, src, l, coverage);
3495	if (op.destStore)
3496	op.destStore(data->rasterBuffer, x, spans->y, dest, l);
3497	x += l;
3498	sx += l;
3499	length -= l;
3500	if (sx >= image_width)
3501	sx = `0`;
3502	}
3503	++spans;
3504	}
3505	}
3506
3507	#if QT_CONFIG(raster_64bit)
3508	static void blend_tiled_generic_rgb64(int count, const QSpan spans, void* *userData)
3509	{
3510	QSpanData data = reinterpret_cast<QSpanData >(userData);
3511
3512	Operator op = getOperator(data, spans, count);
3513	if (!op.func64) {
3514	qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
3515	return blend_tiled_generic(count, spans, userData);
3516	}
3517	alignas(`8`) QRgba64 buffer[BufferSize];
3518	alignas(`8`) QRgba64 src_buffer[BufferSize];
3519
3520	const int image_width = data->texture.width;
3521	const int image_height = data->texture.height;
3522	int xoff = -qRound(-data->dx) % image_width;
3523	int yoff = -qRound(-data->dy) % image_height;
3524
3525	if (xoff < `0`)
3526	xoff += image_width;
3527	if (yoff < `0`)
3528	yoff += image_height;
3529
3530	bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
3531	bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
3532	if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 \|\| isBpp64)) {
3533	// If destination isn't blended into the result, we can do the tiling directly on destination pixels.
3534	while (count--) {
3535	int x = spans->x;
3536	int y = spans->y;
3537	int length = spans->len;
3538	int sx = (xoff + spans->x) % image_width;
3539	int sy = (spans->y + yoff) % image_height;
3540	if (sx < `0`)
3541	sx += image_width;
3542	if (sy < `0`)
3543	sy += image_height;
3544
3545	int sl = qMin(image_width, length);
3546	if (sx > `0` && sl > `0`) {
3547	int l = qMin(image_width - sx, sl);
3548	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
3549	op.destStore64(data->rasterBuffer, x, y, src, l);
3550	x += l;
3551	sx += l;
3552	sl -= l;
3553	if (sx >= image_width)
3554	sx = `0`;
3555	}
3556	if (sl > `0`) {
3557	Q_ASSERT(sx == `0`);
3558	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
3559	op.destStore64(data->rasterBuffer, x, y, src, sl);
3560	x += sl;
3561	sx += sl;
3562	sl -= sl;
3563	if (sx >= image_width)
3564	sx = `0`;
3565	}
3566	if (isBpp32) {
3567	uint dest = reinterpret_cast<uint >(data->rasterBuffer->scanLine(y)) + x - image_width;
3568	for (int i = image_width; i < length; ++i)
3569	dest[i] = dest[i - image_width];
3570	} else {
3571	quint64 dest = reinterpret_cast<quint64 >(data->rasterBuffer->scanLine(y)) + x - image_width;
3572	for (int i = image_width; i < length; ++i)
3573	dest[i] = dest[i - image_width];
3574	}
3575	++spans;
3576	}
3577	return;
3578	}
3579
3580	while (count--) {
3581	int x = spans->x;
3582	int length = spans->len;
3583	int sx = (xoff + spans->x) % image_width;
3584	int sy = (spans->y + yoff) % image_height;
3585	if (sx < `0`)
3586	sx += image_width;
3587	if (sy < `0`)
3588	sy += image_height;
3589
3590	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3591	while (length) {
3592	int l = qMin(image_width - sx, length);
3593	if (BufferSize < l)
3594	l = BufferSize;
3595	const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
3596	QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
3597	op.func64(dest, src, l, coverage);
3598	if (op.destStore64)
3599	op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
3600	x += l;
3601	sx += l;
3602	length -= l;
3603	if (sx >= image_width)
3604	sx = `0`;
3605	}
3606	++spans;
3607	}
3608	}
3609	#endif
3610
3611	static void blend_tiled_argb(int count, const QSpan spans, void* *userData)
3612	{
3613	QSpanData data = reinterpret_cast<QSpanData >(userData);
3614	if (data->texture.format != QImage::Format_ARGB32_Premultiplied
3615	&& data->texture.format != QImage::Format_RGB32) {
3616	blend_tiled_generic(count, spans, userData);
3617	return;
3618	}
3619
3620	Operator op = getOperator(data, spans, count);
3621
3622	int image_width = data->texture.width;
3623	int image_height = data->texture.height;
3624	int xoff = -qRound(-data->dx) % image_width;
3625	int yoff = -qRound(-data->dy) % image_height;
3626
3627	if (xoff < `0`)
3628	xoff += image_width;
3629	if (yoff < `0`)
3630	yoff += image_height;
3631
3632	while (count--) {
3633	int x = spans->x;
3634	int length = spans->len;
3635	int sx = (xoff + spans->x) % image_width;
3636	int sy = (spans->y + yoff) % image_height;
3637	if (sx < `0`)
3638	sx += image_width;
3639	if (sy < `0`)
3640	sy += image_height;
3641
3642	const int coverage = (spans->coverage * data->texture.const_alpha) >> `8`;
3643	while (length) {
3644	int l = qMin(image_width - sx, length);
3645	if (BufferSize < l)
3646	l = BufferSize;
3647	const uint src = (const* uint *)data->texture.scanLine(sy) + sx;
3648	uint dest = ((uint )data->rasterBuffer->scanLine(spans->y)) + x;
3649	op.func(dest, src, l, coverage);
3650	x += l;
3651	sx += l;
3652	length -= l;
3653	if (sx >= image_width)
3654	sx = `0`;
3655	}
3656	++spans;
3657	}
3658	}
3659
3660	static void blend_tiled_rgb565(int count, const QSpan spans, void* *userData)
3661	{
3662	QSpanData data = reinterpret_cast<QSpanData>(userData);
3663	QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
3664
3665	if (data->texture.format != QImage::Format_RGB16
3666	\|\| (mode != QPainter::CompositionMode_SourceOver
3667	&& mode != QPainter::CompositionMode_Source))
3668	{
3669	blend_tiled_generic(count, spans, userData);
3670	return;
3671	}
3672
3673	const int image_width = data->texture.width;
3674	const int image_height = data->texture.height;
3675	int xoff = -qRound(-data->dx) % image_width;
3676	int yoff = -qRound(-data->dy) % image_height;
3677
3678	if (xoff < `0`)
3679	xoff += image_width;
3680	if (yoff < `0`)
3681	yoff += image_height;
3682
3683	while (count--) {
3684	const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> `8`;
3685	if (coverage == `0`) {
3686	++spans;
3687	continue;
3688	}
3689
3690	int x = spans->x;
3691	int length = spans->len;
3692	int sx = (xoff + spans->x) % image_width;
3693	int sy = (spans->y + yoff) % image_height;
3694	if (sx < `0`)
3695	sx += image_width;
3696	if (sy < `0`)
3697	sy += image_height;
3698
3699	if (coverage == `255`) {
3700	// Copy the first texture block
3701	length = qMin(image_width,length);
3702	int tx = x;
3703	while (length) {
3704	int l = qMin(image_width - sx, length);
3705	if (BufferSize < l)
3706	l = BufferSize;
3707	quint16 dest = ((quint16 )data->rasterBuffer->scanLine(spans->y)) + tx;
3708	const quint16 src = (const* quint16 *)data->texture.scanLine(sy) + sx;
3709	memcpy(dest, src, l * sizeof(quint16));
3710	length -= l;
3711	tx += l;
3712	sx += l;
3713	if (sx >= image_width)
3714	sx = `0`;
3715	}
3716
3717	// Now use the rasterBuffer as the source of the texture,
3718	// We can now progressively copy larger blocks
3719	// - Less cpu time in code figuring out what to copy
3720	// We are dealing with one block of data
3721	// - More likely to fit in the cache
3722	// - can use memcpy
3723	int copy_image_width = qMin(image_width, int(spans->len));
3724	length = spans->len - copy_image_width;
3725	quint16 src = ((quint16 )data->rasterBuffer->scanLine(spans->y)) + x;
3726	quint16 *dest = src + copy_image_width;
3727	while (copy_image_width < length) {
3728	memcpy(dest, src, copy_image_width * sizeof(quint16));
3729	dest += copy_image_width;
3730	length -= copy_image_width;
3731	copy_image_width *= `2`;
3732	}
3733	if (length > `0`)
3734	memcpy(dest, src, length * sizeof(quint16));
3735	} else {
3736	const quint8 alpha = (coverage + `1`) >> `3`;
3737	const quint8 ialpha = `0x20` - alpha;
3738	if (alpha > `0`) {
3739	while (length) {
3740	int l = qMin(image_width - sx, length);
3741	if (BufferSize < l)
3742	l = BufferSize;
3743	quint16 dest = ((quint16 )data->rasterBuffer->scanLine(spans->y)) + x;
3744	const quint16 src = (const* quint16 *)data->texture.scanLine(sy) + sx;
3745	blend_sourceOver_rgb16_rgb16(dest, src, l, alpha, ialpha);
3746	x += l;
3747	sx += l;
3748	length -= l;
3749	if (sx >= image_width)
3750	sx = `0`;
3751	}
3752	}
3753	}
3754	++spans;
3755	}
3756	}
3757
/* Image formats here are target formats */
3759	static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
3760	blend_untransformed_argb, // Untransformed
3761	blend_tiled_argb, // Tiled
3762	blend_src_generic, // Transformed
3763	blend_src_generic, // TransformedTiled
3764	blend_src_generic, // TransformedBilinear
3765	blend_src_generic // TransformedBilinearTiled
3766	};
3767
3768	static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
3769	blend_untransformed_rgb565, // Untransformed
3770	blend_tiled_rgb565, // Tiled
3771	blend_src_generic, // Transformed
3772	blend_src_generic, // TransformedTiled
3773	blend_src_generic, // TransformedBilinear
3774	blend_src_generic // TransformedBilinearTiled
3775	};
3776
3777	static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
3778	blend_untransformed_generic, // Untransformed
3779	blend_tiled_generic, // Tiled
3780	blend_src_generic, // Transformed
3781	blend_src_generic, // TransformedTiled
3782	blend_src_generic, // TransformedBilinear
3783	blend_src_generic // TransformedBilinearTiled
3784	};
3785
3786	#if QT_CONFIG(raster_64bit)
3787	static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
3788	blend_untransformed_generic_rgb64, // Untransformed
3789	blend_tiled_generic_rgb64, // Tiled
3790	blend_src_generic_rgb64, // Transformed
3791	blend_src_generic_rgb64, // TransformedTiled
3792	blend_src_generic_rgb64, // TransformedBilinear
3793	blend_src_generic_rgb64 // TransformedBilinearTiled
3794	};
3795	#endif
3796
3797	void qBlendTexture(int count, const QSpan spans, void* *userData)
3798	{
3799	QSpanData data = reinterpret_cast<QSpanData >(userData);
3800	TextureBlendType blendType = getBlendType(data);
3801	ProcessSpans proc;
3802	switch (data->rasterBuffer->format) {
3803	case QImage::Format_ARGB32_Premultiplied:
3804	proc = processTextureSpansARGB32PM[blendType];
3805	break;
3806	case QImage::Format_RGB16:
3807	proc = processTextureSpansRGB16[blendType];
3808	break;
3809	#if QT_CONFIG(raster_64bit)
3810	#if defined(__SSE2__) \|\| defined(__ARM_NEON__) \|\| (Q_PROCESSOR_WORDSIZE == 8)
3811	case QImage::Format_ARGB32:
3812	case QImage::Format_RGBA8888:
3813	#endif
3814	case QImage::Format_BGR30:
3815	case QImage::Format_A2BGR30_Premultiplied:
3816	case QImage::Format_RGB30:
3817	case QImage::Format_A2RGB30_Premultiplied:
3818	case QImage::Format_RGBX64:
3819	case QImage::Format_RGBA64:
3820	case QImage::Format_RGBA64_Premultiplied:
3821	case QImage::Format_Grayscale16:
3822	proc = processTextureSpansGeneric64[blendType];
3823	break;
3824	#endif // QT_CONFIG(raster_64bit)
3825	case QImage::Format_Invalid:
3826	Q_UNREACHABLE();
3827	return;
3828	default:
3829	proc = processTextureSpansGeneric[blendType];
3830	break;
3831	}
3832	proc(count, spans, userData);
3833	}
3834
3835	static void blend_vertical_gradient_argb(int count, const QSpan spans, void* *userData)
3836	{
3837	QSpanData data = reinterpret_cast<QSpanData >(userData);
3838
3839	LinearGradientValues linear;
3840	getLinearGradientValues(&linear, data);
3841
3842	CompositionFunctionSolid funcSolid =
3843	functionForModeSolid[data->rasterBuffer->compositionMode];
3844
3845	/*
3846	The logic for vertical gradient calculations is a mathematically
3847	reduced copy of that in fetchLinearGradient() - which is basically:
3848
3849	qreal ry = data->m22 (y + 0.5) + data->dy;*
3850	qreal t = linear.dyry + linear.off;*
3851	t = (GRADIENT_STOPTABLE_SIZE - 1);*
3852	quint32 color =
3853	qt_gradient_pixel_fixed(&data->gradient,
3854	int(t FIXPT_SIZE));*
3855
3856	This has then been converted to fixed point to improve performance.
3857	*/
3858	const int gss = GRADIENT_STOPTABLE_SIZE - `1`;
3859	int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
3860	int off = int((((linear.dy * (data->m22 * qreal(`0.5`) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
3861
3862	while (count--) {
3863	int y = spans->y;
3864	int x = spans->x;
3865
3866	quint32 dst = (quint32 )(data->rasterBuffer->scanLine(y)) + x;
3867	quint32 color =
3868	qt_gradient_pixel_fixed(&data->gradient, yinc * y + off);
3869
3870	funcSolid(dst, spans->len, color, spans->coverage);
3871	++spans;
3872	}
3873	}
3874
3875	template<ProcessSpans blend_color>
3876	static void blend_vertical_gradient(int count, const QSpan spans, void* *userData)
3877	{
3878	QSpanData data = reinterpret_cast<QSpanData >(userData);
3879
3880	LinearGradientValues linear;
3881	getLinearGradientValues(&linear, data);
3882
3883	// Based on the same logic as blend_vertical_gradient_argb.
3884
3885	const int gss = GRADIENT_STOPTABLE_SIZE - `1`;
3886	int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
3887	int off = int((((linear.dy * (data->m22 * qreal(`0.5`) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
3888
3889	while (count--) {
3890	int y = spans->y;
3891
3892	#if QT_CONFIG(raster_64bit)
3893	data->solidColor = qt_gradient_pixel64_fixed(&data->gradient, yinc * y + off);
3894	#else
3895	data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off));
3896	#endif
3897	blend_color(`1`, spans, userData);
3898	++spans;
3899	}
3900	}
3901
3902	void qBlendGradient(int count, const QSpan spans, void* *userData)
3903	{
3904	QSpanData data = reinterpret_cast<QSpanData >(userData);
3905	bool isVerticalGradient =
3906	data->txop <= QTransform::TxScale &&
3907	data->type == QSpanData::LinearGradient &&
3908	data->gradient.linear.end.x == data->gradient.linear.origin.x;
3909	switch (data->rasterBuffer->format) {
3910	case QImage::Format_RGB16:
3911	if (isVerticalGradient)
3912	return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData);
3913	return blend_src_generic(count, spans, userData);
3914	case QImage::Format_RGB32:
3915	case QImage::Format_ARGB32_Premultiplied:
3916	if (isVerticalGradient)
3917	return blend_vertical_gradient_argb(count, spans, userData);
3918	return blend_src_generic(count, spans, userData);
3919	#if QT_CONFIG(raster_64bit)
3920	#if defined(__SSE2__) \|\| defined(__ARM_NEON__) \|\| (Q_PROCESSOR_WORDSIZE == 8)
3921	case QImage::Format_ARGB32:
3922	case QImage::Format_RGBA8888:
3923	#endif
3924	case QImage::Format_BGR30:
3925	case QImage::Format_A2BGR30_Premultiplied:
3926	case QImage::Format_RGB30:
3927	case QImage::Format_A2RGB30_Premultiplied:
3928	case QImage::Format_RGBX64:
3929	case QImage::Format_RGBA64:
3930	case QImage::Format_RGBA64_Premultiplied:
3931	if (isVerticalGradient)
3932	return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
3933	return blend_src_generic_rgb64(count, spans, userData);
3934	#endif // QT_CONFIG(raster_64bit)
3935	case QImage::Format_Invalid:
3936	break;
3937	default:
3938	if (isVerticalGradient)
3939	return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
3940	return blend_src_generic(count, spans, userData);
3941	}
3942	Q_UNREACHABLE();
3943	}
3944
3945	template <class DST> static
3946	inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
3947	int x, int y, DST color,
3948	const uchar *map,
3949	int mapWidth, int mapHeight, int mapStride)
3950	{
3951	DST dest = reinterpret_cast<DST >(rasterBuffer->scanLine(y)) + x;
3952	const int destStride = rasterBuffer->stride<DST>();
3953
3954	if (mapWidth > `8`) {
3955	while (mapHeight--) {
3956	int x0 = `0`;
3957	int n = `0`;
3958	for (int x = `0`; x < mapWidth; x += `8`) {
3959	uchar s = map[x >> `3`];
3960	for (int i = `0`; i < `8`; ++i) {
3961	if (s & `0x80`) {
3962	++n;
3963	} else {
3964	if (n) {
3965	qt_memfill(dest + x0, color, n);
3966	x0 += n + `1`;
3967	n = `0`;
3968	} else {
3969	++x0;
3970	}
3971	if (!s) {
3972	x0 += `8` - `1` - i;
3973	break;
3974	}
3975	}
3976	s <<= `1`;
3977	}
3978	}
3979	if (n)
3980	qt_memfill(dest + x0, color, n);
3981	dest += destStride;
3982	map += mapStride;
3983	}
3984	} else {
3985	while (mapHeight--) {
3986	int x0 = `0`;
3987	int n = `0`;
3988	for (uchar s = *map; s; s <<= `1`) {
3989	if (s & `0x80`) {
3990	++n;
3991	} else if (n) {
3992	qt_memfill(dest + x0, color, n);
3993	x0 += n + `1`;
3994	n = `0`;
3995	} else {
3996	++x0;
3997	}
3998	}
3999	if (n)
4000	qt_memfill(dest + x0, color, n);
4001	dest += destStride;
4002	map += mapStride;
4003	}
4004	}
4005	}
4006
4007	inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
4008	int x, int y, const QRgba64 &color,
4009	const uchar *map,
4010	int mapWidth, int mapHeight, int mapStride)
4011	{
4012	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color.toArgb32(),
4013	map, mapWidth, mapHeight, mapStride);
4014	}
4015
4016	inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
4017	int x, int y, const QRgba64 &color,
4018	const uchar *map,
4019	int mapWidth, int mapHeight, int mapStride)
4020	{
4021	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, ARGB2RGBA(color.toArgb32()),
4022	map, mapWidth, mapHeight, mapStride);
4023	}
4024
4025	template<QtPixelOrder PixelOrder>
4026	inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
4027	int x, int y, const QRgba64 &color,
4028	const uchar *map,
4029	int mapWidth, int mapHeight, int mapStride)
4030	{
4031	qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
4032	map, mapWidth, mapHeight, mapStride);
4033	}
4034
4035	inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
4036	int x, int y, const QRgba64 &color,
4037	const uchar *map,
4038	int mapWidth, int mapHeight, int mapStride)
4039	{
4040	qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color.toRgb16(),
4041	map, mapWidth, mapHeight, mapStride);
4042	}
4043
4044	static inline void grayBlendPixel(quint32 dst, int* coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
4045	{
4046	// Do a gammacorrected gray alphablend...
4047	const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(dst) : QRgba64::fromArgb32(dst);
4048
4049	QRgba64 blend = interpolate255(srcLinear, coverage, dstLinear, `255` - coverage);
4050
4051	*dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
4052	}
4053
4054	static inline void alphamapblend_argb32(quint32 dst, int* coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
4055	{
4056	if (coverage == `0`) {
4057	// nothing
4058	} else if (coverage == `255` \|\| !colorProfile) {
4059	blend_pixel(*dst, src, coverage);
4060	} else if (*dst < `0xff000000`) {
4061	// Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
4062	blend_pixel(*dst, src, coverage);
4063	} else if (src >= `0xff000000`) {
4064	grayBlendPixel(dst, coverage, srcLinear, colorProfile);
4065	} else {
4066	// First do naive blend with text-color
4067	QRgb s = *dst;
4068	blend_pixel(s, src);
4069	// Then gamma-corrected blend with glyph shape
4070	QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
4071	grayBlendPixel(dst, coverage, s64, colorProfile);
4072	}
4073	}
4074
4075	#if QT_CONFIG(raster_64bit)
4076
4077	static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
4078	{
4079	// Do a gammacorrected gray alphablend...
4080	QRgba64 dstColor = dst;
4081	if (colorProfile) {
4082	if (dstColor.isOpaque())
4083	dstColor = colorProfile->toLinear(dstColor);
4084	else if (!dstColor.isTransparent())
4085	dstColor = colorProfile->toLinear(dstColor.unpremultiplied()).premultiplied();
4086	}
4087
4088	blend_pixel(dstColor, srcLinear, coverage);
4089
4090	if (colorProfile) {
4091	if (dstColor.isOpaque())
4092	dstColor = colorProfile->fromLinear(dstColor);
4093	else if (!dstColor.isTransparent())
4094	dstColor = colorProfile->fromLinear(dstColor.unpremultiplied()).premultiplied();
4095	}
4096	dst = dstColor;
4097	}
4098
4099	static inline void alphamapblend_generic(int coverage, QRgba64 dest, int* x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
4100	{
4101	if (coverage == `0`) {
4102	// nothing
4103	} else if (coverage == `255`) {
4104	blend_pixel(dest[x], src);
4105	} else if (src.isOpaque()) {
4106	grayBlendPixel(dest[x], coverage, srcLinear, colorProfile);
4107	} else {
4108	// First do naive blend with text-color
4109	QRgba64 s = dest[x];
4110	blend_pixel(s, src);
4111	// Then gamma-corrected blend with glyph shape
4112	if (colorProfile)
4113	s = colorProfile->toLinear(s);
4114	grayBlendPixel(dest[x], coverage, s, colorProfile);
4115	}
4116	}
4117
4118	static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
4119	int x, int y, const QRgba64 &color,
4120	const uchar *map,
4121	int mapWidth, int mapHeight, int mapStride,
4122	const QClipData clip, bool* useGammaCorrection)
4123	{
4124	if (color.isTransparent())
4125	return;
4126
4127	const QColorTrcLut colorProfile = nullptr*;
4128
4129	if (useGammaCorrection)
4130	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
4131
4132	QRgba64 srcColor = color;
4133	if (colorProfile && color.isOpaque())
4134	srcColor = colorProfile->toLinear(srcColor);
4135
4136	alignas(`8`) QRgba64 buffer[BufferSize];
4137	const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
4138	const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
4139
4140	if (!clip) {
4141	for (int ly = `0`; ly < mapHeight; ++ly) {
4142	int i = x;
4143	int length = mapWidth;
4144	while (length > `0`) {
4145	int l = qMin(BufferSize, length);
4146	QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
4147	for (int j=`0`; j < l; ++j) {
4148	const int coverage = map[j + (i - x)];
4149	alphamapblend_generic(coverage, dest, j, srcColor, color, colorProfile);
4150	}
4151	if (destStore64)
4152	destStore64(rasterBuffer, i, y + ly, dest, l);
4153	length -= l;
4154	i += l;
4155	}
4156	map += mapStride;
4157	}
4158	} else {
4159	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4160
4161	int top = qMax(y, `0`);
4162	map += (top - y) * mapStride;
4163
4164	const_cast<QClipData *>(clip)->initialize();
4165	for (int yp = top; yp<bottom; ++yp) {
4166	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4167
4168	for (int i=`0`; i<line.count; ++i) {
4169	const QSpan &clip = line.spans[i];
4170
4171	int start = qMax<int>(x, clip.x);
4172	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4173	if (end <= start)
4174	continue;
4175	Q_ASSERT(end - start <= BufferSize);
4176	QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
4177
4178	for (int xp=start; xp<end; ++xp) {
4179	const int coverage = map[xp - x];
4180	alphamapblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
4181	}
4182	if (destStore64)
4183	destStore64(rasterBuffer, start, clip.y, dest, end - start);
4184	} // for (i -> line.count)
4185	map += mapStride;
4186	} // for (yp -> bottom)
4187	}
4188	}
4189	#else
4190	static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
4191	int x, int y, const QRgba64 &color,
4192	const uchar *map,
4193	int mapWidth, int mapHeight, int mapStride,
4194	const QClipData clip, bool* useGammaCorrection)
4195	{
4196	if (color.isTransparent())
4197	return;
4198
4199	const quint32 c = color.toArgb32();
4200
4201	const QColorTrcLut colorProfile = nullptr*;
4202
4203	if (useGammaCorrection)
4204	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
4205
4206	QRgba64 srcColor = color;
4207	if (colorProfile && color.isOpaque())
4208	srcColor = colorProfile->toLinear(srcColor);
4209
4210	quint32 buffer[BufferSize];
4211	const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
4212	const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
4213
4214	if (!clip) {
4215	for (int ly = `0`; ly < mapHeight; ++ly) {
4216	int i = x;
4217	int length = mapWidth;
4218	while (length > `0`) {
4219	int l = qMin(BufferSize, length);
4220	quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
4221	for (int j=`0`; j < l; ++j) {
4222	const int coverage = map[j + (i - x)];
4223	alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
4224	}
4225	if (destStore)
4226	destStore(rasterBuffer, i, y + ly, dest, l);
4227	length -= l;
4228	i += l;
4229	}
4230	map += mapStride;
4231	}
4232	} else {
4233	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4234
4235	int top = qMax(y, `0`);
4236	map += (top - y) * mapStride;
4237
4238	const_cast<QClipData *>(clip)->initialize();
4239	for (int yp = top; yp<bottom; ++yp) {
4240	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4241
4242	for (int i=`0`; i<line.count; ++i) {
4243	const QSpan &clip = line.spans[i];
4244
4245	int start = qMax<int>(x, clip.x);
4246	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4247	if (end <= start)
4248	continue;
4249	Q_ASSERT(end - start <= BufferSize);
4250	quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
4251
4252	for (int xp=start; xp<end; ++xp) {
4253	const int coverage = map[xp - x];
4254	alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
4255	}
4256	if (destStore)
4257	destStore(rasterBuffer, start, clip.y, dest, end - start);
4258	} // for (i -> line.count)
4259	map += mapStride;
4260	} // for (yp -> bottom)
4261	}
4262	}
4263	#endif
4264
4265	static inline void alphamapblend_quint16(int coverage, quint16 dest, int* x, const quint16 srcColor)
4266	{
4267	if (coverage == `0`) {
4268	// nothing
4269	} else if (coverage == `255`) {
4270	dest[x] = srcColor;
4271	} else {
4272	dest[x] = BYTE_MUL_RGB16(srcColor, coverage)
4273	+ BYTE_MUL_RGB16(dest[x], `255` - coverage);
4274	}
4275	}
4276
4277	void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
4278	int x, int y, const QRgba64 &color,
4279	const uchar *map,
4280	int mapWidth, int mapHeight, int mapStride,
4281	const QClipData clip, bool* useGammaCorrection)
4282	{
4283	if (useGammaCorrection \|\| !color.isOpaque()) {
4284	qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
4285	return;
4286	}
4287
4288	const quint16 c = color.toRgb16();
4289
4290	if (!clip) {
4291	quint16 dest = reinterpret_cast<quint16>(rasterBuffer->scanLine(y)) + x;
4292	const int destStride = rasterBuffer->stride<quint16>();
4293	while (mapHeight--) {
4294	for (int i = `0`; i < mapWidth; ++i)
4295	alphamapblend_quint16(map[i], dest, i, c);
4296	dest += destStride;
4297	map += mapStride;
4298	}
4299	} else {
4300	int top = qMax(y, `0`);
4301	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4302	map += (top - y) * mapStride;
4303
4304	const_cast<QClipData *>(clip)->initialize();
4305	for (int yp = top; yp<bottom; ++yp) {
4306	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4307
4308	quint16 dest = reinterpret_cast<quint16>(rasterBuffer->scanLine(yp));
4309
4310	for (int i=`0`; i<line.count; ++i) {
4311	const QSpan &clip = line.spans[i];
4312
4313	int start = qMax<int>(x, clip.x);
4314	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4315
4316	for (int xp=start; xp<end; ++xp)
4317	alphamapblend_quint16(map[xp - x], dest, xp, c);
4318	} // for (i -> line.count)
4319	map += mapStride;
4320	} // for (yp -> bottom)
4321	}
4322	}
4323
4324	static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
4325	int x, int y, const QRgba64 &color,
4326	const uchar *map,
4327	int mapWidth, int mapHeight, int mapStride,
4328	const QClipData clip, bool* useGammaCorrection)
4329	{
4330	const quint32 c = color.toArgb32();
4331	const int destStride = rasterBuffer->stride<quint32>();
4332
4333	if (color.isTransparent())
4334	return;
4335
4336	const QColorTrcLut colorProfile = nullptr*;
4337
4338	if (useGammaCorrection)
4339	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
4340
4341	QRgba64 srcColor = color;
4342	if (colorProfile && color.isOpaque())
4343	srcColor = colorProfile->toLinear(srcColor);
4344
4345	if (!clip) {
4346	quint32 dest = reinterpret_cast<quint32>(rasterBuffer->scanLine(y)) + x;
4347	while (mapHeight--) {
4348	for (int i = `0`; i < mapWidth; ++i) {
4349	const int coverage = map[i];
4350	alphamapblend_argb32(dest + i, coverage, srcColor, c, colorProfile);
4351	}
4352	dest += destStride;
4353	map += mapStride;
4354	}
4355	} else {
4356	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4357
4358	int top = qMax(y, `0`);
4359	map += (top - y) * mapStride;
4360
4361	const_cast<QClipData *>(clip)->initialize();
4362	for (int yp = top; yp<bottom; ++yp) {
4363	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4364
4365	quint32 dest = reinterpret_cast<quint32 >(rasterBuffer->scanLine(yp));
4366
4367	for (int i=`0`; i<line.count; ++i) {
4368	const QSpan &clip = line.spans[i];
4369
4370	int start = qMax<int>(x, clip.x);
4371	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4372
4373	for (int xp=start; xp<end; ++xp) {
4374	const int coverage = map[xp - x];
4375	alphamapblend_argb32(dest + xp, coverage, srcColor, c, colorProfile);
4376	} // for (i -> line.count)
4377	} // for (yp -> bottom)
4378	map += mapStride;
4379	}
4380	}
4381	}
4382
4383	static inline int qRgbAvg(QRgb rgb)
4384	{
4385	return (qRed(rgb) * `5` + qGreen(rgb) * `6` + qBlue(rgb) * `5`) / `16`;
4386	}
4387
4388	static inline void rgbBlendPixel(quint32 dst, int* coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
4389	{
4390	// Do a gammacorrected RGB alphablend...
4391	const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(dst) : QRgba64::fromArgb32(dst);
4392
4393	QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
4394
4395	*dst = colorProfile ? colorProfile->fromLinear64(blend) : toArgb32(blend);
4396	}
4397
4398	static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
4399	{
4400	#if defined(__SSE2__)
4401	__m128i vd = _mm_cvtsi32_si128(d);
4402	__m128i vs = _mm_cvtsi32_si128(s);
4403	__m128i va = _mm_cvtsi32_si128(rgbAlpha);
4404	const __m128i vz = _mm_setzero_si128();
4405	vd = _mm_unpacklo_epi8(vd, vz);
4406	vs = _mm_unpacklo_epi8(vs, vz);
4407	va = _mm_unpacklo_epi8(va, vz);
4408	__m128i vb = _mm_xor_si128(_mm_set1_epi16(`255`), va);
4409	vs = _mm_mullo_epi16(vs, va);
4410	vd = _mm_mullo_epi16(vd, vb);
4411	vd = _mm_add_epi16(vd, vs);
4412	vd = _mm_add_epi16(vd, _mm_srli_epi16(vd, `8`));
4413	vd = _mm_add_epi16(vd, _mm_set1_epi16(`0x80`));
4414	vd = _mm_srli_epi16(vd, `8`);
4415	vd = _mm_packus_epi16(vd, vd);
4416	return _mm_cvtsi128_si32(vd);
4417	#else
4418	const int dr = qRed(d);
4419	const int dg = qGreen(d);
4420	const int db = qBlue(d);
4421
4422	const int sr = qRed(s);
4423	const int sg = qGreen(s);
4424	const int sb = qBlue(s);
4425
4426	const int mr = qRed(rgbAlpha);
4427	const int mg = qGreen(rgbAlpha);
4428	const int mb = qBlue(rgbAlpha);
4429
4430	const int nr = qt_div_255(sr * mr + dr * (`255` - mr));
4431	const int ng = qt_div_255(sg * mg + dg * (`255` - mg));
4432	const int nb = qt_div_255(sb * mb + db * (`255` - mb));
4433
4434	return `0xff000000` \| (nr << `16`) \| (ng << `8`) \| nb;
4435	#endif
4436	}
4437
4438	static inline void alphargbblend_argb32(quint32 dst, uint coverage, const* QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
4439	{
4440	if (coverage == `0xff000000`) {
4441	// nothing
4442	} else if (coverage == `0xffffffff` && qAlpha(src) == `255`) {
4443	blend_pixel(*dst, src);
4444	} else if (*dst < `0xff000000`) {
4445	// Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
4446	blend_pixel(*dst, src, qRgbAvg(coverage));
4447	} else if (!colorProfile) {
4448	// First do naive blend with text-color
4449	QRgb s = *dst;
4450	blend_pixel(s, src);
4451	// Then a naive blend with glyph shape
4452	dst = rgbBlend(dst, s, coverage);
4453	} else if (srcLinear.isOpaque()) {
4454	rgbBlendPixel(dst, coverage, srcLinear, colorProfile);
4455	} else {
4456	// First do naive blend with text-color
4457	QRgb s = *dst;
4458	blend_pixel(s, src);
4459	// Then gamma-corrected blend with glyph shape
4460	QRgba64 s64 = colorProfile ? colorProfile->toLinear64(s) : QRgba64::fromArgb32(s);
4461	rgbBlendPixel(dst, coverage, s64, colorProfile);
4462	}
4463	}
4464
4465	#if QT_CONFIG(raster_64bit)
4466	static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
4467	{
4468	// Do a gammacorrected RGB alphablend...
4469	const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(dst) : dst;
4470
4471	QRgba64 blend = rgbBlend(dlinear, slinear, coverage);
4472
4473	dst = colorProfile ? colorProfile->fromLinear(blend) : blend;
4474	}
4475
4476	static inline void alphargbblend_generic(uint coverage, QRgba64 dest, int* x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
4477	{
4478	if (coverage == `0xff000000`) {
4479	// nothing
4480	} else if (coverage == `0xffffffff`) {
4481	blend_pixel(dest[x], src);
4482	} else if (!dest[x].isOpaque()) {
4483	// Do a gray alphablend.
4484	alphamapblend_generic(qRgbAvg(coverage), dest, x, srcLinear, src, colorProfile);
4485	} else if (src.isOpaque()) {
4486	rgbBlendPixel(dest[x], coverage, srcLinear, colorProfile);
4487	} else {
4488	// First do naive blend with text-color
4489	QRgba64 s = dest[x];
4490	blend_pixel(s, src);
4491	// Then gamma-corrected blend with glyph shape
4492	if (colorProfile)
4493	s = colorProfile->toLinear(s);
4494	rgbBlendPixel(dest[x], coverage, s, colorProfile);
4495	}
4496	}
4497
4498	static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
4499	int x, int y, const QRgba64 &color,
4500	const uint src, int* mapWidth, int mapHeight, int srcStride,
4501	const QClipData clip, bool* useGammaCorrection)
4502	{
4503	if (color.isTransparent())
4504	return;
4505
4506	const QColorTrcLut colorProfile = nullptr*;
4507
4508	if (useGammaCorrection)
4509	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
4510
4511	QRgba64 srcColor = color;
4512	if (colorProfile && color.isOpaque())
4513	srcColor = colorProfile->toLinear(srcColor);
4514
4515	alignas(`8`) QRgba64 buffer[BufferSize];
4516	const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
4517	const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
4518
4519	if (!clip) {
4520	for (int ly = `0`; ly < mapHeight; ++ly) {
4521	int i = x;
4522	int length = mapWidth;
4523	while (length > `0`) {
4524	int l = qMin(BufferSize, length);
4525	QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
4526	for (int j=`0`; j < l; ++j) {
4527	const uint coverage = src[j + (i - x)];
4528	alphargbblend_generic(coverage, dest, j, srcColor, color, colorProfile);
4529	}
4530	if (destStore64)
4531	destStore64(rasterBuffer, i, y + ly, dest, l);
4532	length -= l;
4533	i += l;
4534	}
4535	src += srcStride;
4536	}
4537	} else {
4538	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4539
4540	int top = qMax(y, `0`);
4541	src += (top - y) * srcStride;
4542
4543	const_cast<QClipData *>(clip)->initialize();
4544	for (int yp = top; yp<bottom; ++yp) {
4545	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4546
4547	for (int i=`0`; i<line.count; ++i) {
4548	const QSpan &clip = line.spans[i];
4549
4550	int start = qMax<int>(x, clip.x);
4551	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4552	if (end <= start)
4553	continue;
4554	Q_ASSERT(end - start <= BufferSize);
4555	QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
4556
4557	for (int xp=start; xp<end; ++xp) {
4558	const uint coverage = src[xp - x];
4559	alphargbblend_generic(coverage, dest, xp - start, srcColor, color, colorProfile);
4560	}
4561	if (destStore64)
4562	destStore64(rasterBuffer, start, clip.y, dest, end - start);
4563	} // for (i -> line.count)
4564	src += srcStride;
4565	} // for (yp -> bottom)
4566	}
4567	}
4568	#else
4569	static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
4570	int x, int y, const QRgba64 &color,
4571	const uint src, int* mapWidth, int mapHeight, int srcStride,
4572	const QClipData clip, bool* useGammaCorrection)
4573	{
4574	if (color.isTransparent())
4575	return;
4576
4577	const quint32 c = color.toArgb32();
4578
4579	const QColorTrcLut colorProfile = nullptr*;
4580
4581	if (useGammaCorrection)
4582	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
4583
4584	QRgba64 srcColor = color;
4585	if (colorProfile && color.isOpaque())
4586	srcColor = colorProfile->toLinear(srcColor);
4587
4588	quint32 buffer[BufferSize];
4589	const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
4590	const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
4591
4592	if (!clip) {
4593	for (int ly = `0`; ly < mapHeight; ++ly) {
4594	int i = x;
4595	int length = mapWidth;
4596	while (length > `0`) {
4597	int l = qMin(BufferSize, length);
4598	quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
4599	for (int j=`0`; j < l; ++j) {
4600	const uint coverage = src[j + (i - x)];
4601	alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
4602	}
4603	if (destStore)
4604	destStore(rasterBuffer, i, y + ly, dest, l);
4605	length -= l;
4606	i += l;
4607	}
4608	src += srcStride;
4609	}
4610	} else {
4611	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4612
4613	int top = qMax(y, `0`);
4614	src += (top - y) * srcStride;
4615
4616	const_cast<QClipData *>(clip)->initialize();
4617	for (int yp = top; yp<bottom; ++yp) {
4618	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4619
4620	for (int i=`0`; i<line.count; ++i) {
4621	const QSpan &clip = line.spans[i];
4622
4623	int start = qMax<int>(x, clip.x);
4624	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4625	if (end <= start)
4626	continue;
4627	Q_ASSERT(end - start <= BufferSize);
4628	quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
4629
4630	for (int xp=start; xp<end; ++xp) {
4631	const uint coverage = src[xp - x];
4632	alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
4633	}
4634	if (destStore)
4635	destStore(rasterBuffer, start, clip.y, dest, end - start);
4636	} // for (i -> line.count)
4637	src += srcStride;
4638	} // for (yp -> bottom)
4639	}
4640	}
4641	#endif
4642
4643	static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
4644	int x, int y, const QRgba64 &color,
4645	const uint src, int* mapWidth, int mapHeight, int srcStride,
4646	const QClipData clip, bool* useGammaCorrection)
4647	{
4648	if (color.isTransparent())
4649	return;
4650
4651	const quint32 c = color.toArgb32();
4652
4653	const QColorTrcLut colorProfile = nullptr*;
4654
4655	if (useGammaCorrection)
4656	colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
4657
4658	QRgba64 srcColor = color;
4659	if (colorProfile && color.isOpaque())
4660	srcColor = colorProfile->toLinear(srcColor);
4661
4662	if (!clip) {
4663	quint32 dst = reinterpret_cast<quint32>(rasterBuffer->scanLine(y)) + x;
4664	const int destStride = rasterBuffer->stride<quint32>();
4665	while (mapHeight--) {
4666	for (int i = `0`; i < mapWidth; ++i) {
4667	const uint coverage = src[i];
4668	alphargbblend_argb32(dst + i, coverage, srcColor, c, colorProfile);
4669	}
4670
4671	dst += destStride;
4672	src += srcStride;
4673	}
4674	} else {
4675	int bottom = qMin(y + mapHeight, rasterBuffer->height());
4676
4677	int top = qMax(y, `0`);
4678	src += (top - y) * srcStride;
4679
4680	const_cast<QClipData *>(clip)->initialize();
4681	for (int yp = top; yp<bottom; ++yp) {
4682	const QClipData::ClipLine &line = clip->m_clipLines[yp];
4683
4684	quint32 dst = reinterpret_cast<quint32 >(rasterBuffer->scanLine(yp));
4685
4686	for (int i=`0`; i<line.count; ++i) {
4687	const QSpan &clip = line.spans[i];
4688
4689	int start = qMax<int>(x, clip.x);
4690	int end = qMin<int>(x + mapWidth, clip.x + clip.len);
4691
4692	for (int xp=start; xp<end; ++xp) {
4693	const uint coverage = src[xp - x];
4694	alphargbblend_argb32(dst + xp, coverage, srcColor, c, colorProfile);
4695	}
4696	} // for (i -> line.count)
4697	src += srcStride;
4698	} // for (yp -> bottom)
4699
4700	}
4701	}
4702
4703	static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
4704	int x, int y, int width, int height,
4705	const QRgba64 &color)
4706	{
4707	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
4708	color.toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
4709	}
4710
4711	static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
4712	int x, int y, int width, int height,
4713	const QRgba64 &color)
4714	{
4715	const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
4716	quint32 c32 = color.toArgb32();
4717	quint16 c16;
4718	layout.storeFromARGB32PM(reinterpret_cast<uchar >(&c16), &c32, `0`, `1`, nullptr, nullptr*);
4719	qt_rectfill<quint16>(reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
4720	c16, x, y, width, height, rasterBuffer->bytesPerLine());
4721	}
4722
4723	static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
4724	int x, int y, int width, int height,
4725	const QRgba64 &color)
4726	{
4727	const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
4728	quint32 c32 = color.toArgb32();
4729	quint24 c24;
4730	layout.storeFromARGB32PM(reinterpret_cast<uchar >(&c24), &c32, `0`, `1`, nullptr, nullptr*);
4731	qt_rectfill<quint24>(reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
4732	c24, x, y, width, height, rasterBuffer->bytesPerLine());
4733	}
4734
4735	static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
4736	int x, int y, int width, int height,
4737	const QRgba64 &color)
4738	{
4739	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
4740	color.unpremultiplied().toArgb32(), x, y, width, height, rasterBuffer->bytesPerLine());
4741	}
4742
4743	static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
4744	int x, int y, int width, int height,
4745	const QRgba64 &color)
4746	{
4747	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
4748	ARGB2RGBA(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
4749	}
4750
4751	static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
4752	int x, int y, int width, int height,
4753	const QRgba64 &color)
4754	{
4755	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
4756	ARGB2RGBA(color.unpremultiplied().toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
4757	}
4758
4759	template<QtPixelOrder PixelOrder>
4760	static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
4761	int x, int y, int width, int height,
4762	const QRgba64 &color)
4763	{
4764	qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
4765	qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
4766	}
4767
4768	static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
4769	int x, int y, int width, int height,
4770	const QRgba64 &color)
4771	{
4772	qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
4773	color.alpha() >> `8`, x, y, width, height, rasterBuffer->bytesPerLine());
4774	}
4775
4776	static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
4777	int x, int y, int width, int height,
4778	const QRgba64 &color)
4779	{
4780	qt_rectfill<quint8>(reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
4781	qGray(color.toArgb32()), x, y, width, height, rasterBuffer->bytesPerLine());
4782	}
4783
4784	static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
4785	int x, int y, int width, int height,
4786	const QRgba64 &color)
4787	{
4788	const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
4789	quint64 c64;
4790	store(reinterpret_cast<uchar >(&c64), &color, `0`, `1`, nullptr, nullptr*);
4791	qt_rectfill<quint64>(reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
4792	c64, x, y, width, height, rasterBuffer->bytesPerLine());
4793	}
4794
4795	// Map table for destination image format. Contains function pointers
4796	// for blends of various types unto the destination
4797
4798	DrawHelper qDrawHelper[QImage::NImageFormats] =
4799	{
4800	// Format_Invalid,
4801	{ nullptr, nullptr, nullptr, nullptr, nullptr },
4802	// Format_Mono,
4803	{
4804	blend_color_generic,
4805	nullptr, nullptr, nullptr, nullptr
4806	},
4807	// Format_MonoLSB,
4808	{
4809	blend_color_generic,
4810	nullptr, nullptr, nullptr, nullptr
4811	},
4812	// Format_Indexed8,
4813	{
4814	blend_color_generic,
4815	nullptr, nullptr, nullptr, nullptr
4816	},
4817	// Format_RGB32,
4818	{
4819	blend_color_argb,
4820	qt_bitmapblit_argb32,
4821	qt_alphamapblit_argb32,
4822	qt_alphargbblit_argb32,
4823	qt_rectfill_argb32
4824	},
4825	// Format_ARGB32,
4826	{
4827	blend_color_generic,
4828	qt_bitmapblit_argb32,
4829	qt_alphamapblit_argb32,
4830	qt_alphargbblit_argb32,
4831	qt_rectfill_nonpremul_argb32
4832	},
4833	// Format_ARGB32_Premultiplied
4834	{
4835	blend_color_argb,
4836	qt_bitmapblit_argb32,
4837	qt_alphamapblit_argb32,
4838	qt_alphargbblit_argb32,
4839	qt_rectfill_argb32
4840	},
4841	// Format_RGB16
4842	{
4843	blend_color_rgb16,
4844	qt_bitmapblit_quint16,
4845	qt_alphamapblit_quint16,
4846	qt_alphargbblit_generic,
4847	qt_rectfill_quint16
4848	},
4849	// Format_ARGB8565_Premultiplied
4850	{
4851	blend_color_generic,
4852	nullptr,
4853	qt_alphamapblit_generic,
4854	qt_alphargbblit_generic,
4855	qt_rectfill_quint24
4856	},
4857	// Format_RGB666
4858	{
4859	blend_color_generic,
4860	nullptr,
4861	qt_alphamapblit_generic,
4862	qt_alphargbblit_generic,
4863	qt_rectfill_quint24
4864	},
4865	// Format_ARGB6666_Premultiplied
4866	{
4867	blend_color_generic,
4868	nullptr,
4869	qt_alphamapblit_generic,
4870	qt_alphargbblit_generic,
4871	qt_rectfill_quint24
4872	},
4873	// Format_RGB555
4874	{
4875	blend_color_generic,
4876	nullptr,
4877	qt_alphamapblit_generic,
4878	qt_alphargbblit_generic,
4879	qt_rectfill_quint16
4880	},
4881	// Format_ARGB8555_Premultiplied
4882	{
4883	blend_color_generic,
4884	nullptr,
4885	qt_alphamapblit_generic,
4886	qt_alphargbblit_generic,
4887	qt_rectfill_quint24
4888	},
4889	// Format_RGB888
4890	{
4891	blend_color_generic,
4892	nullptr,
4893	qt_alphamapblit_generic,
4894	qt_alphargbblit_generic,
4895	qt_rectfill_quint24
4896	},
4897	// Format_RGB444
4898	{
4899	blend_color_generic,
4900	nullptr,
4901	qt_alphamapblit_generic,
4902	qt_alphargbblit_generic,
4903	qt_rectfill_quint16
4904	},
4905	// Format_ARGB4444_Premultiplied
4906	{
4907	blend_color_generic,
4908	nullptr,
4909	qt_alphamapblit_generic,
4910	qt_alphargbblit_generic,
4911	qt_rectfill_quint16
4912	},
4913	// Format_RGBX8888
4914	{
4915	blend_color_generic,
4916	qt_bitmapblit_rgba8888,
4917	qt_alphamapblit_generic,
4918	qt_alphargbblit_generic,
4919	qt_rectfill_rgba
4920	},
4921	// Format_RGBA8888
4922	{
4923	blend_color_generic,
4924	qt_bitmapblit_rgba8888,
4925	qt_alphamapblit_generic,
4926	qt_alphargbblit_generic,
4927	qt_rectfill_nonpremul_rgba
4928	},
4929	// Format_RGB8888_Premultiplied
4930	{
4931	blend_color_generic,
4932	qt_bitmapblit_rgba8888,
4933	qt_alphamapblit_generic,
4934	qt_alphargbblit_generic,
4935	qt_rectfill_rgba
4936	},
4937	// Format_BGR30
4938	{
4939	blend_color_generic_rgb64,
4940	qt_bitmapblit_rgb30<PixelOrderBGR>,
4941	qt_alphamapblit_generic,
4942	qt_alphargbblit_generic,
4943	qt_rectfill_rgb30<PixelOrderBGR>
4944	},
4945	// Format_A2BGR30_Premultiplied
4946	{
4947	blend_color_generic_rgb64,
4948	qt_bitmapblit_rgb30<PixelOrderBGR>,
4949	qt_alphamapblit_generic,
4950	qt_alphargbblit_generic,
4951	qt_rectfill_rgb30<PixelOrderBGR>
4952	},
4953	// Format_RGB30
4954	{
4955	blend_color_generic_rgb64,
4956	qt_bitmapblit_rgb30<PixelOrderRGB>,
4957	qt_alphamapblit_generic,
4958	qt_alphargbblit_generic,
4959	qt_rectfill_rgb30<PixelOrderRGB>
4960	},
4961	// Format_A2RGB30_Premultiplied
4962	{
4963	blend_color_generic_rgb64,
4964	qt_bitmapblit_rgb30<PixelOrderRGB>,
4965	qt_alphamapblit_generic,
4966	qt_alphargbblit_generic,
4967	qt_rectfill_rgb30<PixelOrderRGB>
4968	},
4969	// Format_Alpha8
4970	{
4971	blend_color_generic,
4972	nullptr,
4973	qt_alphamapblit_generic,
4974	qt_alphargbblit_generic,
4975	qt_rectfill_alpha
4976	},
4977	// Format_Grayscale8
4978	{
4979	blend_color_generic,
4980	nullptr,
4981	qt_alphamapblit_generic,
4982	qt_alphargbblit_generic,
4983	qt_rectfill_gray
4984	},
4985	// Format_RGBX64
4986	{
4987	blend_color_generic_rgb64,
4988	nullptr,
4989	qt_alphamapblit_generic,
4990	qt_alphargbblit_generic,
4991	qt_rectfill_quint64
4992	},
4993	// Format_RGBA64
4994	{
4995	blend_color_generic_rgb64,
4996	nullptr,
4997	qt_alphamapblit_generic,
4998	qt_alphargbblit_generic,
4999	qt_rectfill_quint64
5000	},
5001	// Format_RGBA64_Premultiplied
5002	{
5003	blend_color_generic_rgb64,
5004	nullptr,
5005	qt_alphamapblit_generic,
5006	qt_alphargbblit_generic,
5007	qt_rectfill_quint64
5008	},
5009	// Format_Grayscale16
5010	{
5011	blend_color_generic_rgb64,
5012	nullptr,
5013	qt_alphamapblit_generic,
5014	qt_alphargbblit_generic,
5015	qt_rectfill_quint16
5016	},
5017	// Format_BGR888
5018	{
5019	blend_color_generic,
5020	nullptr,
5021	qt_alphamapblit_generic,
5022	qt_alphargbblit_generic,
5023	qt_rectfill_quint24
5024	},
5025	};
5026
#ifndef __SSE2__
// Generic qt_memfill64, compiled only when SSE2 is not available at build
// time: stores color into count consecutive 64-bit elements starting at dest
// by delegating to qt_memfill_template.
void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
{
    qt_memfill_template<quint64>(dest, color, count);
}
#endif
5033
5034	#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
5035	__attribute__((optimize("no-tree-vectorize")))
5036	#endif
5037	void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
5038	{
5039	# ifdef QT_COMPILER_SUPPORTS_SSSE3
5040	extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
5041	if (qCpuHasFeature(SSSE3))
5042	return qt_memfill24_ssse3(dest, color, count);
5043	# endif
5044
5045	const quint32 v = color;
5046	quint24 *end = dest + count;
5047
5048	// prolog: align dest to 32bit
5049	while ((quintptr(dest) & `0x3`) && dest < end) {
5050	*dest++ = v;
5051	}
5052	if (dest >= end)
5053	return;
5054
5055	const uint val1 = qFromBigEndian((v << `8`) \| (v >> `16`));
5056	const uint val2 = qFromBigEndian((v << `16`) \| (v >> `8`));
5057	const uint val3 = qFromBigEndian((v << `24`) \| (v >> `0`));
5058
5059	for ( ; dest <= (end - `4`); dest += `4`) {
5060	quint32 dst = reinterpret_cast<quint32 >(dest);
5061	dst[`0`] = val1;
5062	dst[`1`] = val2;
5063	dst[`2`] = val3;
5064	}
5065
5066	// less than 4px left
5067	switch (end - dest) {
5068	case `3`:
5069	*dest++ = v;
5070	Q_FALLTHROUGH();
5071	case `2`:
5072	*dest++ = v;
5073	Q_FALLTHROUGH();
5074	case `1`:
5075	*dest++ = v;
5076	}
5077	}
5078
5079	void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
5080	{
5081	const int align = quintptr(dest) & `0x3`;
5082	if (align) {
5083	*dest++ = value;
5084	--count;
5085	}
5086
5087	if (count & `0x1`)
5088	dest[count - `1`] = value;
5089
5090	const quint32 value32 = (value << `16`) \| value;
5091	qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / `2`);
5092	}
5093
#if !(defined(__SSE2__) || defined(__ARM_NEON__) || defined(__MIPS_DSP__))
// Generic qt_memfill32, compiled only when none of the SSE2, NEON or MIPS DSP
// build-time macros is defined: stores color into count consecutive 32-bit
// elements starting at dest by delegating to qt_memfill_template.
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
{
    qt_memfill_template<quint32>(dest, color, count);
}
#endif
5100	#ifdef __SSE2__
5101	decltype(qt_memfill32_sse2) qt_memfill32 = nullptr*;
5102	decltype(qt_memfill64_sse2) qt_memfill64 = nullptr*;
5103	#endif
5104
#ifdef QT_COMPILER_SUPPORTS_SSE4_1
// Defined in the SSE4.1 translation unit; installed into qPixelLayouts by
// qInitDrawhelperFunctions() when the CPU supports SSE4.1.
template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
#endif

// Defined elsewhere; called first thing by qInitDrawhelperFunctions() to set
// up the basic blend function tables.
extern void qInitBlendFunctions();
5110
5111	static void qInitDrawhelperFunctions()
5112	{
5113	// Set up basic blend function tables.
5114	qInitBlendFunctions();
5115
5116	#ifdef __SSE2__
5117	# ifndef __AVX2__
5118	qt_memfill32 = qt_memfill32_sse2;
5119	qt_memfill64 = qt_memfill64_sse2;
5120	# endif
5121	qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
5122	qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
5123	qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
5124	qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
5125	qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
5126	qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
5127	qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
5128
5129	extern void qt_scale_image_argb32_on_argb32_sse2(uchar destPixels, int* dbpl,
5130	const uchar srcPixels, int* sbpl, int srch,
5131	const QRectF &targetRect,
5132	const QRectF &sourceRect,
5133	const QRect &clip,
5134	int const_alpha);
5135	qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
5136	qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
5137	qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
5138	qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
5139
5140	extern void qt_blend_rgb32_on_rgb32_sse2(uchar destPixels, int* dbpl,
5141	const uchar srcPixels, int* sbpl,
5142	int w, int h,
5143	int const_alpha);
5144	extern void qt_blend_argb32_on_argb32_sse2(uchar destPixels, int* dbpl,
5145	const uchar srcPixels, int* sbpl,
5146	int w, int h,
5147	int const_alpha);
5148
5149	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
5150	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
5151	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
5152	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
5153	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
5154	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
5155	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
5156	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
5157
5158	extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint buffer, const* Operator op, const* QSpanData *data,
5159	int y, int x, int length);
5160
5161	qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
5162
5163	extern void QT_FASTCALL comp_func_SourceOver_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
5164	extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint destPixels, int* length, uint color, uint const_alpha);
5165	extern void QT_FASTCALL comp_func_Source_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
5166	extern void QT_FASTCALL comp_func_solid_Source_sse2(uint destPixels, int* length, uint color, uint const_alpha);
5167	extern void QT_FASTCALL comp_func_Plus_sse2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
5168	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
5169	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
5170	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
5171	qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
5172	qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
5173
5174	#ifdef QT_COMPILER_SUPPORTS_SSSE3
5175	if (qCpuHasFeature(SSSE3)) {
5176	extern void qt_blend_argb32_on_argb32_ssse3(uchar destPixels, int* dbpl,
5177	const uchar srcPixels, int* sbpl,
5178	int w, int h,
5179	int const_alpha);
5180
5181	extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint buffer, const* Operator , const* QSpanData *data,
5182	int y, int x, int length);
5183	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
5184	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
5185	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
5186	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
5187	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
5188	extern void QT_FASTCALL rbSwap_888_ssse3(uchar dst, const* uchar src, int* count);
5189	qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
5190	qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
5191	}
5192	#endif // SSSE3
5193
5194	#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
5195	if (qCpuHasFeature(SSE4_1)) {
5196	extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint buffer, int* count, const QList<QRgb> *);
5197	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint buffer, int* count, const QList<QRgb> *);
5198	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint buffer, const uchar src, int* index, int count,
5199	const QList<QRgb> , QDitherInfo );
5200	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint buffer, const uchar src, int* index, int count,
5201	const QList<QRgb> , QDitherInfo );
5202	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 buffer, const* uint src, int* count,
5203	const QList<QRgb> , QDitherInfo );
5204	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 buffer, const* uint src, int* count,
5205	const QList<QRgb> , QDitherInfo );
5206	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 buffer, const uchar src, int* index, int count,
5207	const QList<QRgb> , QDitherInfo );
5208	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 buffer, const uchar src, int* index, int count,
5209	const QList<QRgb> , QDitherInfo );
5210	extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
5211	const QList<QRgb> , QDitherInfo );
5212	extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
5213	const QList<QRgb> , QDitherInfo );
5214	extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar dest, const* uint src, int* index, int count,
5215	const QList<QRgb> , QDitherInfo );
5216	extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar dest, const* QRgba64 src, int* index, int count,
5217	const QList<QRgb> , QDitherInfo );
5218	extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar dest, const* QRgba64 src, int* index, int count,
5219	const QList<QRgb> , QDitherInfo );
5220	extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length);
5221	extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer rasterBuffer, int* x, int y, const QRgba64 buffer, int* length);
5222	# ifndef __AVX2__
5223	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
5224	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
5225	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
5226	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
5227	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
5228	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
5229	qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
5230	qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
5231	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
5232	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
5233	# endif
5234	qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
5235	qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
5236	qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
5237	qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
5238	qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
5239	qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
5240	qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
5241	#if QT_CONFIG(raster_64bit)
5242	destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
5243	destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
5244	#endif
5245	}
5246	#endif
5247
5248	#if defined(QT_COMPILER_SUPPORTS_AVX2)
5249	if (qCpuHasFeature(ArchHaswell)) {
5250	qt_memfill32 = qt_memfill32_avx2;
5251	qt_memfill64 = qt_memfill64_avx2;
5252	extern void qt_blend_rgb32_on_rgb32_avx2(uchar destPixels, int* dbpl,
5253	const uchar srcPixels, int* sbpl,
5254	int w, int h, int const_alpha);
5255	extern void qt_blend_argb32_on_argb32_avx2(uchar destPixels, int* dbpl,
5256	const uchar srcPixels, int* sbpl,
5257	int w, int h, int const_alpha);
5258	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
5259	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
5260	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
5261	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
5262	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
5263	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
5264	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
5265	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
5266
5267	extern void QT_FASTCALL comp_func_Source_avx2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
5268	extern void QT_FASTCALL comp_func_SourceOver_avx2(uint destPixels, const* uint srcPixels, int* length, uint const_alpha);
5269	extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint destPixels, int* length, uint color, uint const_alpha);
5270	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
5271	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
5272	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
5273	#if QT_CONFIG(raster_64bit)
5274	extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 destPixels, const* QRgba64 srcPixels, int* length, uint const_alpha);
5275	extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 destPixels, const* QRgba64 srcPixels, int* length, uint const_alpha);
5276	extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 destPixels, int* length, QRgba64 color, uint const_alpha);
5277	qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
5278	qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
5279	qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
5280	#endif
5281
5282	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint b, uint end, const QTextureData &image,
5283	int &fx, int &fy, int fdx, int /fdy/);
5284	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint b, uint end, const QTextureData &image,
5285	int &fx, int &fy, int fdx, int /fdy/);
5286	extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint b, uint end, const QTextureData &image,
5287	int &fx, int &fy, int fdx, int fdy);
5288
5289	bilinearFastTransformHelperARGB32PM[`0`][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
5290	bilinearFastTransformHelperARGB32PM[`0`][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
5291	bilinearFastTransformHelperARGB32PM[`0`][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
5292
5293	extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint buffer, int* count, const QList<QRgb> *);
5294	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint buffer, int* count, const QList<QRgb> *);
5295	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint buffer, const uchar src, int* index, int count,
5296	const QList<QRgb> , QDitherInfo );
5297	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint buffer, const uchar src, int* index, int count,
5298	const QList<QRgb> , QDitherInfo );
5299	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
5300	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
5301	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
5302	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
5303
5304	#if QT_CONFIG(raster_64bit)
5305	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 , const* uint , int, const* QList<QRgb> , QDitherInfo );
5306	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 , const* uint , int* count, const QList<QRgb> , QDitherInfo );
5307	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 , const uchar , int, int, const* QList<QRgb> , QDitherInfo );
5308	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 , const uchar , int, int, const* QList<QRgb> , QDitherInfo );
5309	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
5310	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
5311	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
5312	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
5313	#endif
5314	}
5315	#endif
5316
5317	#endif // SSE2
5318
5319	#if defined(__ARM_NEON__)
5320	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
5321	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
5322	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
5323	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
5324	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
5325	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
5326	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
5327	qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
5328	qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
5329	#endif
5330
5331	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
5332	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
5333	qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
5334
5335	extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint buffer, const* Operator op, const* QSpanData *data,
5336	int y, int x, int length);
5337
5338	qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
5339
5340	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
5341
5342	#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
5343	extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint buffer, int* count, const QList<QRgb> *);
5344	extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint buffer, int* count, const QList<QRgb> *);
5345	extern const uint QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint buffer, const uchar src, int* index, int count,
5346	const QList<QRgb> , QDitherInfo );
5347	extern const uint QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint buffer, const uchar src, int* index, int count,
5348	const QList<QRgb> , QDitherInfo );
5349	extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 buffer, const* uint src, int* count,
5350	const QList<QRgb> , QDitherInfo );
5351	extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 buffer, const* uint src, int* count,
5352	const QList<QRgb> , QDitherInfo );
5353	extern const QRgba64 QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 buffer, const uchar src, int* index, int count,
5354	const QList<QRgb> , QDitherInfo );
5355	extern const QRgba64 QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 buffer, const uchar src, int* index, int count,
5356	const QList<QRgb> , QDitherInfo );
5357	extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
5358	const QList<QRgb> , QDitherInfo );
5359	extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
5360	const QList<QRgb> , QDitherInfo );
5361	extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar dest, const* uint src, int* index, int count,
5362	const QList<QRgb> , QDitherInfo );
5363	qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
5364	qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
5365	qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
5366	qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
5367	qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
5368	qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
5369	qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
5370	qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
5371	qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
5372	qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
5373	qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
5374	qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
5375	qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
5376	#endif
5377
5378	#if defined(ENABLE_PIXMAN_DRAWHELPERS)
5379	// The RGB16 helpers use Arm32 assembly that has not been ported to AArch64
5380	qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
5381	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
5382	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
5383
5384	qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
5385	qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
5386
5387	qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
5388	qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
5389
5390	qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
5391
5392	destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
5393	destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
5394
5395	qMemRotateFunctions[QPixelLayout::BPP16][`0`] = qt_memrotate90_16_neon;
5396	qMemRotateFunctions[QPixelLayout::BPP16][`2`] = qt_memrotate270_16_neon;
5397	#endif
5398	#endif // defined(__ARM_NEON__)
5399
5400	#if defined(__MIPS_DSP__)
5401	// Composition functions are all DSP r1
5402	qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
5403	qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
5404	qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
5405	qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
5406	qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
5407	qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
5408	qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
5409	qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
5410	qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
5411	qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
5412
5413	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
5414	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
5415	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
5416	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
5417	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
5418	qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
5419	qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
5420	qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
5421
5422	qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
5423	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
5424	qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
5425	qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
5426
5427	destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
5428
5429	destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
5430
5431	sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
5432	sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
5433	sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
5434
5435	#if defined(__MIPS_DSPR2__)
5436	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
5437	sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
5438	#else
5439	qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
5440	#endif // defined(__MIPS_DSPR2__)
5441	#endif // defined(__MIPS_DSP__)
5442	}
5443
5444	// Ensure initialization if this object file is linked.
// Q_CONSTRUCTOR_FUNCTION is defined outside this file; presumably it arranges for
// qInitDrawhelperFunctions() to be invoked automatically at static-initialization
// time, so the dispatch tables it fills in are ready before any painting code
// uses them -- TODO confirm against the macro's definition.
5445	Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
5446
5447	QT_END_NAMESPACE
5448

Browse the source code of Qt/src/gui/painting/qdrawhelper.cpp