1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qimagescale_p.h"
41#include "qimage.h"
42#include <private/qdrawhelper_x86_p.h>
43#include <private/qsimd_p.h>
44
45#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
46#include "qsemaphore.h"
47#include "qthreadpool.h"
48#endif
49
50#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
51
52QT_BEGIN_NAMESPACE
53
54using namespace QImageScale;
55
56template<typename T>
57static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
58{
59#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
60 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
61 segments = std::min(segments, dh);
62 QThreadPool *threadPool = QThreadPool::globalInstance();
63 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
64 QSemaphore semaphore;
65 int y = 0;
66 for (int i = 0; i < segments; ++i) {
67 int yn = (dh - y) / (segments - i);
68 threadPool->start([&, y, yn]() {
69 scaleSection(y, y + yn);
70 semaphore.release(1);
71 });
72 y += yn;
73 }
74 semaphore.acquire(segments);
75 return;
76 }
77#endif
78 scaleSection(0, dh);
79}
80
81inline static __m128i Q_DECL_VECTORCALL
82qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
83{
84 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
85 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
86 int i;
87 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
88 pix += step;
89 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
90 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
91 }
92 pix += step;
93 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
94 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
95 return vx;
96}
97
98template<bool RGB>
99void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
100 int dw, int dh, int dow, int sow)
101{
102 const unsigned int **ypoints = isi->ypoints;
103 const int *xpoints = isi->xpoints;
104 const int *xapoints = isi->xapoints;
105 const int *yapoints = isi->yapoints;
106
107 const __m128i v256 = _mm_set1_epi32(256);
108
109 /* go through every scanline in the output buffer */
110 auto scaleSection = [&] (int yStart, int yEnd) {
111 for (int y = yStart; y < yEnd; ++y) {
112 const int Cy = yapoints[y] >> 16;
113 const int yap = yapoints[y] & 0xffff;
114 const __m128i vCy = _mm_set1_epi32(Cy);
115 const __m128i vyap = _mm_set1_epi32(yap);
116
117 unsigned int *dptr = dest + (y * dow);
118 for (int x = 0; x < dw; x++) {
119 const unsigned int *sptr = ypoints[y] + xpoints[x];
120 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
121
122 const int xap = xapoints[x];
123 if (xap > 0) {
124 const __m128i vxap = _mm_set1_epi32(xap);
125 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
126 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
127
128 vx = _mm_mullo_epi32(vx, vinvxap);
129 vr = _mm_mullo_epi32(vr, vxap);
130 vx = _mm_add_epi32(vx, vr);
131 vx = _mm_srli_epi32(vx, 8);
132 }
133 vx = _mm_srli_epi32(vx, 14);
134 vx = _mm_packus_epi32(vx, vx);
135 vx = _mm_packus_epi16(vx, vx);
136 *dptr = _mm_cvtsi128_si32(vx);
137 if (RGB)
138 *dptr |= 0xff000000;
139 dptr++;
140 }
141 }
142 };
143 multithread_pixels_function(isi, dh, scaleSection);
144}
145
146template<bool RGB>
147void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
148 int dw, int dh, int dow, int sow)
149{
150 const unsigned int **ypoints = isi->ypoints;
151 int *xpoints = isi->xpoints;
152 int *xapoints = isi->xapoints;
153 int *yapoints = isi->yapoints;
154
155 const __m128i v256 = _mm_set1_epi32(256);
156
157 /* go through every scanline in the output buffer */
158 auto scaleSection = [&] (int yStart, int yEnd) {
159 for (int y = yStart; y < yEnd; ++y) {
160 unsigned int *dptr = dest + (y * dow);
161 for (int x = 0; x < dw; x++) {
162 int Cx = xapoints[x] >> 16;
163 int xap = xapoints[x] & 0xffff;
164 const __m128i vCx = _mm_set1_epi32(Cx);
165 const __m128i vxap = _mm_set1_epi32(xap);
166
167 const unsigned int *sptr = ypoints[y] + xpoints[x];
168 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
169
170 int yap = yapoints[y];
171 if (yap > 0) {
172 const __m128i vyap = _mm_set1_epi32(yap);
173 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
174 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
175
176 vx = _mm_mullo_epi32(vx, vinvyap);
177 vr = _mm_mullo_epi32(vr, vyap);
178 vx = _mm_add_epi32(vx, vr);
179 vx = _mm_srli_epi32(vx, 8);
180 }
181 vx = _mm_srli_epi32(vx, 14);
182 vx = _mm_packus_epi32(vx, vx);
183 vx = _mm_packus_epi16(vx, vx);
184 *dptr = _mm_cvtsi128_si32(vx);
185 if (RGB)
186 *dptr |= 0xff000000;
187 dptr++;
188 }
189 }
190 };
191 multithread_pixels_function(isi, dh, scaleSection);
192}
193
194template<bool RGB>
195void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
196 int dw, int dh, int dow, int sow)
197{
198 const unsigned int **ypoints = isi->ypoints;
199 int *xpoints = isi->xpoints;
200 int *xapoints = isi->xapoints;
201 int *yapoints = isi->yapoints;
202
203 auto scaleSection = [&] (int yStart, int yEnd) {
204 for (int y = yStart; y < yEnd; ++y) {
205 int Cy = yapoints[y] >> 16;
206 int yap = yapoints[y] & 0xffff;
207 const __m128i vCy = _mm_set1_epi32(Cy);
208 const __m128i vyap = _mm_set1_epi32(yap);
209
210 unsigned int *dptr = dest + (y * dow);
211 for (int x = 0; x < dw; x++) {
212 const int Cx = xapoints[x] >> 16;
213 const int xap = xapoints[x] & 0xffff;
214 const __m128i vCx = _mm_set1_epi32(Cx);
215 const __m128i vxap = _mm_set1_epi32(xap);
216
217 const unsigned int *sptr = ypoints[y] + xpoints[x];
218 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
219 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
220
221 int j;
222 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
223 sptr += sow;
224 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
225 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
226 }
227 sptr += sow;
228 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
229 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
230
231 vr = _mm_srli_epi32(vr, 24);
232 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
233 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
234 *dptr = _mm_cvtsi128_si32(vr);
235 if (RGB)
236 *dptr |= 0xff000000;
237 dptr++;
238 }
239 }
240 };
241 multithread_pixels_function(isi, dh, scaleSection);
242}
243
/*
    Explicit instantiations of the three SSE4.1 scalers for both values of
    the RGB template parameter (false and true), so that their definitions
    are emitted from this translation unit. Presumably the callers live in
    another source file and only see declarations - confirm against
    qimagescale.cpp.
*/
template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
                                                           int dw, int dh, int dow, int sow);

template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
                                                          int dw, int dh, int dow, int sow);

template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
                                                           int dw, int dh, int dow, int sow);

template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
                                                          int dw, int dh, int dow, int sow);

template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
                                                       int dw, int dh, int dow, int sow);

template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
                                                      int dw, int dh, int dow, int sow);
261
262QT_END_NAMESPACE
263
264#endif
265