/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
| 39 | |
| 40 | #include <private/qdrawhelper_p.h> |
| 41 | #include <private/qdrawingprimitive_sse2_p.h> |
| 42 | #include <private/qpaintengine_raster_p.h> |
| 43 | #include <private/qpixellayout_p.h> |
| 44 | |
| 45 | #if defined(QT_COMPILER_SUPPORTS_SSE4_1) |
| 46 | |
| 47 | QT_BEGIN_NAMESPACE |
| 48 | |
| 49 | #ifndef __AVX2__ |
| 50 | template<bool RGBA> |
| 51 | static void convertARGBToARGB32PM_sse4(uint *buffer, const uint *src, int count) |
| 52 | { |
| 53 | int i = 0; |
| 54 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); |
| 55 | const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); |
| 56 | const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15); |
| 57 | const __m128i half = _mm_set1_epi16(0x0080); |
| 58 | const __m128i zero = _mm_setzero_si128(); |
| 59 | |
| 60 | for (; i < count - 3; i += 4) { |
| 61 | __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); |
| 62 | if (!_mm_testz_si128(srcVector, alphaMask)) { |
| 63 | if (!_mm_testc_si128(srcVector, alphaMask)) { |
| 64 | if (RGBA) |
| 65 | srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); |
| 66 | __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); |
| 67 | __m128i src2 = _mm_unpackhi_epi8(srcVector, zero); |
| 68 | __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); |
| 69 | __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); |
| 70 | src1 = _mm_mullo_epi16(src1, alpha1); |
| 71 | src2 = _mm_mullo_epi16(src2, alpha2); |
| 72 | src1 = _mm_add_epi16(src1, _mm_srli_epi16(src1, 8)); |
| 73 | src2 = _mm_add_epi16(src2, _mm_srli_epi16(src2, 8)); |
| 74 | src1 = _mm_add_epi16(src1, half); |
| 75 | src2 = _mm_add_epi16(src2, half); |
| 76 | src1 = _mm_srli_epi16(src1, 8); |
| 77 | src2 = _mm_srli_epi16(src2, 8); |
| 78 | src1 = _mm_blend_epi16(src1, alpha1, 0x88); |
| 79 | src2 = _mm_blend_epi16(src2, alpha2, 0x88); |
| 80 | srcVector = _mm_packus_epi16(src1, src2); |
| 81 | _mm_storeu_si128((__m128i *)&buffer[i], srcVector); |
| 82 | } else { |
| 83 | if (RGBA) |
| 84 | _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask)); |
| 85 | else if (buffer != src) |
| 86 | _mm_storeu_si128((__m128i *)&buffer[i], srcVector); |
| 87 | } |
| 88 | } else { |
| 89 | _mm_storeu_si128((__m128i *)&buffer[i], zero); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | SIMD_EPILOGUE(i, count, 3) { |
| 94 | uint v = qPremultiply(src[i]); |
| 95 | buffer[i] = RGBA ? RGBA2ARGB(v) : v; |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | template<bool RGBA> |
| 100 | static void convertARGBToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count) |
| 101 | { |
| 102 | int i = 0; |
| 103 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); |
| 104 | const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); |
| 105 | const __m128i shuffleMask = _mm_setr_epi8(6, 7, 6, 7, 6, 7, 6, 7, 14, 15, 14, 15, 14, 15, 14, 15); |
| 106 | const __m128i zero = _mm_setzero_si128(); |
| 107 | |
| 108 | for (; i < count - 3; i += 4) { |
| 109 | __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); |
| 110 | if (!_mm_testz_si128(srcVector, alphaMask)) { |
| 111 | bool cf = _mm_testc_si128(srcVector, alphaMask); |
| 112 | |
| 113 | if (!RGBA) |
| 114 | srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); |
| 115 | const __m128i src1 = _mm_unpacklo_epi8(srcVector, srcVector); |
| 116 | const __m128i src2 = _mm_unpackhi_epi8(srcVector, srcVector); |
| 117 | if (!cf) { |
| 118 | __m128i alpha1 = _mm_shuffle_epi8(src1, shuffleMask); |
| 119 | __m128i alpha2 = _mm_shuffle_epi8(src2, shuffleMask); |
| 120 | __m128i dst1 = _mm_mulhi_epu16(src1, alpha1); |
| 121 | __m128i dst2 = _mm_mulhi_epu16(src2, alpha2); |
| 122 | // Map 0->0xfffe to 0->0xffff |
| 123 | dst1 = _mm_add_epi16(dst1, _mm_srli_epi16(dst1, 15)); |
| 124 | dst2 = _mm_add_epi16(dst2, _mm_srli_epi16(dst2, 15)); |
| 125 | // correct alpha value: |
| 126 | dst1 = _mm_blend_epi16(dst1, src1, 0x88); |
| 127 | dst2 = _mm_blend_epi16(dst2, src2, 0x88); |
| 128 | _mm_storeu_si128((__m128i *)&buffer[i], dst1); |
| 129 | _mm_storeu_si128((__m128i *)&buffer[i + 2], dst2); |
| 130 | } else { |
| 131 | _mm_storeu_si128((__m128i *)&buffer[i], src1); |
| 132 | _mm_storeu_si128((__m128i *)&buffer[i + 2], src2); |
| 133 | } |
| 134 | } else { |
| 135 | _mm_storeu_si128((__m128i *)&buffer[i], zero); |
| 136 | _mm_storeu_si128((__m128i *)&buffer[i + 2], zero); |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | SIMD_EPILOGUE(i, count, 3) { |
| 141 | const uint s = RGBA ? RGBA2ARGB(src[i]) : src[i]; |
| 142 | buffer[i] = QRgba64::fromArgb32(s).premultiplied(); |
| 143 | } |
| 144 | } |
| 145 | #endif // __AVX2__ |
| 146 | |
| 147 | static inline __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(__m128 a, float mul) |
| 148 | { |
| 149 | __m128 ia = _mm_rcp_ps(a); // Approximate 1/a |
| 150 | // Improve precision of ia using Newton-Raphson |
| 151 | ia = _mm_sub_ps(_mm_add_ps(ia, ia), _mm_mul_ps(ia, _mm_mul_ps(ia, a))); |
| 152 | ia = _mm_mul_ps(ia, _mm_set1_ps(mul)); |
| 153 | return ia; |
| 154 | } |
| 155 | |
| 156 | template<bool RGBA, bool RGBx> |
| 157 | static inline void convertARGBFromARGB32PM_sse4(uint *buffer, const uint *src, int count) |
| 158 | { |
| 159 | int i = 0; |
| 160 | if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) { |
| 161 | for (; i < count; ++i) { |
| 162 | uint v = qUnpremultiply(src[i]); |
| 163 | if (RGBx) |
| 164 | v = 0xff000000 | v; |
| 165 | if (RGBA) |
| 166 | v = ARGB2RGBA(v); |
| 167 | buffer[i] = v; |
| 168 | } |
| 169 | return; |
| 170 | } |
| 171 | const __m128i alphaMask = _mm_set1_epi32(0xff000000); |
| 172 | const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); |
| 173 | const __m128i zero = _mm_setzero_si128(); |
| 174 | |
| 175 | for (; i < count - 3; i += 4) { |
| 176 | __m128i srcVector = _mm_loadu_si128((const __m128i *)&src[i]); |
| 177 | if (!_mm_testz_si128(srcVector, alphaMask)) { |
| 178 | if (!_mm_testc_si128(srcVector, alphaMask)) { |
| 179 | __m128i srcVectorAlpha = _mm_srli_epi32(srcVector, 24); |
| 180 | if (RGBA) |
| 181 | srcVector = _mm_shuffle_epi8(srcVector, rgbaMask); |
| 182 | const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha); |
| 183 | const __m128 ia = reciprocal_mul_ps(a, 255.0f); |
| 184 | __m128i src1 = _mm_unpacklo_epi8(srcVector, zero); |
| 185 | __m128i src3 = _mm_unpackhi_epi8(srcVector, zero); |
| 186 | __m128i src2 = _mm_unpackhi_epi16(src1, zero); |
| 187 | __m128i src4 = _mm_unpackhi_epi16(src3, zero); |
| 188 | src1 = _mm_unpacklo_epi16(src1, zero); |
| 189 | src3 = _mm_unpacklo_epi16(src3, zero); |
| 190 | __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0)); |
| 191 | __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1)); |
| 192 | __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2)); |
| 193 | __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3)); |
| 194 | src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1)); |
| 195 | src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2)); |
| 196 | src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3)); |
| 197 | src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4)); |
| 198 | src1 = _mm_packus_epi32(src1, src2); |
| 199 | src3 = _mm_packus_epi32(src3, src4); |
| 200 | src1 = _mm_packus_epi16(src1, src3); |
| 201 | // Handle potential alpha == 0 values: |
| 202 | __m128i srcVectorAlphaMask = _mm_cmpeq_epi32(srcVectorAlpha, zero); |
| 203 | src1 = _mm_andnot_si128(srcVectorAlphaMask, src1); |
| 204 | // Fixup alpha values: |
| 205 | if (RGBx) |
| 206 | srcVector = _mm_or_si128(src1, alphaMask); |
| 207 | else |
| 208 | srcVector = _mm_blendv_epi8(src1, srcVector, alphaMask); |
| 209 | _mm_storeu_si128((__m128i *)&buffer[i], srcVector); |
| 210 | } else { |
| 211 | if (RGBA) |
| 212 | _mm_storeu_si128((__m128i *)&buffer[i], _mm_shuffle_epi8(srcVector, rgbaMask)); |
| 213 | else if (buffer != src) |
| 214 | _mm_storeu_si128((__m128i *)&buffer[i], srcVector); |
| 215 | } |
| 216 | } else { |
| 217 | if (RGBx) |
| 218 | _mm_storeu_si128((__m128i *)&buffer[i], alphaMask); |
| 219 | else |
| 220 | _mm_storeu_si128((__m128i *)&buffer[i], zero); |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | SIMD_EPILOGUE(i, count, 3) { |
| 225 | uint v = qUnpremultiply_sse4(src[i]); |
| 226 | if (RGBx) |
| 227 | v = 0xff000000 | v; |
| 228 | if (RGBA) |
| 229 | v = ARGB2RGBA(v); |
| 230 | buffer[i] = v; |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | template<bool RGBA> |
| 235 | static inline void convertARGBFromRGBA64PM_sse4(uint *buffer, const QRgba64 *src, int count) |
| 236 | { |
| 237 | int i = 0; |
| 238 | if ((_MM_GET_EXCEPTION_MASK() & _MM_MASK_INVALID) == 0) { |
| 239 | for (; i < count; ++i) { |
| 240 | const QRgba64 v = src[i].unpremultiplied(); |
| 241 | buffer[i] = RGBA ? toRgba8888(v) : toArgb32(v); |
| 242 | } |
| 243 | return; |
| 244 | } |
| 245 | const __m128i alphaMask = _mm_set1_epi64x(qint64(Q_UINT64_C(0xffff) << 48)); |
| 246 | const __m128i alphaMask32 = _mm_set1_epi32(0xff000000); |
| 247 | const __m128i rgbaMask = _mm_setr_epi8(2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15); |
| 248 | const __m128i zero = _mm_setzero_si128(); |
| 249 | |
| 250 | for (; i < count - 3; i += 4) { |
| 251 | __m128i srcVector1 = _mm_loadu_si128((const __m128i *)&src[i]); |
| 252 | __m128i srcVector2 = _mm_loadu_si128((const __m128i *)&src[i + 2]); |
| 253 | bool transparent1 = _mm_testz_si128(srcVector1, alphaMask); |
| 254 | bool opaque1 = _mm_testc_si128(srcVector1, alphaMask); |
| 255 | bool transparent2 = _mm_testz_si128(srcVector2, alphaMask); |
| 256 | bool opaque2 = _mm_testc_si128(srcVector2, alphaMask); |
| 257 | |
| 258 | if (!(transparent1 && transparent2)) { |
| 259 | if (!(opaque1 && opaque2)) { |
| 260 | __m128i srcVector1Alpha = _mm_srli_epi64(srcVector1, 48); |
| 261 | __m128i srcVector2Alpha = _mm_srli_epi64(srcVector2, 48); |
| 262 | __m128i srcVectorAlpha = _mm_packus_epi32(srcVector1Alpha, srcVector2Alpha); |
| 263 | const __m128 a = _mm_cvtepi32_ps(srcVectorAlpha); |
| 264 | // Convert srcVectorAlpha to final 8-bit alpha channel |
| 265 | srcVectorAlpha = _mm_add_epi32(srcVectorAlpha, _mm_set1_epi32(128)); |
| 266 | srcVectorAlpha = _mm_sub_epi32(srcVectorAlpha, _mm_srli_epi32(srcVectorAlpha, 8)); |
| 267 | srcVectorAlpha = _mm_srli_epi32(srcVectorAlpha, 8); |
| 268 | srcVectorAlpha = _mm_slli_epi32(srcVectorAlpha, 24); |
| 269 | const __m128 ia = reciprocal_mul_ps(a, 255.0f); |
| 270 | __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero); |
| 271 | __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero); |
| 272 | __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero); |
| 273 | __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero); |
| 274 | __m128 ia1 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(0, 0, 0, 0)); |
| 275 | __m128 ia2 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(1, 1, 1, 1)); |
| 276 | __m128 ia3 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(2, 2, 2, 2)); |
| 277 | __m128 ia4 = _mm_shuffle_ps(ia, ia, _MM_SHUFFLE(3, 3, 3, 3)); |
| 278 | src1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src1), ia1)); |
| 279 | src2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src2), ia2)); |
| 280 | src3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src3), ia3)); |
| 281 | src4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(src4), ia4)); |
| 282 | src1 = _mm_packus_epi32(src1, src2); |
| 283 | src3 = _mm_packus_epi32(src3, src4); |
| 284 | // Handle potential alpha == 0 values: |
| 285 | __m128i srcVector1AlphaMask = _mm_cmpeq_epi64(srcVector1Alpha, zero); |
| 286 | __m128i srcVector2AlphaMask = _mm_cmpeq_epi64(srcVector2Alpha, zero); |
| 287 | src1 = _mm_andnot_si128(srcVector1AlphaMask, src1); |
| 288 | src3 = _mm_andnot_si128(srcVector2AlphaMask, src3); |
| 289 | src1 = _mm_packus_epi16(src1, src3); |
| 290 | // Fixup alpha values: |
| 291 | src1 = _mm_blendv_epi8(src1, srcVectorAlpha, alphaMask32); |
| 292 | // Fix RGB order |
| 293 | if (!RGBA) |
| 294 | src1 = _mm_shuffle_epi8(src1, rgbaMask); |
| 295 | _mm_storeu_si128((__m128i *)&buffer[i], src1); |
| 296 | } else { |
| 297 | __m128i src1 = _mm_unpacklo_epi16(srcVector1, zero); |
| 298 | __m128i src2 = _mm_unpackhi_epi16(srcVector1, zero); |
| 299 | __m128i src3 = _mm_unpacklo_epi16(srcVector2, zero); |
| 300 | __m128i src4 = _mm_unpackhi_epi16(srcVector2, zero); |
| 301 | src1 = _mm_add_epi32(src1, _mm_set1_epi32(128)); |
| 302 | src2 = _mm_add_epi32(src2, _mm_set1_epi32(128)); |
| 303 | src3 = _mm_add_epi32(src3, _mm_set1_epi32(128)); |
| 304 | src4 = _mm_add_epi32(src4, _mm_set1_epi32(128)); |
| 305 | src1 = _mm_sub_epi32(src1, _mm_srli_epi32(src1, 8)); |
| 306 | src2 = _mm_sub_epi32(src2, _mm_srli_epi32(src2, 8)); |
| 307 | src3 = _mm_sub_epi32(src3, _mm_srli_epi32(src3, 8)); |
| 308 | src4 = _mm_sub_epi32(src4, _mm_srli_epi32(src4, 8)); |
| 309 | src1 = _mm_srli_epi32(src1, 8); |
| 310 | src2 = _mm_srli_epi32(src2, 8); |
| 311 | src3 = _mm_srli_epi32(src3, 8); |
| 312 | src4 = _mm_srli_epi32(src4, 8); |
| 313 | src1 = _mm_packus_epi32(src1, src2); |
| 314 | src3 = _mm_packus_epi32(src3, src4); |
| 315 | src1 = _mm_packus_epi16(src1, src3); |
| 316 | if (!RGBA) |
| 317 | src1 = _mm_shuffle_epi8(src1, rgbaMask); |
| 318 | _mm_storeu_si128((__m128i *)&buffer[i], src1); |
| 319 | } |
| 320 | } else { |
| 321 | _mm_storeu_si128((__m128i *)&buffer[i], zero); |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | SIMD_EPILOGUE(i, count, 3) { |
| 326 | buffer[i] = qConvertRgba64ToRgb32_sse4<RGBA ? PixelOrderRGB : PixelOrderBGR>(src[i]); |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | #ifndef __AVX2__ |
| 331 | void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *) |
| 332 | { |
| 333 | convertARGBToARGB32PM_sse4<false>(buffer, buffer, count); |
| 334 | } |
| 335 | |
| 336 | void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *) |
| 337 | { |
| 338 | convertARGBToARGB32PM_sse4<true>(buffer, buffer, count); |
| 339 | } |
| 340 | |
| 341 | const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, |
| 342 | const QList<QRgb> *, QDitherInfo *) |
| 343 | { |
| 344 | convertARGBToRGBA64PM_sse4<false>(buffer, src, count); |
| 345 | return buffer; |
| 346 | } |
| 347 | |
| 348 | const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count, |
| 349 | const QList<QRgb> *, QDitherInfo *) |
| 350 | { |
| 351 | convertARGBToRGBA64PM_sse4<true>(buffer, src, count); |
| 352 | return buffer; |
| 353 | } |
| 354 | |
| 355 | const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, |
| 356 | const QList<QRgb> *, QDitherInfo *) |
| 357 | { |
| 358 | convertARGBToARGB32PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); |
| 359 | return buffer; |
| 360 | } |
| 361 | |
| 362 | const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count, |
| 363 | const QList<QRgb> *, QDitherInfo *) |
| 364 | { |
| 365 | convertARGBToARGB32PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); |
| 366 | return buffer; |
| 367 | } |
| 368 | |
| 369 | const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, |
| 370 | const QList<QRgb> *, QDitherInfo *) |
| 371 | { |
| 372 | convertARGBToRGBA64PM_sse4<false>(buffer, reinterpret_cast<const uint *>(src) + index, count); |
| 373 | return buffer; |
| 374 | } |
| 375 | |
| 376 | const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count, |
| 377 | const QList<QRgb> *, QDitherInfo *) |
| 378 | { |
| 379 | convertARGBToRGBA64PM_sse4<true>(buffer, reinterpret_cast<const uint *>(src) + index, count); |
| 380 | return buffer; |
| 381 | } |
| 382 | #endif // __AVX2__ |
| 383 | |
| 384 | void QT_FASTCALL storeRGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, |
| 385 | const QList<QRgb> *, QDitherInfo *) |
| 386 | { |
| 387 | uint *d = reinterpret_cast<uint *>(dest) + index; |
| 388 | convertARGBFromARGB32PM_sse4<false,true>(d, src, count); |
| 389 | } |
| 390 | |
| 391 | void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, |
| 392 | const QList<QRgb> *, QDitherInfo *) |
| 393 | { |
| 394 | uint *d = reinterpret_cast<uint *>(dest) + index; |
| 395 | convertARGBFromARGB32PM_sse4<false,false>(d, src, count); |
| 396 | } |
| 397 | |
| 398 | void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, |
| 399 | const QList<QRgb> *, QDitherInfo *) |
| 400 | { |
| 401 | uint *d = reinterpret_cast<uint *>(dest) + index; |
| 402 | convertARGBFromARGB32PM_sse4<true,false>(d, src, count); |
| 403 | } |
| 404 | |
| 405 | void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, |
| 406 | const QList<QRgb> *, QDitherInfo *) |
| 407 | { |
| 408 | uint *d = reinterpret_cast<uint *>(dest) + index; |
| 409 | convertARGBFromARGB32PM_sse4<true,true>(d, src, count); |
| 410 | } |
| 411 | |
| 412 | template<QtPixelOrder PixelOrder> |
| 413 | void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, |
| 414 | const QList<QRgb> *, QDitherInfo *) |
| 415 | { |
| 416 | uint *d = reinterpret_cast<uint *>(dest) + index; |
| 417 | for (int i = 0; i < count; ++i) |
| 418 | d[i] = qConvertArgb32ToA2rgb30_sse4<PixelOrder>(src[i]); |
| 419 | } |
| 420 | |
| 421 | #if QT_CONFIG(raster_64bit) |
| 422 | void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) |
| 423 | { |
| 424 | uint *dest = (uint*)rasterBuffer->scanLine(y) + x; |
| 425 | convertARGBFromRGBA64PM_sse4<false>(dest, buffer, length); |
| 426 | } |
| 427 | |
| 428 | void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) |
| 429 | { |
| 430 | uint *dest = (uint*)rasterBuffer->scanLine(y) + x; |
| 431 | convertARGBFromRGBA64PM_sse4<true>(dest, buffer, length); |
| 432 | } |
| 433 | #endif |
| 434 | |
| 435 | void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count, |
| 436 | const QList<QRgb> *, QDitherInfo *) |
| 437 | { |
| 438 | uint *d = (uint*)dest + index; |
| 439 | convertARGBFromRGBA64PM_sse4<false>(d, src, count); |
| 440 | } |
| 441 | |
| 442 | void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count, |
| 443 | const QList<QRgb> *, QDitherInfo *) |
| 444 | { |
| 445 | uint *d = (uint*)dest + index; |
| 446 | convertARGBFromRGBA64PM_sse4<true>(d, src, count); |
| 447 | } |
| 448 | |
| 449 | template |
| 450 | void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>(uchar *dest, const uint *src, int index, int count, |
| 451 | const QList<QRgb> *, QDitherInfo *); |
| 452 | template |
| 453 | void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>(uchar *dest, const uint *src, int index, int count, |
| 454 | const QList<QRgb> *, QDitherInfo *); |
| 455 | |
| 456 | QT_END_NAMESPACE |
| 457 | |
| 458 | #endif |
| 459 | |