1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#include "./blapi-build_p.h"
8#ifdef BL_TARGET_OPT_AVX
9
10#include "./blgeometry.h"
11#include "./blmatrix.h"
12#include "./blruntime_p.h"
13#include "./blsimd_p.h"
14#include "./blsupport_p.h"
15
16// ============================================================================
17// [BLMatrix2D - MapPointDArray [AVX]]
18// ============================================================================
19
20static BLResult BL_CDECL blMatrix2DMapPointDArrayIdentity_AVX(const BLMatrix2D* self, BLPoint* dst, const BLPoint* src, size_t size) noexcept {
21 using namespace SIMD;
22
23 BL_UNUSED(self);
24 if (dst == src)
25 return BL_SUCCESS;
26
27 size_t i = size;
28 while (i >= 8) {
29 vstored256u(dst + 0, vloadd256u(src + 0));
30 vstored256u(dst + 2, vloadd256u(src + 2));
31 vstored256u(dst + 4, vloadd256u(src + 4));
32 vstored256u(dst + 6, vloadd256u(src + 6));
33
34 i -= 8;
35 dst += 8;
36 src += 8;
37 }
38
39 while (i >= 2) {
40 vstored256u(dst, vloadd256u(src));
41
42 i -= 2;
43 dst += 2;
44 src += 2;
45 }
46
47 if (i)
48 vstored128u(dst, vloadd128u(src));
49
50 return BL_SUCCESS;
51}
52
53static BLResult BL_CDECL blMatrix2DMapPointDArrayTranslate_AVX(const BLMatrix2D* self, BLPoint* dst, const BLPoint* src, size_t size) noexcept {
54 using namespace SIMD;
55
56 size_t i = size;
57 D256 m20_m21 = vbroadcastd256_128(&self->m20);
58
59 while (i >= 8) {
60 vstored256u(dst + 0, vaddpd(vloadd256u(src + 0), m20_m21));
61 vstored256u(dst + 2, vaddpd(vloadd256u(src + 2), m20_m21));
62 vstored256u(dst + 4, vaddpd(vloadd256u(src + 4), m20_m21));
63 vstored256u(dst + 6, vaddpd(vloadd256u(src + 6), m20_m21));
64
65 i -= 8;
66 dst += 8;
67 src += 8;
68 }
69
70 while (i >= 2) {
71 vstored256u(dst, vaddpd(vloadd256u(src), m20_m21));
72
73 i -= 2;
74 dst += 2;
75 src += 2;
76 }
77
78 if (i)
79 vstored128u(dst, vaddpd(vloadd128u(src), vcast<D128>(m20_m21)));
80
81 return BL_SUCCESS;
82}
83
84static BLResult BL_CDECL blMatrix2DMapPointDArrayScale_AVX(const BLMatrix2D* self, BLPoint* dst, const BLPoint* src, size_t size) noexcept {
85 using namespace SIMD;
86
87 size_t i = size;
88 D256 m00_m11 = vdupld128(vsetd128(self->m11, self->m00));
89 D256 m20_m21 = vbroadcastd256_128(&self->m20);
90
91 while (i >= 8) {
92 vstored256u(dst + 0, vaddpd(vmulpd(vloadd256u(src + 0), m00_m11), m20_m21));
93 vstored256u(dst + 2, vaddpd(vmulpd(vloadd256u(src + 2), m00_m11), m20_m21));
94 vstored256u(dst + 4, vaddpd(vmulpd(vloadd256u(src + 4), m00_m11), m20_m21));
95 vstored256u(dst + 6, vaddpd(vmulpd(vloadd256u(src + 6), m00_m11), m20_m21));
96
97 i -= 8;
98 dst += 8;
99 src += 8;
100 }
101
102 while (i >= 2) {
103 vstored256u(dst, vaddpd(vmulpd(vloadd256u(src), m00_m11), m20_m21));
104
105 i -= 2;
106 dst += 2;
107 src += 2;
108 }
109
110 if (i)
111 vstored128u(dst, vaddpd(vmulpd(vloadd128u(src), vcast<D128>(m00_m11)), vcast<D128>(m20_m21)));
112
113 return BL_SUCCESS;
114}
115
116static BLResult BL_CDECL blMatrix2DMapPointDArraySwap_AVX(const BLMatrix2D* self, BLPoint* dst, const BLPoint* src, size_t size) noexcept {
117 using namespace SIMD;
118
119 size_t i = size;
120 D256 m01_m10 = vdupld128(vsetd128(self->m01, self->m10));
121 D256 m20_m21 = vbroadcastd256_128(&self->m20);
122
123 while (i >= 8) {
124 vstored256u(dst + 0, vaddpd(vmulpd(vswapd64(vloadd256u(src + 0)), m01_m10), m20_m21));
125 vstored256u(dst + 2, vaddpd(vmulpd(vswapd64(vloadd256u(src + 2)), m01_m10), m20_m21));
126 vstored256u(dst + 4, vaddpd(vmulpd(vswapd64(vloadd256u(src + 4)), m01_m10), m20_m21));
127 vstored256u(dst + 6, vaddpd(vmulpd(vswapd64(vloadd256u(src + 6)), m01_m10), m20_m21));
128
129 i -= 8;
130 dst += 8;
131 src += 8;
132 }
133
134 while (i >= 2) {
135 vstored256u(dst, vaddpd(vmulpd(vswapd64(vloadd256u(src)), m01_m10), m20_m21));
136
137 i -= 2;
138 dst += 2;
139 src += 2;
140 }
141
142 if (i)
143 vstored128u(dst, vaddpd(vmulpd(vswapd64(vloadd128u(src)), vcast<D128>(m01_m10)), vcast<D128>(m20_m21)));
144
145 return BL_SUCCESS;
146}
147
148static BLResult BL_CDECL blMatrix2DMapPointDArrayAffine_AVX(const BLMatrix2D* self, BLPoint* dst, const BLPoint* src, size_t size) noexcept {
149 using namespace SIMD;
150
151 size_t i = size;
152 D256 m00_m11 = vdupld128(vsetd128(self->m11, self->m00));
153 D256 m10_m01 = vdupld128(vsetd128(self->m01, self->m10));
154 D256 m20_m21 = vbroadcastd256_128(&self->m20);
155
156 while (i >= 8) {
157 D256 s0 = vloadd256u(src + 0);
158 D256 s1 = vloadd256u(src + 2);
159 D256 s2 = vloadd256u(src + 4);
160 D256 s3 = vloadd256u(src + 6);
161
162 vstored256u(dst + 0, vaddpd(vaddpd(vmulpd(s0, m00_m11), m20_m21), vmulpd(vswapd64(s0), m10_m01)));
163 vstored256u(dst + 2, vaddpd(vaddpd(vmulpd(s1, m00_m11), m20_m21), vmulpd(vswapd64(s1), m10_m01)));
164 vstored256u(dst + 4, vaddpd(vaddpd(vmulpd(s2, m00_m11), m20_m21), vmulpd(vswapd64(s2), m10_m01)));
165 vstored256u(dst + 6, vaddpd(vaddpd(vmulpd(s3, m00_m11), m20_m21), vmulpd(vswapd64(s3), m10_m01)));
166
167 i -= 8;
168 dst += 8;
169 src += 8;
170 }
171
172 while (i >= 2) {
173 D256 s0 = vloadd256u(src);
174 vstored256u(dst, vaddpd(vaddpd(vmulpd(s0, m00_m11), m20_m21), vmulpd(vswapd64(s0), m10_m01)));
175
176 i -= 2;
177 dst += 2;
178 src += 2;
179 }
180
181 if (i) {
182 D128 s0 = vloadd128u(src);
183 vstored128u(dst, vaddpd(vaddpd(vmulpd(s0, vcast<D128>(m00_m11)), vcast<D128>(m20_m21)), vmulpd(vswapd64(s0), vcast<D128>(m10_m01))));
184 }
185
186 return BL_SUCCESS;
187}
188
189// ============================================================================
190// [BLMatrix2D - Runtime Init [AVX]]
191// ============================================================================
192
193BL_HIDDEN void blMatrix2DRtInit_AVX(BLRuntimeContext* rt) noexcept {
194 BL_UNUSED(rt);
195 BLMapPointDArrayFunc* funcs = blMatrix2DMapPointDArrayFuncs;
196
197 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_IDENTITY ], blMatrix2DMapPointDArrayIdentity_AVX);
198 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_TRANSLATE], blMatrix2DMapPointDArrayTranslate_AVX);
199 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_SCALE ], blMatrix2DMapPointDArrayScale_AVX);
200 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_SWAP ], blMatrix2DMapPointDArraySwap_AVX);
201 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_AFFINE ], blMatrix2DMapPointDArrayAffine_AVX);
202 blAssignFunc(&funcs[BL_MATRIX2D_TYPE_INVALID ], blMatrix2DMapPointDArrayAffine_AVX);
203}
204
205#endif
206