1/**
2 * Copyright (c) 2006-2023 LOVE Development Team
3 *
4 * This software is provided 'as-is', without any express or implied
5 * warranty. In no event will the authors be held liable for any damages
6 * arising from the use of this software.
7 *
8 * Permission is granted to anyone to use this software for any purpose,
9 * including commercial applications, and to alter it and redistribute it
10 * freely, subject to the following restrictions:
11 *
12 * 1. The origin of this software must not be misrepresented; you must not
13 * claim that you wrote the original software. If you use this software
14 * in a product, an acknowledgment in the product documentation would be
15 * appreciated but is not required.
16 * 2. Altered source versions must be plainly marked as such, and must not be
17 * misrepresented as being the original software.
18 * 3. This notice may not be removed or altered from any source distribution.
19 **/
20
21#include "Matrix.h"
22#include "common/config.h"
23
24// STD
25#include <cstring> // memcpy
26#include <cmath>
27
28#if defined(LOVE_SIMD_SSE)
29#include <xmmintrin.h>
30#endif
31
32#if defined(LOVE_SIMD_NEON)
33#include <arm_neon.h>
34#endif
35
36namespace love
37{
38
39// | e0 e4 e8 e12 |
40// | e1 e5 e9 e13 |
41// | e2 e6 e10 e14 |
42// | e3 e7 e11 e15 |
43// | e0 e4 e8 e12 |
44// | e1 e5 e9 e13 |
45// | e2 e6 e10 e14 |
46// | e3 e7 e11 e15 |
47
48void Matrix4::multiply(const Matrix4 &a, const Matrix4 &b, float t[16])
49{
50#if defined(LOVE_SIMD_SSE)
51
52 // We can't guarantee 16-bit alignment (e.g. for heap-allocated Matrix4
53 // objects) so we use unaligned loads and stores.
54 __m128 col1 = _mm_loadu_ps(&a.e[0]);
55 __m128 col2 = _mm_loadu_ps(&a.e[4]);
56 __m128 col3 = _mm_loadu_ps(&a.e[8]);
57 __m128 col4 = _mm_loadu_ps(&a.e[12]);
58
59 for (int i = 0; i < 4; i++)
60 {
61 __m128 brod1 = _mm_set1_ps(b.e[4*i + 0]);
62 __m128 brod2 = _mm_set1_ps(b.e[4*i + 1]);
63 __m128 brod3 = _mm_set1_ps(b.e[4*i + 2]);
64 __m128 brod4 = _mm_set1_ps(b.e[4*i + 3]);
65
66 __m128 col = _mm_add_ps(
67 _mm_add_ps(_mm_mul_ps(brod1, col1), _mm_mul_ps(brod2, col2)),
68 _mm_add_ps(_mm_mul_ps(brod3, col3), _mm_mul_ps(brod4, col4))
69 );
70
71 _mm_storeu_ps(&t[4*i], col);
72 }
73
74#elif defined(LOVE_SIMD_NEON)
75
76 float32x4_t cola1 = vld1q_f32(&a.e[0]);
77 float32x4_t cola2 = vld1q_f32(&a.e[4]);
78 float32x4_t cola3 = vld1q_f32(&a.e[8]);
79 float32x4_t cola4 = vld1q_f32(&a.e[12]);
80
81 float32x4_t col1 = vmulq_n_f32(cola1, b.e[0]);
82 col1 = vmlaq_n_f32(col1, cola2, b.e[1]);
83 col1 = vmlaq_n_f32(col1, cola3, b.e[2]);
84 col1 = vmlaq_n_f32(col1, cola4, b.e[3]);
85
86 float32x4_t col2 = vmulq_n_f32(cola1, b.e[4]);
87 col2 = vmlaq_n_f32(col2, cola2, b.e[5]);
88 col2 = vmlaq_n_f32(col2, cola3, b.e[6]);
89 col2 = vmlaq_n_f32(col2, cola4, b.e[7]);
90
91 float32x4_t col3 = vmulq_n_f32(cola1, b.e[8]);
92 col3 = vmlaq_n_f32(col3, cola2, b.e[9]);
93 col3 = vmlaq_n_f32(col3, cola3, b.e[10]);
94 col3 = vmlaq_n_f32(col3, cola4, b.e[11]);
95
96 float32x4_t col4 = vmulq_n_f32(cola1, b.e[12]);
97 col4 = vmlaq_n_f32(col4, cola2, b.e[13]);
98 col4 = vmlaq_n_f32(col4, cola3, b.e[14]);
99 col4 = vmlaq_n_f32(col4, cola4, b.e[15]);
100
101 vst1q_f32(&t[0], col1);
102 vst1q_f32(&t[4], col2);
103 vst1q_f32(&t[8], col3);
104 vst1q_f32(&t[12], col4);
105
106#else
107
108 t[0] = (a.e[0]*b.e[0]) + (a.e[4]*b.e[1]) + (a.e[8]*b.e[2]) + (a.e[12]*b.e[3]);
109 t[4] = (a.e[0]*b.e[4]) + (a.e[4]*b.e[5]) + (a.e[8]*b.e[6]) + (a.e[12]*b.e[7]);
110 t[8] = (a.e[0]*b.e[8]) + (a.e[4]*b.e[9]) + (a.e[8]*b.e[10]) + (a.e[12]*b.e[11]);
111 t[12] = (a.e[0]*b.e[12]) + (a.e[4]*b.e[13]) + (a.e[8]*b.e[14]) + (a.e[12]*b.e[15]);
112
113 t[1] = (a.e[1]*b.e[0]) + (a.e[5]*b.e[1]) + (a.e[9]*b.e[2]) + (a.e[13]*b.e[3]);
114 t[5] = (a.e[1]*b.e[4]) + (a.e[5]*b.e[5]) + (a.e[9]*b.e[6]) + (a.e[13]*b.e[7]);
115 t[9] = (a.e[1]*b.e[8]) + (a.e[5]*b.e[9]) + (a.e[9]*b.e[10]) + (a.e[13]*b.e[11]);
116 t[13] = (a.e[1]*b.e[12]) + (a.e[5]*b.e[13]) + (a.e[9]*b.e[14]) + (a.e[13]*b.e[15]);
117
118 t[2] = (a.e[2]*b.e[0]) + (a.e[6]*b.e[1]) + (a.e[10]*b.e[2]) + (a.e[14]*b.e[3]);
119 t[6] = (a.e[2]*b.e[4]) + (a.e[6]*b.e[5]) + (a.e[10]*b.e[6]) + (a.e[14]*b.e[7]);
120 t[10] = (a.e[2]*b.e[8]) + (a.e[6]*b.e[9]) + (a.e[10]*b.e[10]) + (a.e[14]*b.e[11]);
121 t[14] = (a.e[2]*b.e[12]) + (a.e[6]*b.e[13]) + (a.e[10]*b.e[14]) + (a.e[14]*b.e[15]);
122
123 t[3] = (a.e[3]*b.e[0]) + (a.e[7]*b.e[1]) + (a.e[11]*b.e[2]) + (a.e[15]*b.e[3]);
124 t[7] = (a.e[3]*b.e[4]) + (a.e[7]*b.e[5]) + (a.e[11]*b.e[6]) + (a.e[15]*b.e[7]);
125 t[11] = (a.e[3]*b.e[8]) + (a.e[7]*b.e[9]) + (a.e[11]*b.e[10]) + (a.e[15]*b.e[11]);
126 t[15] = (a.e[3]*b.e[12]) + (a.e[7]*b.e[13]) + (a.e[11]*b.e[14]) + (a.e[15]*b.e[15]);
127
128#endif
129}
130
131void Matrix4::multiply(const Matrix4 &a, const Matrix4 &b, Matrix4 &t)
132{
133 multiply(a, b, t.e);
134}
135
136// | e0 e4 e8 e12 |
137// | e1 e5 e9 e13 |
138// | e2 e6 e10 e14 |
139// | e3 e7 e11 e15 |
140
141Matrix4::Matrix4()
142{
143 setIdentity();
144}
145
146
147Matrix4::Matrix4(const float elements[16])
148{
149 memcpy(e, elements, sizeof(float) * 16);
150}
151
152Matrix4::Matrix4(float t00, float t10, float t01, float t11, float x, float y)
153{
154 setRawTransformation(t00, t10, t01, t11, x, y);
155}
156
157Matrix4::Matrix4(const Matrix4 &a, const Matrix4 &b)
158{
159 multiply(a, b, e);
160}
161
162Matrix4::Matrix4(float x, float y, float angle, float sx, float sy, float ox, float oy, float kx, float ky)
163{
164 setTransformation(x, y, angle, sx, sy, ox, oy, kx, ky);
165}
166
167Matrix4 Matrix4::operator * (const Matrix4 &m) const
168{
169 return Matrix4(*this, m);
170}
171
172void Matrix4::operator *= (const Matrix4 &m)
173{
174 float t[16];
175 multiply(*this, m, t);
176 memcpy(this->e, t, sizeof(float)*16);
177}
178
179const float *Matrix4::getElements() const
180{
181 return e;
182}
183
184void Matrix4::setIdentity()
185{
186 memset(e, 0, sizeof(float)*16);
187 e[15] = e[10] = e[5] = e[0] = 1;
188}
189
190void Matrix4::setTranslation(float x, float y)
191{
192 setIdentity();
193 e[12] = x;
194 e[13] = y;
195}
196
197void Matrix4::setRotation(float rad)
198{
199 setIdentity();
200 float c = cosf(rad), s = sinf(rad);
201 e[0] = c;
202 e[4] = -s;
203 e[1] = s;
204 e[5] = c;
205}
206
207void Matrix4::setScale(float sx, float sy)
208{
209 setIdentity();
210 e[0] = sx;
211 e[5] = sy;
212}
213
214void Matrix4::setShear(float kx, float ky)
215{
216 setIdentity();
217 e[1] = ky;
218 e[4] = kx;
219}
220
221void Matrix4::getApproximateScale(float &sx, float &sy) const
222{
223 sx = sqrtf(e[0] * e[0] + e[4] * e[4]);
224 sy = sqrtf(e[1] * e[1] + e[5] * e[5]);
225}
226
227void Matrix4::setRawTransformation(float t00, float t10, float t01, float t11, float x, float y)
228{
229 memset(e, 0, sizeof(float)*16); // zero out matrix
230 e[10] = e[15] = 1.0f;
231 e[0] = t00;
232 e[1] = t10;
233 e[4] = t01;
234 e[5] = t11;
235 e[12] = x;
236 e[13] = y;
237}
238
239void Matrix4::setTransformation(float x, float y, float angle, float sx, float sy, float ox, float oy, float kx, float ky)
240{
241 memset(e, 0, sizeof(float)*16); // zero out matrix
242 float c = cosf(angle), s = sinf(angle);
243 // matrix multiplication carried out on paper:
244 // |1 x| |c -s | |sx | | 1 ky | |1 -ox|
245 // | 1 y| |s c | | sy | |kx 1 | | 1 -oy|
246 // | 1 | | 1 | | 1 | | 1 | | 1 |
247 // | 1| | 1| | 1| | 1| | 1 |
248 // move rotate scale skew origin
249 e[10] = e[15] = 1.0f;
250 e[0] = c * sx - ky * s * sy; // = a
251 e[1] = s * sx + ky * c * sy; // = b
252 e[4] = kx * c * sx - s * sy; // = c
253 e[5] = kx * s * sx + c * sy; // = d
254 e[12] = x - ox * e[0] - oy * e[4];
255 e[13] = y - ox * e[1] - oy * e[5];
256}
257
258void Matrix4::translate(float x, float y)
259{
260 Matrix4 t;
261 t.setTranslation(x, y);
262 this->operator *=(t);
263}
264
265void Matrix4::rotate(float rad)
266{
267 Matrix4 t;
268 t.setRotation(rad);
269 this->operator *=(t);
270}
271
272void Matrix4::scale(float sx, float sy)
273{
274 Matrix4 t;
275 t.setScale(sx, sy);
276 this->operator *=(t);
277}
278
279void Matrix4::shear(float kx, float ky)
280{
281 Matrix4 t;
282 t.setShear(kx,ky);
283 this->operator *=(t);
284}
285
286bool Matrix4::isAffine2DTransform() const
287{
288 return fabsf(e[2] + e[3] + e[6] + e[7] + e[8] + e[9] + e[11] + e[14]) < 0.00001f
289 && fabsf(e[10] + e[15] - 2.0f) < 0.00001f;
290}
291
292Matrix4 Matrix4::inverse() const
293{
294 Matrix4 inv;
295
296 inv.e[0] = e[5] * e[10] * e[15] -
297 e[5] * e[11] * e[14] -
298 e[9] * e[6] * e[15] +
299 e[9] * e[7] * e[14] +
300 e[13] * e[6] * e[11] -
301 e[13] * e[7] * e[10];
302
303 inv.e[4] = -e[4] * e[10] * e[15] +
304 e[4] * e[11] * e[14] +
305 e[8] * e[6] * e[15] -
306 e[8] * e[7] * e[14] -
307 e[12] * e[6] * e[11] +
308 e[12] * e[7] * e[10];
309
310 inv.e[8] = e[4] * e[9] * e[15] -
311 e[4] * e[11] * e[13] -
312 e[8] * e[5] * e[15] +
313 e[8] * e[7] * e[13] +
314 e[12] * e[5] * e[11] -
315 e[12] * e[7] * e[9];
316
317 inv.e[12] = -e[4] * e[9] * e[14] +
318 e[4] * e[10] * e[13] +
319 e[8] * e[5] * e[14] -
320 e[8] * e[6] * e[13] -
321 e[12] * e[5] * e[10] +
322 e[12] * e[6] * e[9];
323
324 inv.e[1] = -e[1] * e[10] * e[15] +
325 e[1] * e[11] * e[14] +
326 e[9] * e[2] * e[15] -
327 e[9] * e[3] * e[14] -
328 e[13] * e[2] * e[11] +
329 e[13] * e[3] * e[10];
330
331 inv.e[5] = e[0] * e[10] * e[15] -
332 e[0] * e[11] * e[14] -
333 e[8] * e[2] * e[15] +
334 e[8] * e[3] * e[14] +
335 e[12] * e[2] * e[11] -
336 e[12] * e[3] * e[10];
337
338 inv.e[9] = -e[0] * e[9] * e[15] +
339 e[0] * e[11] * e[13] +
340 e[8] * e[1] * e[15] -
341 e[8] * e[3] * e[13] -
342 e[12] * e[1] * e[11] +
343 e[12] * e[3] * e[9];
344
345 inv.e[13] = e[0] * e[9] * e[14] -
346 e[0] * e[10] * e[13] -
347 e[8] * e[1] * e[14] +
348 e[8] * e[2] * e[13] +
349 e[12] * e[1] * e[10] -
350 e[12] * e[2] * e[9];
351
352 inv.e[2] = e[1] * e[6] * e[15] -
353 e[1] * e[7] * e[14] -
354 e[5] * e[2] * e[15] +
355 e[5] * e[3] * e[14] +
356 e[13] * e[2] * e[7] -
357 e[13] * e[3] * e[6];
358
359 inv.e[6] = -e[0] * e[6] * e[15] +
360 e[0] * e[7] * e[14] +
361 e[4] * e[2] * e[15] -
362 e[4] * e[3] * e[14] -
363 e[12] * e[2] * e[7] +
364 e[12] * e[3] * e[6];
365
366 inv.e[10] = e[0] * e[5] * e[15] -
367 e[0] * e[7] * e[13] -
368 e[4] * e[1] * e[15] +
369 e[4] * e[3] * e[13] +
370 e[12] * e[1] * e[7] -
371 e[12] * e[3] * e[5];
372
373 inv.e[14] = -e[0] * e[5] * e[14] +
374 e[0] * e[6] * e[13] +
375 e[4] * e[1] * e[14] -
376 e[4] * e[2] * e[13] -
377 e[12] * e[1] * e[6] +
378 e[12] * e[2] * e[5];
379
380 inv.e[3] = -e[1] * e[6] * e[11] +
381 e[1] * e[7] * e[10] +
382 e[5] * e[2] * e[11] -
383 e[5] * e[3] * e[10] -
384 e[9] * e[2] * e[7] +
385 e[9] * e[3] * e[6];
386
387 inv.e[7] = e[0] * e[6] * e[11] -
388 e[0] * e[7] * e[10] -
389 e[4] * e[2] * e[11] +
390 e[4] * e[3] * e[10] +
391 e[8] * e[2] * e[7] -
392 e[8] * e[3] * e[6];
393
394 inv.e[11] = -e[0] * e[5] * e[11] +
395 e[0] * e[7] * e[9] +
396 e[4] * e[1] * e[11] -
397 e[4] * e[3] * e[9] -
398 e[8] * e[1] * e[7] +
399 e[8] * e[3] * e[5];
400
401 inv.e[15] = e[0] * e[5] * e[10] -
402 e[0] * e[6] * e[9] -
403 e[4] * e[1] * e[10] +
404 e[4] * e[2] * e[9] +
405 e[8] * e[1] * e[6] -
406 e[8] * e[2] * e[5];
407
408 float det = e[0] * inv.e[0] + e[1] * inv.e[4] + e[2] * inv.e[8] + e[3] * inv.e[12];
409
410 float invdet = 1.0f / det;
411
412 for (int i = 0; i < 16; i++)
413 inv.e[i] *= invdet;
414
415 return inv;
416}
417
418Matrix4 Matrix4::ortho(float left, float right, float bottom, float top, float near, float far)
419{
420 Matrix4 m;
421
422 m.e[0] = 2.0f / (right - left);
423 m.e[5] = 2.0f / (top - bottom);
424 m.e[10] = -2.0f / (far - near);
425
426 m.e[12] = -(right + left) / (right - left);
427 m.e[13] = -(top + bottom) / (top - bottom);
428 m.e[14] = -(far + near) / (far - near);
429
430 return m;
431}
432
433/**
434 * | e0 e3 e6 |
435 * | e1 e4 e7 |
436 * | e2 e5 e8 |
437 **/
438Matrix3::Matrix3()
439{
440 setIdentity();
441}
442
443Matrix3::Matrix3(const Matrix4 &mat4)
444{
445 const float *mat4elems = mat4.getElements();
446
447 // Column 0.
448 e[0] = mat4elems[0];
449 e[1] = mat4elems[1];
450 e[2] = mat4elems[2];
451
452 // Column 1.
453 e[3] = mat4elems[4];
454 e[4] = mat4elems[5];
455 e[5] = mat4elems[6];
456
457 // Column 2.
458 e[6] = mat4elems[8];
459 e[7] = mat4elems[9];
460 e[8] = mat4elems[10];
461}
462
463Matrix3::Matrix3(float x, float y, float angle, float sx, float sy, float ox, float oy, float kx, float ky)
464{
465 setTransformation(x, y, angle, sx, sy, ox, oy, kx, ky);
466}
467
468Matrix3::~Matrix3()
469{
470}
471
472void Matrix3::setIdentity()
473{
474 memset(e, 0, sizeof(float) * 9);
475 e[8] = e[4] = e[0] = 1.0f;
476}
477
478Matrix3 Matrix3::operator * (const love::Matrix3 &m) const
479{
480 Matrix3 t;
481
482 t.e[0] = (e[0]*m.e[0]) + (e[3]*m.e[1]) + (e[6]*m.e[2]);
483 t.e[3] = (e[0]*m.e[3]) + (e[3]*m.e[4]) + (e[6]*m.e[5]);
484 t.e[6] = (e[0]*m.e[6]) + (e[3]*m.e[7]) + (e[6]*m.e[8]);
485
486 t.e[1] = (e[1]*m.e[0]) + (e[4]*m.e[1]) + (e[7]*m.e[2]);
487 t.e[4] = (e[1]*m.e[3]) + (e[4]*m.e[4]) + (e[7]*m.e[5]);
488 t.e[7] = (e[1]*m.e[6]) + (e[4]*m.e[7]) + (e[7]*m.e[8]);
489
490 t.e[2] = (e[2]*m.e[0]) + (e[5]*m.e[1]) + (e[8]*m.e[2]);
491 t.e[5] = (e[2]*m.e[3]) + (e[5]*m.e[4]) + (e[8]*m.e[5]);
492 t.e[8] = (e[2]*m.e[6]) + (e[5]*m.e[7]) + (e[8]*m.e[8]);
493
494 return t;
495}
496
497void Matrix3::operator *= (const Matrix3 &m)
498{
499 Matrix3 t = (*this) * m;
500 memcpy(e, t.e, sizeof(float) * 9);
501}
502
503const float *Matrix3::getElements() const
504{
505 return e;
506}
507
508Matrix3 Matrix3::transposedInverse() const
509{
510 // e0 e3 e6
511 // e1 e4 e7
512 // e2 e5 e8
513
514 float det = e[0] * (e[4]*e[8] - e[7]*e[5])
515 - e[1] * (e[3]*e[8] - e[5]*e[6])
516 + e[2] * (e[3]*e[7] - e[4]*e[6]);
517
518 float invdet = 1.0f / det;
519
520 Matrix3 m;
521
522 m.e[0] = invdet * (e[4]*e[8] - e[7]*e[5]);
523 m.e[3] = -invdet * (e[1]*e[8] - e[2]*e[7]);
524 m.e[6] = invdet * (e[1]*e[5] - e[2]*e[4]);
525 m.e[1] = -invdet * (e[3]*e[8] - e[5]*e[6]);
526 m.e[4] = invdet * (e[0]*e[8] - e[2]*e[6]);
527 m.e[7] = -invdet * (e[0]*e[5] - e[3]*e[2]);
528 m.e[2] = invdet * (e[3]*e[7] - e[6]*e[4]);
529 m.e[5] = -invdet * (e[0]*e[7] - e[6]*e[1]);
530 m.e[8] = invdet * (e[0]*e[4] - e[3]*e[1]);
531
532 return m;
533}
534
535void Matrix3::setTransformation(float x, float y, float angle, float sx, float sy, float ox, float oy, float kx, float ky)
536{
537 float c = cosf(angle), s = sinf(angle);
538 // matrix multiplication carried out on paper:
539 // |1 x| |c -s | |sx | | 1 ky | |1 -ox|
540 // | 1 y| |s c | | sy | |kx 1 | | 1 -oy|
541 // | 1| | 1| | 1| | 1| | 1 |
542 // move rotate scale skew origin
543 e[0] = c * sx - ky * s * sy; // = a
544 e[1] = s * sx + ky * c * sy; // = b
545 e[3] = kx * c * sx - s * sy; // = c
546 e[4] = kx * s * sx + c * sy; // = d
547 e[6] = x - ox * e[0] - oy * e[3];
548 e[7] = y - ox * e[1] - oy * e[4];
549
550 e[2] = e[5] = 0.0f;
551 e[8] = 1.0f;
552}
553
554} // love
555