1 | /* |
2 | * The copyright in this software is being made available under the 2-clauses |
3 | * BSD License, included below. This software may be subject to other third |
4 | * party and contributor rights, including patent rights, and no such rights |
5 | * are granted under this license. |
6 | * |
7 | * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium |
8 | * Copyright (c) 2002-2014, Professor Benoit Macq |
9 | * Copyright (c) 2001-2003, David Janssens |
10 | * Copyright (c) 2002-2003, Yannick Verschueren |
11 | * Copyright (c) 2003-2007, Francois-Olivier Devaux |
12 | * Copyright (c) 2003-2014, Antonin Descampe |
13 | * Copyright (c) 2005, Herve Drolon, FreeImage Team |
14 | * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR |
15 | * Copyright (c) 2012, CS Systemes d'Information, France |
16 | * All rights reserved. |
17 | * |
18 | * Redistribution and use in source and binary forms, with or without |
19 | * modification, are permitted provided that the following conditions |
20 | * are met: |
21 | * 1. Redistributions of source code must retain the above copyright |
22 | * notice, this list of conditions and the following disclaimer. |
23 | * 2. Redistributions in binary form must reproduce the above copyright |
24 | * notice, this list of conditions and the following disclaimer in the |
25 | * documentation and/or other materials provided with the distribution. |
26 | * |
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' |
28 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | * POSSIBILITY OF SUCH DAMAGE. |
38 | */ |
39 | |
40 | #ifdef __SSE__ |
41 | #include <xmmintrin.h> |
42 | #endif |
43 | #ifdef __SSE2__ |
44 | #include <emmintrin.h> |
45 | #endif |
46 | #ifdef __SSE4_1__ |
47 | #include <smmintrin.h> |
48 | #endif |
49 | |
50 | #include "opj_includes.h" |
51 | |
/* <summary> */
/* This table contains the norms of the basis function of the reversible MCT. */
/* One entry per component: [0] = Y, [1] = U, [2] = V. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };

/* <summary> */
/* This table contains the norms of the basis function of the irreversible MCT. */
/* One entry per component: [0] = Y, [1] = U, [2] = V. */
/* </summary> */
static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
61 | |
62 | const OPJ_FLOAT64 * opj_mct_get_mct_norms() |
63 | { |
64 | return opj_mct_norms; |
65 | } |
66 | |
67 | const OPJ_FLOAT64 * opj_mct_get_mct_norms_real() |
68 | { |
69 | return opj_mct_norms_real; |
70 | } |
71 | |
72 | /* <summary> */ |
73 | /* Forward reversible MCT. */ |
74 | /* </summary> */ |
75 | #ifdef __SSE2__ |
/* SSE2 forward reversible MCT (RCT): in-place transform of n samples.
 * Y = (R + 2G + B) >> 2, U = B - G, V = R - G,
 * with c0/c1/c2 holding R/G/B on entry and Y/U/V on return. */
void opj_mct_encode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;
    /* Buffers must be 16-byte aligned: the aligned loads/stores
     * (_mm_load_si128 / _mm_store_si128) below fault otherwise. */
    assert(((size_t)c0 & 0xf) == 0);
    assert(((size_t)c1 & 0xf) == 0);
    assert(((size_t)c2 & 0xf) == 0);

    /* Vector loop: 4 samples per iteration. */
    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
        y = _mm_add_epi32(g, g);   /* 2G */
        y = _mm_add_epi32(y, b);   /* 2G + B */
        y = _mm_add_epi32(y, r);   /* R + 2G + B */
        y = _mm_srai_epi32(y, 2);  /* arithmetic shift: floor((R+2G+B)/4) */
        u = _mm_sub_epi32(b, g);
        v = _mm_sub_epi32(r, g);
        _mm_store_si128((__m128i *) & (c0[i]), y);
        _mm_store_si128((__m128i *) & (c1[i]), u);
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }

    /* Scalar tail: the remaining 0..3 samples. */
    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = (r + (g * 2) + b) >> 2;
        OPJ_INT32 u = b - g;
        OPJ_INT32 v = r - g;
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
117 | #else |
118 | void opj_mct_encode( |
119 | OPJ_INT32* OPJ_RESTRICT c0, |
120 | OPJ_INT32* OPJ_RESTRICT c1, |
121 | OPJ_INT32* OPJ_RESTRICT c2, |
122 | OPJ_SIZE_T n) |
123 | { |
124 | OPJ_SIZE_T i; |
125 | const OPJ_SIZE_T len = n; |
126 | |
127 | for (i = 0; i < len; ++i) { |
128 | OPJ_INT32 r = c0[i]; |
129 | OPJ_INT32 g = c1[i]; |
130 | OPJ_INT32 b = c2[i]; |
131 | OPJ_INT32 y = (r + (g * 2) + b) >> 2; |
132 | OPJ_INT32 u = b - g; |
133 | OPJ_INT32 v = r - g; |
134 | c0[i] = y; |
135 | c1[i] = u; |
136 | c2[i] = v; |
137 | } |
138 | } |
139 | #endif |
140 | |
141 | /* <summary> */ |
142 | /* Inverse reversible MCT. */ |
143 | /* </summary> */ |
144 | #ifdef __SSE2__ |
/* SSE2 inverse reversible MCT (RCT): in-place transform of n samples.
 * G = Y - ((U + V) >> 2), R = V + G, B = U + G,
 * with c0/c1/c2 holding Y/U/V on entry and R/G/B on return. */
void opj_mct_decode(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    /* Vector loop: 4 samples per iteration.
     * NOTE(review): unlike opj_mct_encode, there are no alignment asserts
     * here, yet _mm_load_si128/_mm_store_si128 still require 16-byte-aligned
     * buffers — presumably guaranteed by the caller/allocator; confirm. */
    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i r, g, b;
        __m128i y = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i u = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i v = _mm_load_si128((const __m128i *) & (c2[i]));
        g = y;
        g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2)); /* G = Y - ((U+V)>>2) */
        r = _mm_add_epi32(v, g);
        b = _mm_add_epi32(u, g);
        _mm_store_si128((__m128i *) & (c0[i]), r);
        _mm_store_si128((__m128i *) & (c1[i]), g);
        _mm_store_si128((__m128i *) & (c2[i]), b);
    }
    /* Scalar tail: the remaining 0..3 samples. */
    for (; i < len; ++i) {
        OPJ_INT32 y = c0[i];
        OPJ_INT32 u = c1[i];
        OPJ_INT32 v = c2[i];
        OPJ_INT32 g = y - ((u + v) >> 2);
        OPJ_INT32 r = v + g;
        OPJ_INT32 b = u + g;
        c0[i] = r;
        c1[i] = g;
        c2[i] = b;
    }
}
179 | #else |
180 | void opj_mct_decode( |
181 | OPJ_INT32* OPJ_RESTRICT c0, |
182 | OPJ_INT32* OPJ_RESTRICT c1, |
183 | OPJ_INT32* OPJ_RESTRICT c2, |
184 | OPJ_SIZE_T n) |
185 | { |
186 | OPJ_UINT32 i; |
187 | for (i = 0; i < n; ++i) { |
188 | OPJ_INT32 y = c0[i]; |
189 | OPJ_INT32 u = c1[i]; |
190 | OPJ_INT32 v = c2[i]; |
191 | OPJ_INT32 g = y - ((u + v) >> 2); |
192 | OPJ_INT32 r = v + g; |
193 | OPJ_INT32 b = u + g; |
194 | c0[i] = r; |
195 | c1[i] = g; |
196 | c2[i] = b; |
197 | } |
198 | } |
199 | #endif |
200 | |
201 | /* <summary> */ |
202 | /* Get norm of basis function of reversible MCT. */ |
203 | /* </summary> */ |
204 | OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) |
205 | { |
206 | return opj_mct_norms[compno]; |
207 | } |
208 | |
209 | /* <summary> */ |
210 | /* Forward irreversible MCT. */ |
211 | /* </summary> */ |
212 | #ifdef __SSE4_1__ |
/* SSE4.1 forward irreversible MCT (ICT), Q13 fixed point, in place:
 * c0/c1/c2 hold R/G/B on entry and Y/U/V on return. */
void opj_mct_encode_real(
    OPJ_INT32* OPJ_RESTRICT c0,
    OPJ_INT32* OPJ_RESTRICT c1,
    OPJ_INT32* OPJ_RESTRICT c2,
    OPJ_SIZE_T n)
{
    OPJ_SIZE_T i;
    const OPJ_SIZE_T len = n;

    /* Every constant below is an ICT matrix coefficient scaled by 2^13,
     * e.g. ry = 2449 ~= 0.299 * 8192:
     *   Y =  0.299 R   + 0.587 G   + 0.114 B
     *   U = -0.16875 R - 0.33126 G + 0.5 B
     *   V =  0.5 R     - 0.41869 G - 0.08131 B */
    const __m128i ry = _mm_set1_epi32(2449);
    const __m128i gy = _mm_set1_epi32(4809);
    const __m128i by = _mm_set1_epi32(934);
    const __m128i ru = _mm_set1_epi32(1382);
    const __m128i gu = _mm_set1_epi32(2714);
    /* bu and rv would both be 4096 (0.5 * 2^13); those two products are
     * computed exactly with a shift below instead of _mm_mul_epi32. */
    const __m128i gv = _mm_set1_epi32(3430);
    const __m128i bv = _mm_set1_epi32(666);
    /* 4096 (= 2^12) replicated into both 64-bit lanes: the rounding term
     * added before each >> 13. */
    const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
                             _MM_SHUFFLE(1, 0, 1, 0));

    /* Vector loop: 4 samples per iteration. Every fixed-point multiply
     * uses the same idiom:
     *   - _mm_mul_epi32 multiplies only the even 32-bit lanes (0 and 2)
     *     into 64-bit products, so the odd lanes are first shuffled into
     *     even positions ("hi") and multiplied separately;
     *   - add mulround and shift right by 13 ("lo"); "hi" is instead
     *     shifted LEFT by 32 - 13 so its 32-bit results land in the odd
     *     lanes;
     *   - _mm_blend_epi16 with mask 0xCC takes lanes 1 and 3 from "hi",
     *     rebuilding four 32-bit results. */
    for (i = 0; i < (len & ~3U); i += 4) {
        __m128i lo, hi;
        __m128i y, u, v;
        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));

        /* y = fix_mul(r, ry) */
        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ry);
        hi = _mm_mul_epi32(hi, ry);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_blend_epi16(lo, hi, 0xCC);

        /* y += fix_mul(g, gy) */
        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gy);
        hi = _mm_mul_epi32(hi, gy);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));

        /* y += fix_mul(b, by) */
        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, by);
        hi = _mm_mul_epi32(hi, by);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c0[i]), y);

        /* u = fix_mul(b, 4096) computed exactly: sign-extend b to 64 bits
         * (lanes 0,2 then lanes 1,3), shift left by 12 (b * 4096), then
         * the same round-and-shift as above. */
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_blend_epi16(lo, hi, 0xCC);

        /* u -= fix_mul(r, ru) */
        lo = r;
        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, ru);
        hi = _mm_mul_epi32(hi, ru);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));

        /* u -= fix_mul(g, gu) */
        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gu);
        hi = _mm_mul_epi32(hi, gu);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c1[i]), u);

        /* v = fix_mul(r, 4096), exact shift path as for u above. */
        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
        lo = _mm_slli_epi64(lo, 12);
        hi = _mm_slli_epi64(hi, 12);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_blend_epi16(lo, hi, 0xCC);

        /* v -= fix_mul(g, gv) */
        lo = g;
        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, gv);
        hi = _mm_mul_epi32(hi, gv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));

        /* v -= fix_mul(b, bv) */
        lo = b;
        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
        lo = _mm_mul_epi32(lo, bv);
        hi = _mm_mul_epi32(hi, bv);
        lo = _mm_add_epi64(lo, mulround);
        hi = _mm_add_epi64(hi, mulround);
        lo = _mm_srli_epi64(lo, 13);
        hi = _mm_slli_epi64(hi, 32 - 13);
        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
        _mm_store_si128((__m128i *) & (c2[i]), v);
    }
    /* Scalar tail: the remaining 0..3 samples, same Q13 arithmetic via
     * opj_int_fix_mul. */
    for (; i < len; ++i) {
        OPJ_INT32 r = c0[i];
        OPJ_INT32 g = c1[i];
        OPJ_INT32 b = c2[i];
        OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
                      4809) + opj_int_fix_mul(b, 934);
        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
                      2714) + opj_int_fix_mul(b, 4096);
        OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
                      3430) - opj_int_fix_mul(b, 666);
        c0[i] = y;
        c1[i] = u;
        c2[i] = v;
    }
}
357 | #else |
358 | void opj_mct_encode_real( |
359 | OPJ_INT32* OPJ_RESTRICT c0, |
360 | OPJ_INT32* OPJ_RESTRICT c1, |
361 | OPJ_INT32* OPJ_RESTRICT c2, |
362 | OPJ_SIZE_T n) |
363 | { |
364 | OPJ_UINT32 i; |
365 | for (i = 0; i < n; ++i) { |
366 | OPJ_INT32 r = c0[i]; |
367 | OPJ_INT32 g = c1[i]; |
368 | OPJ_INT32 b = c2[i]; |
369 | OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, |
370 | 4809) + opj_int_fix_mul(b, 934); |
371 | OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, |
372 | 2714) + opj_int_fix_mul(b, 4096); |
373 | OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, |
374 | 3430) - opj_int_fix_mul(b, 666); |
375 | c0[i] = y; |
376 | c1[i] = u; |
377 | c2[i] = v; |
378 | } |
379 | } |
380 | #endif |
381 | |
382 | /* <summary> */ |
383 | /* Inverse irreversible MCT. */ |
384 | /* </summary> */ |
385 | void opj_mct_decode_real( |
386 | OPJ_FLOAT32* OPJ_RESTRICT c0, |
387 | OPJ_FLOAT32* OPJ_RESTRICT c1, |
388 | OPJ_FLOAT32* OPJ_RESTRICT c2, |
389 | OPJ_SIZE_T n) |
390 | { |
391 | OPJ_UINT32 i; |
392 | #ifdef __SSE__ |
393 | __m128 vrv, vgu, vgv, vbu; |
394 | vrv = _mm_set1_ps(1.402f); |
395 | vgu = _mm_set1_ps(0.34413f); |
396 | vgv = _mm_set1_ps(0.71414f); |
397 | vbu = _mm_set1_ps(1.772f); |
398 | for (i = 0; i < (n >> 3); ++i) { |
399 | __m128 vy, vu, vv; |
400 | __m128 vr, vg, vb; |
401 | |
402 | vy = _mm_load_ps(c0); |
403 | vu = _mm_load_ps(c1); |
404 | vv = _mm_load_ps(c2); |
405 | vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); |
406 | vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); |
407 | vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); |
408 | _mm_store_ps(c0, vr); |
409 | _mm_store_ps(c1, vg); |
410 | _mm_store_ps(c2, vb); |
411 | c0 += 4; |
412 | c1 += 4; |
413 | c2 += 4; |
414 | |
415 | vy = _mm_load_ps(c0); |
416 | vu = _mm_load_ps(c1); |
417 | vv = _mm_load_ps(c2); |
418 | vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); |
419 | vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); |
420 | vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); |
421 | _mm_store_ps(c0, vr); |
422 | _mm_store_ps(c1, vg); |
423 | _mm_store_ps(c2, vb); |
424 | c0 += 4; |
425 | c1 += 4; |
426 | c2 += 4; |
427 | } |
428 | n &= 7; |
429 | #endif |
430 | for (i = 0; i < n; ++i) { |
431 | OPJ_FLOAT32 y = c0[i]; |
432 | OPJ_FLOAT32 u = c1[i]; |
433 | OPJ_FLOAT32 v = c2[i]; |
434 | OPJ_FLOAT32 r = y + (v * 1.402f); |
435 | OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); |
436 | OPJ_FLOAT32 b = y + (u * 1.772f); |
437 | c0[i] = r; |
438 | c1[i] = g; |
439 | c2[i] = b; |
440 | } |
441 | } |
442 | |
443 | /* <summary> */ |
444 | /* Get norm of basis function of irreversible MCT. */ |
445 | /* </summary> */ |
446 | OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) |
447 | { |
448 | return opj_mct_norms_real[compno]; |
449 | } |
450 | |
451 | |
452 | OPJ_BOOL opj_mct_encode_custom( |
453 | OPJ_BYTE * pCodingdata, |
454 | OPJ_SIZE_T n, |
455 | OPJ_BYTE ** pData, |
456 | OPJ_UINT32 pNbComp, |
457 | OPJ_UINT32 isSigned) |
458 | { |
459 | OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata; |
460 | OPJ_SIZE_T i; |
461 | OPJ_UINT32 j; |
462 | OPJ_UINT32 k; |
463 | OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp; |
464 | OPJ_INT32 * lCurrentData = 00; |
465 | OPJ_INT32 * lCurrentMatrix = 00; |
466 | OPJ_INT32 ** lData = (OPJ_INT32 **) pData; |
467 | OPJ_UINT32 lMultiplicator = 1 << 13; |
468 | OPJ_INT32 * lMctPtr; |
469 | |
470 | OPJ_ARG_NOT_USED(isSigned); |
471 | |
472 | lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof( |
473 | OPJ_INT32)); |
474 | if (! lCurrentData) { |
475 | return OPJ_FALSE; |
476 | } |
477 | |
478 | lCurrentMatrix = lCurrentData + pNbComp; |
479 | |
480 | for (i = 0; i < lNbMatCoeff; ++i) { |
481 | lCurrentMatrix[i] = (OPJ_INT32)(*(lMct++) * (OPJ_FLOAT32)lMultiplicator); |
482 | } |
483 | |
484 | for (i = 0; i < n; ++i) { |
485 | lMctPtr = lCurrentMatrix; |
486 | for (j = 0; j < pNbComp; ++j) { |
487 | lCurrentData[j] = (*(lData[j])); |
488 | } |
489 | |
490 | for (j = 0; j < pNbComp; ++j) { |
491 | *(lData[j]) = 0; |
492 | for (k = 0; k < pNbComp; ++k) { |
493 | *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]); |
494 | ++lMctPtr; |
495 | } |
496 | |
497 | ++lData[j]; |
498 | } |
499 | } |
500 | |
501 | opj_free(lCurrentData); |
502 | |
503 | return OPJ_TRUE; |
504 | } |
505 | |
506 | OPJ_BOOL opj_mct_decode_custom( |
507 | OPJ_BYTE * pDecodingData, |
508 | OPJ_SIZE_T n, |
509 | OPJ_BYTE ** pData, |
510 | OPJ_UINT32 pNbComp, |
511 | OPJ_UINT32 isSigned) |
512 | { |
513 | OPJ_FLOAT32 * lMct; |
514 | OPJ_SIZE_T i; |
515 | OPJ_UINT32 j; |
516 | OPJ_UINT32 k; |
517 | |
518 | OPJ_FLOAT32 * lCurrentData = 00; |
519 | OPJ_FLOAT32 * lCurrentResult = 00; |
520 | OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData; |
521 | |
522 | OPJ_ARG_NOT_USED(isSigned); |
523 | |
524 | lCurrentData = (OPJ_FLOAT32 *) opj_malloc(2 * pNbComp * sizeof(OPJ_FLOAT32)); |
525 | if (! lCurrentData) { |
526 | return OPJ_FALSE; |
527 | } |
528 | lCurrentResult = lCurrentData + pNbComp; |
529 | |
530 | for (i = 0; i < n; ++i) { |
531 | lMct = (OPJ_FLOAT32 *) pDecodingData; |
532 | for (j = 0; j < pNbComp; ++j) { |
533 | lCurrentData[j] = (OPJ_FLOAT32)(*(lData[j])); |
534 | } |
535 | for (j = 0; j < pNbComp; ++j) { |
536 | lCurrentResult[j] = 0; |
537 | for (k = 0; k < pNbComp; ++k) { |
538 | lCurrentResult[j] += *(lMct++) * lCurrentData[k]; |
539 | } |
540 | *(lData[j]++) = (OPJ_FLOAT32)(lCurrentResult[j]); |
541 | } |
542 | } |
543 | opj_free(lCurrentData); |
544 | return OPJ_TRUE; |
545 | } |
546 | |
547 | void opj_calculate_norms(OPJ_FLOAT64 * pNorms, |
548 | OPJ_UINT32 pNbComps, |
549 | OPJ_FLOAT32 * pMatrix) |
550 | { |
551 | OPJ_UINT32 i, j, lIndex; |
552 | OPJ_FLOAT32 lCurrentValue; |
553 | OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms; |
554 | OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix; |
555 | |
556 | for (i = 0; i < pNbComps; ++i) { |
557 | lNorms[i] = 0; |
558 | lIndex = i; |
559 | |
560 | for (j = 0; j < pNbComps; ++j) { |
561 | lCurrentValue = lMatrix[lIndex]; |
562 | lIndex += pNbComps; |
563 | lNorms[i] += lCurrentValue * lCurrentValue; |
564 | } |
565 | lNorms[i] = sqrt(lNorms[i]); |
566 | } |
567 | } |
568 | |