1 | /* |
2 | * Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | |
26 | #include <stdlib.h> |
27 | #include "jni_util.h" |
28 | #include "math.h" |
29 | |
30 | #include "GraphicsPrimitiveMgr.h" |
31 | #include "Region.h" |
32 | |
33 | #include "sun_java2d_loops_TransformHelper.h" |
34 | #include "java_awt_image_AffineTransformOp.h" |
35 | |
36 | /* |
37 | * The stub functions replace the bilinear and bicubic interpolation |
38 | * functions with NOP versions so that the performance of the helper |
39 | * functions that fetch the data can be more directly tested. They |
40 | * are not compiled or enabled by default. Change the following |
41 | * #undef to a #define to build the stub functions. |
42 | * |
43 | * When compiled, they are enabled by the environment variable TXSTUB. |
44 | * When compiled, there is also code to disable the VIS versions and |
45 | * use the C versions in this file in their place by defining the TXNOVIS |
46 | * environment variable. |
47 | */ |
48 | #undef MAKE_STUBS |
49 | |
50 | /* The number of IntArgbPre samples to store in the temporary buffer. */ |
51 | #define LINE_SIZE 2048 |
52 | |
53 | /* The size of a stack allocated buffer to hold edge coordinates (see below). */ |
54 | #define MAXEDGES 1024 |
55 | |
56 | /* Declare the software interpolation functions. */ |
57 | static TransformInterpFunc BilinearInterp; |
58 | static TransformInterpFunc BicubicInterp; |
59 | |
60 | #ifdef MAKE_STUBS |
61 | /* Optionally Declare the stub interpolation functions. */ |
62 | static TransformInterpFunc BilinearInterpStub; |
63 | static TransformInterpFunc BicubicInterpStub; |
64 | #endif /* MAKE_STUBS */ |
65 | |
66 | /* |
67 | * Initially choose the software interpolation functions. |
68 | * These choices can be overridden by platform code that runs during the |
69 | * primitive registration phase of initialization by storing pointers to |
70 | * better functions in these pointers. |
71 | * Compiling the stubs also turns on code below that can re-install the |
72 | * software functions or stub functions on the first call to this primitive. |
73 | */ |
74 | TransformInterpFunc *pBilinearFunc = BilinearInterp; |
75 | TransformInterpFunc *pBicubicFunc = BicubicInterp; |
76 | |
77 | /* |
78 | * The dxydxy parameters of the inverse transform determine how |
79 | * quickly we step through the source image. For tiny scale |
80 | * factors (on the order of 1E-16 or so) the stepping distances |
81 | * are huge. The image has been scaled so small that stepping |
82 | * a single pixel in device space moves the sampling point by |
83 | * billions (or more) pixels in the source image space. These |
84 | * huge stepping values can overflow the whole part of the longs |
85 | * we use for the fixed point stepping equations and so we need |
86 | * a more robust solution. We could simply iterate over every |
87 | * device pixel, use the inverse transform to transform it back |
88 | * into the source image coordinate system and then test it for |
89 | * being in range and sample pixel-by-pixel, but that is quite |
90 | * a bit more expensive. Fortunately, if the scale factors are |
91 | * so tiny that we overflow our long values then the number of |
92 | * pixels we are planning to visit should be very tiny. The only |
93 | * exception to that rule is if the scale factor along one |
94 | * dimension is tiny (creating the huge stepping values), and |
95 | * the scale factor along the other dimension is fairly regular |
96 | * or an up-scale. In that case we have a lot of pixels along |
97 | * the direction of the larger axis to sample, but few along the |
98 | * smaller axis. Though, pessimally, with an added shear factor |
99 | * such a linearly tiny image could have bounds that cover a large |
100 | * number of pixels. Such odd transformations should be very |
101 | * rare and the absolute limit on calculations would involve a |
102 | * single reverse transform of every pixel in the output image |
103 | * which is not fast, but it should not cause an undue stall |
104 | * of the rendering software. |
105 | * |
106 | * The specific test we will use is to calculate the inverse |
107 | * transformed values of every corner of the destination bounds |
108 | * (in order to be user-clip independent) and if we can |
109 | * perform a fixed-point-long inverse transform of all of |
110 | * those points without overflowing we will use the fast |
111 | * fixed point algorithm. Otherwise we will use the safe |
112 | * per-pixel transform algorithm. |
113 | * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth |
114 | * Transformed they are: |
115 | * tx, ty |
116 | * tx +dxdy*H, ty +dydy*H |
117 | * tx+dxdx*W, ty+dydx*W |
118 | * tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H |
119 | */ |
120 | /* We reject coordinates not less than 1<<30 so that the distance between */ |
121 | /* any 2 of them is less than 1<<31 which would overflow into the sign */ |
122 | /* bit of a signed long value used to represent fixed point coordinates. */ |
123 | #define TX_FIXED_UNSAFE(v) (fabs(v) >= (1<<30)) |
124 | static jboolean |
125 | checkOverflow(jint dxoff, jint dyoff, |
126 | SurfaceDataBounds *pBounds, |
127 | TransformInfo *pItxInfo, |
128 | jdouble *retx, jdouble *rety) |
129 | { |
130 | jdouble x, y; |
131 | |
132 | x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */ |
133 | y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */ |
134 | Transform_transform(pItxInfo, &x, &y); |
135 | *retx = x; |
136 | *rety = y; |
137 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
138 | return JNI_TRUE; |
139 | } |
140 | |
141 | x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */ |
142 | y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */ |
143 | Transform_transform(pItxInfo, &x, &y); |
144 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
145 | return JNI_TRUE; |
146 | } |
147 | |
148 | x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */ |
149 | y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */ |
150 | Transform_transform(pItxInfo, &x, &y); |
151 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
152 | return JNI_TRUE; |
153 | } |
154 | |
155 | x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */ |
156 | y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */ |
157 | Transform_transform(pItxInfo, &x, &y); |
158 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
159 | return JNI_TRUE; |
160 | } |
161 | |
162 | return JNI_FALSE; |
163 | } |
164 | |
165 | /* |
166 | * Fill the edge buffer with pairs of coordinates representing the maximum |
167 | * left and right pixels of the destination surface that should be processed |
168 | * on each scanline, clipped to the bounds parameter. |
169 | * The number of scanlines to calculate is implied by the bounds parameter. |
170 | * Only pixels that map back through the specified (inverse) transform to a |
171 | * source coordinate that falls within the (0, 0, sw, sh) bounds of the |
172 | * source image should be processed. |
173 | * pEdges points to an array of jints that holds 2 + numedges*2 values where |
174 | * numedges should match (pBounds->y2 - pBounds->y1). |
175 | * The first two jints in pEdges should be set to y1 and y2 and every pair |
176 | * of jints after that represent the xmin,xmax of all pixels in range of |
177 | * the transformed blit for the corresponding scanline. |
178 | */ |
179 | static void |
180 | calculateEdges(jint *pEdges, |
181 | SurfaceDataBounds *pBounds, |
182 | TransformInfo *pItxInfo, |
183 | jlong xbase, jlong ybase, |
184 | juint sw, juint sh) |
185 | { |
186 | jlong dxdxlong, dydxlong; |
187 | jlong dxdylong, dydylong; |
188 | jlong drowxlong, drowylong; |
189 | jint dx1, dy1, dx2, dy2; |
190 | |
191 | dxdxlong = DblToLong(pItxInfo->dxdx); |
192 | dydxlong = DblToLong(pItxInfo->dydx); |
193 | dxdylong = DblToLong(pItxInfo->dxdy); |
194 | dydylong = DblToLong(pItxInfo->dydy); |
195 | |
196 | dx1 = pBounds->x1; |
197 | dy1 = pBounds->y1; |
198 | dx2 = pBounds->x2; |
199 | dy2 = pBounds->y2; |
200 | *pEdges++ = dy1; |
201 | *pEdges++ = dy2; |
202 | |
203 | drowxlong = (dx2-dx1-1) * dxdxlong; |
204 | drowylong = (dx2-dx1-1) * dydxlong; |
205 | |
206 | while (dy1 < dy2) { |
207 | jlong xlong, ylong; |
208 | |
209 | dx1 = pBounds->x1; |
210 | dx2 = pBounds->x2; |
211 | |
212 | xlong = xbase; |
213 | ylong = ybase; |
214 | while (dx1 < dx2 && |
215 | (((juint) WholeOfLong(ylong)) >= sh || |
216 | ((juint) WholeOfLong(xlong)) >= sw)) |
217 | { |
218 | dx1++; |
219 | xlong += dxdxlong; |
220 | ylong += dydxlong; |
221 | } |
222 | |
223 | xlong = xbase + drowxlong; |
224 | ylong = ybase + drowylong; |
225 | while (dx2 > dx1 && |
226 | (((juint) WholeOfLong(ylong)) >= sh || |
227 | ((juint) WholeOfLong(xlong)) >= sw)) |
228 | { |
229 | dx2--; |
230 | xlong -= dxdxlong; |
231 | ylong -= dydxlong; |
232 | } |
233 | |
234 | *pEdges++ = dx1; |
235 | *pEdges++ = dx2; |
236 | |
237 | /* Increment to next scanline */ |
238 | xbase += dxdylong; |
239 | ybase += dydylong; |
240 | dy1++; |
241 | } |
242 | } |
243 | |
244 | static void |
245 | Transform_SafeHelper(JNIEnv *env, |
246 | SurfaceDataOps *srcOps, |
247 | SurfaceDataOps *dstOps, |
248 | SurfaceDataRasInfo *pSrcInfo, |
249 | SurfaceDataRasInfo *pDstInfo, |
250 | NativePrimitive *pMaskBlitPrim, |
251 | CompositeInfo *pCompInfo, |
252 | TransformHelperFunc *pHelperFunc, |
253 | TransformInterpFunc *pInterpFunc, |
254 | RegionData *pClipInfo, TransformInfo *pItxInfo, |
255 | jint *pData, jint *pEdges, |
256 | jint dxoff, jint dyoff, jint sw, jint sh); |
257 | |
258 | /* |
259 | * Class: sun_java2d_loops_TransformHelper |
260 | * Method: Transform |
261 | * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[III)V |
262 | */ |
263 | JNIEXPORT void JNICALL |
264 | Java_sun_java2d_loops_TransformHelper_Transform |
265 | (JNIEnv *env, jobject self, |
266 | jobject maskblit, |
267 | jobject srcData, jobject dstData, |
268 | jobject comp, jobject clip, |
269 | jobject itxform, jint txtype, |
270 | jint sx1, jint sy1, jint sx2, jint sy2, |
271 | jint dx1, jint dy1, jint dx2, jint dy2, |
272 | jintArray edgeArray, jint dxoff, jint dyoff) |
273 | { |
274 | SurfaceDataOps *srcOps; |
275 | SurfaceDataOps *dstOps; |
276 | SurfaceDataRasInfo srcInfo; |
277 | SurfaceDataRasInfo dstInfo; |
278 | NativePrimitive *pHelperPrim; |
279 | NativePrimitive *pMaskBlitPrim; |
280 | CompositeInfo compInfo; |
281 | RegionData clipInfo; |
282 | TransformInfo itxInfo; |
283 | jint maxlinepix; |
284 | TransformHelperFunc *pHelperFunc; |
285 | TransformInterpFunc *pInterpFunc; |
286 | jdouble xorig, yorig; |
287 | jlong numedges; |
288 | jint *pEdges; |
289 | jint edgebuf[2 + MAXEDGES * 2]; |
290 | union { |
291 | jlong align; |
292 | jint data[LINE_SIZE]; |
293 | } rgb; |
294 | |
295 | #ifdef MAKE_STUBS |
296 | static int th_initialized; |
297 | |
298 | /* For debugging only - used to swap in alternate funcs for perf testing */ |
299 | if (!th_initialized) { |
300 | if (getenv("TXSTUB" ) != 0) { |
301 | pBilinearFunc = BilinearInterpStub; |
302 | pBicubicFunc = BicubicInterpStub; |
303 | } else if (getenv("TXNOVIS" ) != 0) { |
304 | pBilinearFunc = BilinearInterp; |
305 | pBicubicFunc = BicubicInterp; |
306 | } |
307 | th_initialized = 1; |
308 | } |
309 | #endif /* MAKE_STUBS */ |
310 | |
311 | pHelperPrim = GetNativePrim(env, self); |
312 | if (pHelperPrim == NULL) { |
313 | /* Should never happen... */ |
314 | return; |
315 | } |
316 | pMaskBlitPrim = GetNativePrim(env, maskblit); |
317 | if (pMaskBlitPrim == NULL) { |
318 | /* Exception was thrown by GetNativePrim */ |
319 | return; |
320 | } |
321 | if (pMaskBlitPrim->pCompType->getCompInfo != NULL) { |
322 | (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp); |
323 | } |
324 | if (Region_GetInfo(env, clip, &clipInfo)) { |
325 | return; |
326 | } |
327 | |
328 | srcOps = SurfaceData_GetOps(env, srcData); |
329 | if (srcOps == 0) { |
330 | return; |
331 | } |
332 | dstOps = SurfaceData_GetOps(env, dstData); |
333 | if (dstOps == 0) { |
334 | return; |
335 | } |
336 | |
337 | /* |
338 | * Grab the appropriate pointer to the helper and interpolation |
339 | * routines and calculate the maximum number of destination pixels |
340 | * that can be processed in one intermediate buffer based on the |
341 | * size of the buffer and the number of samples needed per pixel. |
342 | */ |
343 | switch (txtype) { |
344 | case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR: |
345 | pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper; |
346 | pInterpFunc = NULL; |
347 | maxlinepix = LINE_SIZE; |
348 | break; |
349 | case java_awt_image_AffineTransformOp_TYPE_BILINEAR: |
350 | pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper; |
351 | pInterpFunc = pBilinearFunc; |
352 | maxlinepix = LINE_SIZE / 4; |
353 | break; |
354 | case java_awt_image_AffineTransformOp_TYPE_BICUBIC: |
355 | pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper; |
356 | pInterpFunc = pBicubicFunc; |
357 | maxlinepix = LINE_SIZE / 16; |
358 | break; |
359 | default: |
360 | // Should not happen, but just in case. |
361 | return; |
362 | } |
363 | |
364 | srcInfo.bounds.x1 = sx1; |
365 | srcInfo.bounds.y1 = sy1; |
366 | srcInfo.bounds.x2 = sx2; |
367 | srcInfo.bounds.y2 = sy2; |
368 | dstInfo.bounds.x1 = dx1; |
369 | dstInfo.bounds.y1 = dy1; |
370 | dstInfo.bounds.x2 = dx2; |
371 | dstInfo.bounds.y2 = dy2; |
372 | SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds); |
373 | if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags) |
374 | != SD_SUCCESS) |
375 | { |
376 | /* edgeArray should already contain zeros for min/maxy */ |
377 | return; |
378 | } |
379 | if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags) |
380 | != SD_SUCCESS) |
381 | { |
382 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
383 | /* edgeArray should already contain zeros for min/maxy */ |
384 | return; |
385 | } |
386 | Region_IntersectBounds(&clipInfo, &dstInfo.bounds); |
387 | Transform_GetInfo(env, itxform, &itxInfo); |
388 | |
389 | numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1)); |
390 | if (numedges <= 0) { |
391 | pEdges = NULL; |
392 | } else if (!JNU_IsNull(env, edgeArray)) { |
393 | /* |
394 | * Ideally Java should allocate an array large enough, but if |
395 | * we ever have a miscommunication about the number of edge |
396 | * lines, or if the Java array calculation should overflow to |
397 | * a positive number and succeed in allocating an array that |
398 | * is too small, we need to verify that it can still hold the |
399 | * number of integers that we plan to store to be safe. |
400 | */ |
401 | jsize edgesize = (*env)->GetArrayLength(env, edgeArray); |
402 | /* (edgesize/2 - 1) should avoid any overflow or underflow. */ |
403 | pEdges = (((edgesize / 2) - 1) >= numedges) |
404 | ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL) |
405 | : NULL; |
406 | } else if (numedges > MAXEDGES) { |
407 | /* numedges variable (jlong) can be at most ((1<<32)-1) */ |
408 | /* memsize can overflow a jint, but not a jlong */ |
409 | jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges); |
410 | pEdges = (memsize == ((size_t) memsize)) |
411 | ? malloc((size_t) memsize) |
412 | : NULL; |
413 | } else { |
414 | pEdges = edgebuf; |
415 | } |
416 | |
417 | if (pEdges == NULL) { |
418 | if (!(*env)->ExceptionCheck(env) && numedges > 0) { |
419 | JNU_ThrowInternalError(env, "Unable to allocate edge list" ); |
420 | } |
421 | SurfaceData_InvokeUnlock(env, dstOps, &dstInfo); |
422 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
423 | /* edgeArray should already contain zeros for min/maxy */ |
424 | return; |
425 | } |
426 | |
427 | |
428 | if (!Region_IsEmpty(&clipInfo)) { |
429 | srcOps->GetRasInfo(env, srcOps, &srcInfo); |
430 | dstOps->GetRasInfo(env, dstOps, &dstInfo); |
431 | if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) { |
432 | pEdges[0] = pEdges[1] = 0; |
433 | } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds, |
434 | &itxInfo, &xorig, &yorig)) |
435 | { |
436 | Transform_SafeHelper(env, srcOps, dstOps, |
437 | &srcInfo, &dstInfo, |
438 | pMaskBlitPrim, &compInfo, |
439 | pHelperFunc, pInterpFunc, |
440 | &clipInfo, &itxInfo, rgb.data, pEdges, |
441 | dxoff, dyoff, sx2-sx1, sy2-sy1); |
442 | } else { |
443 | SurfaceDataBounds span; |
444 | jlong dxdxlong, dydxlong; |
445 | jlong dxdylong, dydylong; |
446 | jlong xbase, ybase; |
447 | |
448 | dxdxlong = DblToLong(itxInfo.dxdx); |
449 | dydxlong = DblToLong(itxInfo.dydx); |
450 | dxdylong = DblToLong(itxInfo.dxdy); |
451 | dydylong = DblToLong(itxInfo.dydy); |
452 | xbase = DblToLong(xorig); |
453 | ybase = DblToLong(yorig); |
454 | |
455 | calculateEdges(pEdges, &dstInfo.bounds, &itxInfo, |
456 | xbase, ybase, sx2-sx1, sy2-sy1); |
457 | |
458 | Region_StartIteration(env, &clipInfo); |
459 | while (Region_NextIteration(&clipInfo, &span)) { |
460 | jlong rowxlong, rowylong; |
461 | void *pDst; |
462 | |
463 | dy1 = span.y1; |
464 | dy2 = span.y2; |
465 | rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong; |
466 | rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong; |
467 | |
468 | while (dy1 < dy2) { |
469 | jlong xlong, ylong; |
470 | |
471 | /* Note - process at most one scanline at a time. */ |
472 | |
473 | dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2]; |
474 | dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3]; |
475 | if (dx1 < span.x1) dx1 = span.x1; |
476 | if (dx2 > span.x2) dx2 = span.x2; |
477 | |
478 | /* All pixels from dx1 to dx2 have centers in bounds */ |
479 | while (dx1 < dx2) { |
480 | /* Can process at most one buffer full at a time */ |
481 | jint numpix = dx2 - dx1; |
482 | if (numpix > maxlinepix) { |
483 | numpix = maxlinepix; |
484 | } |
485 | |
486 | xlong = |
487 | rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong); |
488 | ylong = |
489 | rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong); |
490 | |
491 | /* Get IntArgbPre pixel data from source */ |
492 | (*pHelperFunc)(&srcInfo, |
493 | rgb.data, numpix, |
494 | xlong, dxdxlong, |
495 | ylong, dydxlong); |
496 | |
497 | /* Interpolate result pixels if needed */ |
498 | if (pInterpFunc) { |
499 | (*pInterpFunc)(rgb.data, numpix, |
500 | FractOfLong(xlong-LongOneHalf), |
501 | FractOfLong(dxdxlong), |
502 | FractOfLong(ylong-LongOneHalf), |
503 | FractOfLong(dydxlong)); |
504 | } |
505 | |
506 | /* Store/Composite interpolated pixels into dest */ |
507 | pDst = PtrCoord(dstInfo.rasBase, |
508 | dx1, dstInfo.pixelStride, |
509 | dy1, dstInfo.scanStride); |
510 | (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data, |
511 | 0, 0, 0, |
512 | numpix, 1, |
513 | &dstInfo, &srcInfo, |
514 | pMaskBlitPrim, |
515 | &compInfo); |
516 | |
517 | /* Increment to next buffer worth of input pixels */ |
518 | dx1 += maxlinepix; |
519 | } |
520 | |
521 | /* Increment to next scanline */ |
522 | rowxlong += dxdylong; |
523 | rowylong += dydylong; |
524 | dy1++; |
525 | } |
526 | } |
527 | Region_EndIteration(env, &clipInfo); |
528 | } |
529 | SurfaceData_InvokeRelease(env, dstOps, &dstInfo); |
530 | SurfaceData_InvokeRelease(env, srcOps, &srcInfo); |
531 | } else { |
532 | pEdges[0] = pEdges[1] = 0; |
533 | } |
534 | |
535 | if (!JNU_IsNull(env, edgeArray)) { |
536 | (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0); |
537 | } else if (pEdges != edgebuf) { |
538 | free(pEdges); |
539 | } |
540 | SurfaceData_InvokeUnlock(env, dstOps, &dstInfo); |
541 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
542 | } |
543 | |
544 | static void |
545 | Transform_SafeHelper(JNIEnv *env, |
546 | SurfaceDataOps *srcOps, |
547 | SurfaceDataOps *dstOps, |
548 | SurfaceDataRasInfo *pSrcInfo, |
549 | SurfaceDataRasInfo *pDstInfo, |
550 | NativePrimitive *pMaskBlitPrim, |
551 | CompositeInfo *pCompInfo, |
552 | TransformHelperFunc *pHelperFunc, |
553 | TransformInterpFunc *pInterpFunc, |
554 | RegionData *pClipInfo, TransformInfo *pItxInfo, |
555 | jint *pData, jint *pEdges, |
556 | jint dxoff, jint dyoff, jint sw, jint sh) |
557 | { |
558 | SurfaceDataBounds span; |
559 | jint dx1, dx2; |
560 | jint dy1, dy2; |
561 | jint i, iy; |
562 | |
563 | dy1 = pDstInfo->bounds.y1; |
564 | dy2 = pDstInfo->bounds.y2; |
565 | dx1 = pDstInfo->bounds.x1; |
566 | dx2 = pDstInfo->bounds.x2; |
567 | pEdges[0] = dy1; |
568 | pEdges[1] = dy2; |
569 | for (iy = dy1; iy < dy2; iy++) { |
570 | jint i = (iy - dy1) * 2; |
571 | /* row spans are set to max,min until we find a pixel in range below */ |
572 | pEdges[i + 2] = dx2; |
573 | pEdges[i + 3] = dx1; |
574 | } |
575 | |
576 | Region_StartIteration(env, pClipInfo); |
577 | while (Region_NextIteration(pClipInfo, &span)) { |
578 | dy1 = span.y1; |
579 | dy2 = span.y2; |
580 | while (dy1 < dy2) { |
581 | dx1 = span.x1; |
582 | dx2 = span.x2; |
583 | i = (dy1 - pDstInfo->bounds.y1) * 2; |
584 | while (dx1 < dx2) { |
585 | jdouble x, y; |
586 | jlong xlong, ylong; |
587 | |
588 | x = dxoff + dx1 + 0.5; |
589 | y = dyoff + dy1 + 0.5; |
590 | Transform_transform(pItxInfo, &x, &y); |
591 | xlong = DblToLong(x); |
592 | ylong = DblToLong(y); |
593 | |
594 | /* Process only pixels with centers in bounds |
595 | * Test double values to avoid overflow in conversion |
596 | * to long values and then also test the long values |
597 | * in case they rounded up and out of bounds during |
598 | * the conversion. |
599 | */ |
600 | if (x >= 0 && y >= 0 && x < sw && y < sh && |
601 | WholeOfLong(xlong) < sw && |
602 | WholeOfLong(ylong) < sh) |
603 | { |
604 | void *pDst; |
605 | |
606 | if (pEdges[i + 2] > dx1) { |
607 | pEdges[i + 2] = dx1; |
608 | } |
609 | if (pEdges[i + 3] <= dx1) { |
610 | pEdges[i + 3] = dx1 + 1; |
611 | } |
612 | |
613 | /* Get IntArgbPre pixel data from source */ |
614 | (*pHelperFunc)(pSrcInfo, |
615 | pData, 1, |
616 | xlong, 0, |
617 | ylong, 0); |
618 | |
619 | /* Interpolate result pixels if needed */ |
620 | if (pInterpFunc) { |
621 | (*pInterpFunc)(pData, 1, |
622 | FractOfLong(xlong-LongOneHalf), 0, |
623 | FractOfLong(ylong-LongOneHalf), 0); |
624 | } |
625 | |
626 | /* Store/Composite interpolated pixels into dest */ |
627 | pDst = PtrCoord(pDstInfo->rasBase, |
628 | dx1, pDstInfo->pixelStride, |
629 | dy1, pDstInfo->scanStride); |
630 | (*pMaskBlitPrim->funcs.maskblit)(pDst, pData, |
631 | 0, 0, 0, |
632 | 1, 1, |
633 | pDstInfo, pSrcInfo, |
634 | pMaskBlitPrim, |
635 | pCompInfo); |
636 | } |
637 | |
638 | /* Increment to next input pixel */ |
639 | dx1++; |
640 | } |
641 | |
642 | /* Increment to next scanline */ |
643 | dy1++; |
644 | } |
645 | } |
646 | Region_EndIteration(env, pClipInfo); |
647 | } |
648 | |
649 | #define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \ |
650 | (((v1)<<8) + ((v2)-(v1))*(f)) |
651 | |
652 | #define BL_ACCUM(comp) \ |
653 | do { \ |
654 | jint c1 = ((jubyte *) pRGB)[comp]; \ |
655 | jint c2 = ((jubyte *) pRGB)[comp+4]; \ |
656 | jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \ |
657 | c1 = ((jubyte *) pRGB)[comp+8]; \ |
658 | c2 = ((jubyte *) pRGB)[comp+12]; \ |
659 | c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \ |
660 | cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \ |
661 | ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \ |
662 | } while (0) |
663 | |
664 | static void |
665 | BilinearInterp(jint *pRGB, jint numpix, |
666 | jint xfract, jint dxfract, |
667 | jint yfract, jint dyfract) |
668 | { |
669 | jint j; |
670 | jint *pRes = pRGB; |
671 | |
672 | for (j = 0; j < numpix; j++) { |
673 | jint xfactor; |
674 | jint yfactor; |
675 | xfactor = URShift(xfract, 32-8); |
676 | yfactor = URShift(yfract, 32-8); |
677 | BL_ACCUM(0); |
678 | BL_ACCUM(1); |
679 | BL_ACCUM(2); |
680 | BL_ACCUM(3); |
681 | pRes++; |
682 | pRGB += 4; |
683 | xfract += dxfract; |
684 | yfract += dyfract; |
685 | } |
686 | } |
687 | |
/*
 * Branch free clamp of the signed 32 bit lvalue 'val' to the range
 * [0, max].  Relies on >> of a negative value being an arithmetic shift.
 */
#define SAT(val, max) \
    do { \
        (val) &= ~((val) >> 31);  /* negatives become 0 */ \
        (val) -= (max);           /* only overflows are now positive */ \
        (val) &= ((val) >> 31);   /* positives become 0 */ \
        (val) += (max);           /* range is now [0 -> max] */ \
    } while (0)

/*
 * Pack four components, each already clamped to [0, 255], into one
 * IntArgbPre pixel.  The shifts are done on unsigned values: shifting an
 * alpha of 128 or more left by 24 as a signed int would move a 1 into
 * the sign bit, which is undefined behavior in C.
 */
#define BC_PACK_ARGB(a, r, g, b) \
    ((jint) ((((juint) (a)) << 24) | \
             (((juint) (r)) << 16) | \
             (((juint) (g)) <<  8) | \
              ((juint) (b))))

#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH
#endif

/*
 * Each variant below defines the same set of macros:
 *   BC_TYPE          - type of the coefficients and accumulators
 *   BC_DblToCoeff(v) - converts a jdouble coefficient to BC_TYPE
 *   BC_COEFF_ONE     - the BC_TYPE representation of 1.0
 *   BC_V_HALF        - initial accumulator value (rounding bias)
 *   BC_CompToV(v)    - converts a 0..255 component to BC_TYPE
 *   BC_STORE_COMPS   - clamps accumA/R/G/B (alpha to 255, the colors
 *                      to the clamped alpha) and stores the packed
 *                      pixel through pRes
 */

#ifdef BICUBIC_USE_DBL_CAST

/* Floating point math, components converted with a cast. */
#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = BC_PACK_ARGB(a, r, g, b); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

#ifdef BICUBIC_USE_DBL_LUT

/* Floating point math, components converted with a 256 entry table. */
#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
#define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
#define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)

static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = BC_PACK_ARGB(a, r, g, b); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

#ifdef BICUBIC_USE_INT_MATH

/*
 * Integer math: coefficients carry 8 fractional bits, so the product of
 * two coefficients and a component carries 16, which BC_STORE_COMPS
 * shifts away again.
 */
#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *pRes = BC_PACK_ARGB(accumA, accumR, accumG, accumB); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */

/*
 * Add the contribution of sample pRGB[index] to the four accumulators.
 * The weight of the sample is the product of the bicubic_coeff entries
 * selected by xcindex and ycindex.
 * Uses pRGB, bicubic_coeff and accumA/R/G/B from the enclosing scope.
 */
#define BC_ACCUM(index, ycindex, xcindex) \
    do { \
        BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
        int rgb; \
        rgb = pRGB[index]; \
        accumB += BC_CompToV((rgb >>  0) & 0xff) * factor; \
        accumG += BC_CompToV((rgb >>  8) & 0xff) * factor; \
        accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \
        accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \
    } while (0)
796 | |
797 | static BC_TYPE bicubic_coeff[513]; |
798 | static jboolean bicubictableinited; |
799 | |
800 | static void |
801 | init_bicubic_table(jdouble A) |
802 | { |
803 | /* |
804 | * The following formulas are designed to give smooth |
805 | * results when 'A' is -0.5 or -1.0. |
806 | */ |
807 | int i; |
808 | for (i = 0; i < 256; i++) { |
809 | /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */ |
810 | jdouble x = i / 256.0; |
811 | x = ((A+2)*x - (A+3))*x*x + 1; |
812 | bicubic_coeff[i] = BC_DblToCoeff(x); |
813 | } |
814 | |
815 | for (; i < 384; i++) { |
816 | /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */ |
817 | jdouble x = i / 256.0; |
818 | x = ((A*x - 5*A)*x + 8*A)*x - 4*A; |
819 | bicubic_coeff[i] = BC_DblToCoeff(x); |
820 | } |
821 | |
822 | bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2; |
823 | |
824 | for (i++; i <= 512; i++) { |
825 | bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] + |
826 | bicubic_coeff[i-256] + |
827 | bicubic_coeff[768-i]); |
828 | } |
829 | |
830 | bicubictableinited = JNI_TRUE; |
831 | } |
832 | |
833 | static void |
834 | BicubicInterp(jint *pRGB, jint numpix, |
835 | jint xfract, jint dxfract, |
836 | jint yfract, jint dyfract) |
837 | { |
838 | jint i; |
839 | jint *pRes = pRGB; |
840 | |
841 | if (!bicubictableinited) { |
842 | init_bicubic_table(-0.5); |
843 | } |
844 | |
845 | for (i = 0; i < numpix; i++) { |
846 | BC_TYPE accumA, accumR, accumG, accumB; |
847 | jint xfactor, yfactor; |
848 | |
849 | xfactor = URShift(xfract, 32-8); |
850 | yfactor = URShift(yfract, 32-8); |
851 | accumA = accumR = accumG = accumB = BC_V_HALF; |
852 | BC_ACCUM(0, yfactor+256, xfactor+256); |
853 | BC_ACCUM(1, yfactor+256, xfactor+ 0); |
854 | BC_ACCUM(2, yfactor+256, 256-xfactor); |
855 | BC_ACCUM(3, yfactor+256, 512-xfactor); |
856 | BC_ACCUM(4, yfactor+ 0, xfactor+256); |
857 | BC_ACCUM(5, yfactor+ 0, xfactor+ 0); |
858 | BC_ACCUM(6, yfactor+ 0, 256-xfactor); |
859 | BC_ACCUM(7, yfactor+ 0, 512-xfactor); |
860 | BC_ACCUM(8, 256-yfactor, xfactor+256); |
861 | BC_ACCUM(9, 256-yfactor, xfactor+ 0); |
862 | BC_ACCUM(10, 256-yfactor, 256-xfactor); |
863 | BC_ACCUM(11, 256-yfactor, 512-xfactor); |
864 | BC_ACCUM(12, 512-yfactor, xfactor+256); |
865 | BC_ACCUM(13, 512-yfactor, xfactor+ 0); |
866 | BC_ACCUM(14, 512-yfactor, 256-xfactor); |
867 | BC_ACCUM(15, 512-yfactor, 512-xfactor); |
868 | BC_STORE_COMPS(pRes); |
869 | pRes++; |
870 | pRGB += 16; |
871 | xfract += dxfract; |
872 | yfract += dyfract; |
873 | } |
874 | } |
875 | |
#ifdef MAKE_STUBS

/*
 * NOP replacement for BilinearInterp: no interpolation is done, the
 * first of every 4 input samples is simply copied to the output.
 * The fraction parameters are ignored.
 */
static void
BilinearInterpStub(jint *pRGBbase, jint numpix,
                   jint xfract, jint dxfract,
                   jint yfract, jint dyfract)
{
    jint pix;

    for (pix = 0; pix < numpix; pix++) {
        pRGBbase[pix] = pRGBbase[pix * 4];
    }
}

/*
 * NOP replacement for BicubicInterp: no interpolation is done, the
 * sample at index 5 of every 16 input samples is simply copied to the
 * output.  The fraction parameters are ignored.
 */
static void
BicubicInterpStub(jint *pRGBbase, jint numpix,
                  jint xfract, jint dxfract,
                  jint yfract, jint dyfract)
{
    jint pix;

    for (pix = 0; pix < numpix; pix++) {
        pRGBbase[pix] = pRGBbase[pix * 16 + 5];
    }
}

#endif /* MAKE_STUBS */
905 | |