1/*
2 * Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <stdlib.h>
27#include "jni_util.h"
28#include "math.h"
29
30#include "GraphicsPrimitiveMgr.h"
31#include "Region.h"
32
33#include "sun_java2d_loops_TransformHelper.h"
34#include "java_awt_image_AffineTransformOp.h"
35
36/*
37 * The stub functions replace the bilinear and bicubic interpolation
38 * functions with NOP versions so that the performance of the helper
39 * functions that fetch the data can be more directly tested. They
40 * are not compiled or enabled by default. Change the following
41 * #undef to a #define to build the stub functions.
42 *
43 * When compiled, they are enabled by the environment variable TXSTUB.
44 * When compiled, there is also code to disable the VIS versions and
45 * use the C versions in this file in their place by defining the TXNOVIS
46 * environment variable.
47 */
48#undef MAKE_STUBS
49
/*
 * The number of IntArgbPre samples to store in the temporary buffer.
 * The transform loop below divides this by the number of samples the
 * selected helper produces per destination pixel (1 for nearest neighbor,
 * 4 for bilinear, 16 for bicubic) to get the maximum number of pixels
 * that can be processed per helper call.
 */
#define LINE_SIZE       2048

/*
 * The size of a stack allocated buffer to hold edge coordinates (see below).
 * This is the number of scanlines (xmin,xmax pairs) the stack buffer can
 * describe; the buffer itself holds 2 + MAXEDGES * 2 jints.
 */
#define MAXEDGES 1024

/* Declare the software interpolation functions. */
static TransformInterpFunc BilinearInterp;
static TransformInterpFunc BicubicInterp;

#ifdef MAKE_STUBS
/* Optionally Declare the stub interpolation functions. */
static TransformInterpFunc BilinearInterpStub;
static TransformInterpFunc BicubicInterpStub;
#endif /* MAKE_STUBS */

/*
 * Initially choose the software interpolation functions.
 * These choices can be overridden by platform code that runs during the
 * primitive registration phase of initialization by storing pointers to
 * better functions in these pointers.
 * Compiling the stubs also turns on code below that can re-install the
 * software functions or stub functions on the first call to this primitive.
 * Note that these two pointers are not static, so they are visible to
 * other translation units.
 */
TransformInterpFunc *pBilinearFunc = BilinearInterp;
TransformInterpFunc *pBicubicFunc = BicubicInterp;
76
77/*
78 * The dxydxy parameters of the inverse transform determine how
79 * quickly we step through the source image. For tiny scale
80 * factors (on the order of 1E-16 or so) the stepping distances
81 * are huge. The image has been scaled so small that stepping
82 * a single pixel in device space moves the sampling point by
83 * billions (or more) pixels in the source image space. These
84 * huge stepping values can overflow the whole part of the longs
85 * we use for the fixed point stepping equations and so we need
86 * a more robust solution. We could simply iterate over every
87 * device pixel, use the inverse transform to transform it back
88 * into the source image coordinate system and then test it for
89 * being in range and sample pixel-by-pixel, but that is quite
90 * a bit more expensive. Fortunately, if the scale factors are
91 * so tiny that we overflow our long values then the number of
92 * pixels we are planning to visit should be very tiny. The only
93 * exception to that rule is if the scale factor along one
94 * dimension is tiny (creating the huge stepping values), and
95 * the scale factor along the other dimension is fairly regular
96 * or an up-scale. In that case we have a lot of pixels along
97 * the direction of the larger axis to sample, but few along the
98 * smaller axis. Though, pessimally, with an added shear factor
99 * such a linearly tiny image could have bounds that cover a large
100 * number of pixels. Such odd transformations should be very
101 * rare and the absolute limit on calculations would involve a
102 * single reverse transform of every pixel in the output image
103 * which is not fast, but it should not cause an undue stall
104 * of the rendering software.
105 *
106 * The specific test we will use is to calculate the inverse
107 * transformed values of every corner of the destination bounds
108 * (in order to be user-clip independent) and if we can
109 * perform a fixed-point-long inverse transform of all of
110 * those points without overflowing we will use the fast
111 * fixed point algorithm. Otherwise we will use the safe
112 * per-pixel transform algorithm.
113 * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth
114 * Transformed they are:
115 * tx, ty
116 * tx +dxdy*H, ty +dydy*H
117 * tx+dxdx*W, ty+dydx*W
118 * tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H
119 */
120/* We reject coordinates not less than 1<<30 so that the distance between */
121/* any 2 of them is less than 1<<31 which would overflow into the sign */
122/* bit of a signed long value used to represent fixed point coordinates. */
123#define TX_FIXED_UNSAFE(v) (fabs(v) >= (1<<30))
124static jboolean
125checkOverflow(jint dxoff, jint dyoff,
126 SurfaceDataBounds *pBounds,
127 TransformInfo *pItxInfo,
128 jdouble *retx, jdouble *rety)
129{
130 jdouble x, y;
131
132 x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
133 y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
134 Transform_transform(pItxInfo, &x, &y);
135 *retx = x;
136 *rety = y;
137 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
138 return JNI_TRUE;
139 }
140
141 x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
142 y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */
143 Transform_transform(pItxInfo, &x, &y);
144 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
145 return JNI_TRUE;
146 }
147
148 x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */
149 y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
150 Transform_transform(pItxInfo, &x, &y);
151 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
152 return JNI_TRUE;
153 }
154
155 x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */
156 y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */
157 Transform_transform(pItxInfo, &x, &y);
158 if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) {
159 return JNI_TRUE;
160 }
161
162 return JNI_FALSE;
163}
164
165/*
166 * Fill the edge buffer with pairs of coordinates representing the maximum
167 * left and right pixels of the destination surface that should be processed
168 * on each scanline, clipped to the bounds parameter.
169 * The number of scanlines to calculate is implied by the bounds parameter.
170 * Only pixels that map back through the specified (inverse) transform to a
171 * source coordinate that falls within the (0, 0, sw, sh) bounds of the
172 * source image should be processed.
173 * pEdges points to an array of jints that holds 2 + numedges*2 values where
174 * numedges should match (pBounds->y2 - pBounds->y1).
175 * The first two jints in pEdges should be set to y1 and y2 and every pair
176 * of jints after that represent the xmin,xmax of all pixels in range of
177 * the transformed blit for the corresponding scanline.
178 */
179static void
180calculateEdges(jint *pEdges,
181 SurfaceDataBounds *pBounds,
182 TransformInfo *pItxInfo,
183 jlong xbase, jlong ybase,
184 juint sw, juint sh)
185{
186 jlong dxdxlong, dydxlong;
187 jlong dxdylong, dydylong;
188 jlong drowxlong, drowylong;
189 jint dx1, dy1, dx2, dy2;
190
191 dxdxlong = DblToLong(pItxInfo->dxdx);
192 dydxlong = DblToLong(pItxInfo->dydx);
193 dxdylong = DblToLong(pItxInfo->dxdy);
194 dydylong = DblToLong(pItxInfo->dydy);
195
196 dx1 = pBounds->x1;
197 dy1 = pBounds->y1;
198 dx2 = pBounds->x2;
199 dy2 = pBounds->y2;
200 *pEdges++ = dy1;
201 *pEdges++ = dy2;
202
203 drowxlong = (dx2-dx1-1) * dxdxlong;
204 drowylong = (dx2-dx1-1) * dydxlong;
205
206 while (dy1 < dy2) {
207 jlong xlong, ylong;
208
209 dx1 = pBounds->x1;
210 dx2 = pBounds->x2;
211
212 xlong = xbase;
213 ylong = ybase;
214 while (dx1 < dx2 &&
215 (((juint) WholeOfLong(ylong)) >= sh ||
216 ((juint) WholeOfLong(xlong)) >= sw))
217 {
218 dx1++;
219 xlong += dxdxlong;
220 ylong += dydxlong;
221 }
222
223 xlong = xbase + drowxlong;
224 ylong = ybase + drowylong;
225 while (dx2 > dx1 &&
226 (((juint) WholeOfLong(ylong)) >= sh ||
227 ((juint) WholeOfLong(xlong)) >= sw))
228 {
229 dx2--;
230 xlong -= dxdxlong;
231 ylong -= dydxlong;
232 }
233
234 *pEdges++ = dx1;
235 *pEdges++ = dx2;
236
237 /* Increment to next scanline */
238 xbase += dxdylong;
239 ybase += dydylong;
240 dy1++;
241 }
242}
243
/*
 * Forward declaration of the per-pixel fallback, defined later in this
 * file.  The Transform entry point calls it when checkOverflow() reports
 * that the fixed point stepping equations cannot be used safely.
 */
static void
Transform_SafeHelper(JNIEnv *env,
                     SurfaceDataOps *srcOps,
                     SurfaceDataOps *dstOps,
                     SurfaceDataRasInfo *pSrcInfo,
                     SurfaceDataRasInfo *pDstInfo,
                     NativePrimitive *pMaskBlitPrim,
                     CompositeInfo *pCompInfo,
                     TransformHelperFunc *pHelperFunc,
                     TransformInterpFunc *pInterpFunc,
                     RegionData *pClipInfo, TransformInfo *pItxInfo,
                     jint *pData, jint *pEdges,
                     jint dxoff, jint dyoff, jint sw, jint sh);
257
/*
 * Class:     sun_java2d_loops_TransformHelper
 * Method:    Transform
 * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[III)V
 *
 * Renders the source rectangle (sx1,sy1)-(sx2,sy2) into the destination
 * rectangle (dx1,dy1)-(dx2,dy2), clipped by 'clip', by mapping each
 * destination pixel center back through the inverse transform 'itxform'.
 * 'txtype' selects nearest neighbor, bilinear or bicubic sampling.
 *
 * Per-scanline xmin/xmax pairs of the touched destination pixels are
 * recorded in an edge list (see calculateEdges for the layout).  If
 * 'edgeArray' is non-null the list is written into that Java array,
 * otherwise a stack or heap buffer is used and discarded on return.
 *
 * The function returns silently if a primitive, the clip info, or the
 * SurfaceDataOps cannot be obtained, if txtype is unknown, or if a
 * surface Lock fails.  It throws InternalError (unless an exception is
 * already pending) when an edge list is needed but cannot be obtained.
 */
JNIEXPORT void JNICALL
Java_sun_java2d_loops_TransformHelper_Transform
    (JNIEnv *env, jobject self,
     jobject maskblit,
     jobject srcData, jobject dstData,
     jobject comp, jobject clip,
     jobject itxform, jint txtype,
     jint sx1, jint sy1, jint sx2, jint sy2,
     jint dx1, jint dy1, jint dx2, jint dy2,
     jintArray edgeArray, jint dxoff, jint dyoff)
{
    SurfaceDataOps *srcOps;
    SurfaceDataOps *dstOps;
    SurfaceDataRasInfo srcInfo;
    SurfaceDataRasInfo dstInfo;
    NativePrimitive *pHelperPrim;
    NativePrimitive *pMaskBlitPrim;
    CompositeInfo compInfo;
    RegionData clipInfo;
    TransformInfo itxInfo;
    jint maxlinepix;            /* max dest pixels per helper call */
    TransformHelperFunc *pHelperFunc;
    TransformInterpFunc *pInterpFunc;
    jdouble xorig, yorig;       /* source coords of first dest pixel center */
    jlong numedges;             /* number of destination scanlines */
    jint *pEdges;
    jint edgebuf[2 + MAXEDGES * 2];
    /* The jlong member only serves to align the sample buffer. */
    union {
        jlong align;
        jint data[LINE_SIZE];
    } rgb;

#ifdef MAKE_STUBS
    static int th_initialized;

    /* For debugging only - used to swap in alternate funcs for perf testing */
    if (!th_initialized) {
        if (getenv("TXSTUB") != 0) {
            pBilinearFunc = BilinearInterpStub;
            pBicubicFunc = BicubicInterpStub;
        } else if (getenv("TXNOVIS") != 0) {
            pBilinearFunc = BilinearInterp;
            pBicubicFunc = BicubicInterp;
        }
        th_initialized = 1;
    }
#endif /* MAKE_STUBS */

    pHelperPrim = GetNativePrim(env, self);
    if (pHelperPrim == NULL) {
        /* Should never happen... */
        return;
    }
    pMaskBlitPrim = GetNativePrim(env, maskblit);
    if (pMaskBlitPrim == NULL) {
        /* Exception was thrown by GetNativePrim */
        return;
    }
    /* compInfo is only filled in when the composite type provides a getter. */
    if (pMaskBlitPrim->pCompType->getCompInfo != NULL) {
        (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp);
    }
    if (Region_GetInfo(env, clip, &clipInfo)) {
        return;
    }

    srcOps = SurfaceData_GetOps(env, srcData);
    if (srcOps == 0) {
        return;
    }
    dstOps = SurfaceData_GetOps(env, dstData);
    if (dstOps == 0) {
        return;
    }

    /*
     * Grab the appropriate pointer to the helper and interpolation
     * routines and calculate the maximum number of destination pixels
     * that can be processed in one intermediate buffer based on the
     * size of the buffer and the number of samples needed per pixel.
     */
    switch (txtype) {
    case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper;
        pInterpFunc = NULL;
        maxlinepix = LINE_SIZE;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BILINEAR:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper;
        pInterpFunc = pBilinearFunc;
        maxlinepix = LINE_SIZE / 4;
        break;
    case java_awt_image_AffineTransformOp_TYPE_BICUBIC:
        pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper;
        pInterpFunc = pBicubicFunc;
        maxlinepix = LINE_SIZE / 16;
        break;
    default:
        /* Should not happen, but just in case. */
        /* Nothing has been locked yet, so a plain return is sufficient. */
        return;
    }

    srcInfo.bounds.x1 = sx1;
    srcInfo.bounds.y1 = sy1;
    srcInfo.bounds.x2 = sx2;
    srcInfo.bounds.y2 = sy2;
    dstInfo.bounds.x1 = dx1;
    dstInfo.bounds.y1 = dy1;
    dstInfo.bounds.x2 = dx2;
    dstInfo.bounds.y2 = dy2;
    SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
    if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags)
        != SD_SUCCESS)
    {
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags)
        != SD_SUCCESS)
    {
        /* The source is already locked at this point; undo that. */
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }
    Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
    Transform_GetInfo(env, itxform, &itxInfo);

    /*
     * Choose storage for the edge list: nothing if there are no
     * scanlines, the caller's Java array if one was supplied and is
     * large enough, a heap buffer if the stack buffer is too small,
     * or the stack buffer otherwise.
     */
    numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1));
    if (numedges <= 0) {
        pEdges = NULL;
    } else if (!JNU_IsNull(env, edgeArray)) {
        /*
         * Ideally Java should allocate an array large enough, but if
         * we ever have a miscommunication about the number of edge
         * lines, or if the Java array calculation should overflow to
         * a positive number and succeed in allocating an array that
         * is too small, we need to verify that it can still hold the
         * number of integers that we plan to store to be safe.
         */
        jsize edgesize = (*env)->GetArrayLength(env, edgeArray);
        /* (edgesize/2 - 1) should avoid any overflow or underflow. */
        pEdges = (((edgesize / 2) - 1) >= numedges)
            ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL)
            : NULL;
    } else if (numedges > MAXEDGES) {
        /* numedges variable (jlong) can be at most ((1<<32)-1) */
        /* memsize can overflow a jint, but not a jlong */
        jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges);
        /* Only allocate if the byte count survives the cast to size_t. */
        pEdges = (memsize == ((size_t) memsize))
            ? malloc((size_t) memsize)
            : NULL;
    } else {
        pEdges = edgebuf;
    }

    if (pEdges == NULL) {
        /* An empty destination (numedges <= 0) is not an error. */
        if (!(*env)->ExceptionCheck(env) && numedges > 0) {
            JNU_ThrowInternalError(env, "Unable to allocate edge list");
        }
        SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
        SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
        /* edgeArray should already contain zeros for min/maxy */
        return;
    }


    if (!Region_IsEmpty(&clipInfo)) {
        srcOps->GetRasInfo(env, srcOps, &srcInfo);
        dstOps->GetRasInfo(env, dstOps, &dstInfo);
        if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) {
            /* No pixel access available: report an empty y range. */
            pEdges[0] = pEdges[1] = 0;
        } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds,
                                 &itxInfo, &xorig, &yorig))
        {
            /* Fixed point stepping is unsafe; transform pixel by pixel. */
            Transform_SafeHelper(env, srcOps, dstOps,
                                 &srcInfo, &dstInfo,
                                 pMaskBlitPrim, &compInfo,
                                 pHelperFunc, pInterpFunc,
                                 &clipInfo, &itxInfo, rgb.data, pEdges,
                                 dxoff, dyoff, sx2-sx1, sy2-sy1);
        } else {
            SurfaceDataBounds span;
            jlong dxdxlong, dydxlong;
            jlong dxdylong, dydylong;
            jlong xbase, ybase;

            /* Fixed point versions of the transform deltas and origin. */
            dxdxlong = DblToLong(itxInfo.dxdx);
            dydxlong = DblToLong(itxInfo.dydx);
            dxdylong = DblToLong(itxInfo.dxdy);
            dydylong = DblToLong(itxInfo.dydy);
            xbase = DblToLong(xorig);
            ybase = DblToLong(yorig);

            calculateEdges(pEdges, &dstInfo.bounds, &itxInfo,
                           xbase, ybase, sx2-sx1, sy2-sy1);

            /*
             * From here on the dx1/dy1/dx2/dy2 parameters are reused as
             * loop variables describing the current span and scanline.
             */
            Region_StartIteration(env, &clipInfo);
            while (Region_NextIteration(&clipInfo, &span)) {
                jlong rowxlong, rowylong;
                void *pDst;

                dy1 = span.y1;
                dy2 = span.y2;
                /* Source coords of column bounds.x1 on the span's first row */
                rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong;
                rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong;

                while (dy1 < dy2) {
                    jlong xlong, ylong;

                    /* Note - process at most one scanline at a time. */

                    /* Clip this row's precomputed edge range to the span. */
                    dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2];
                    dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3];
                    if (dx1 < span.x1) dx1 = span.x1;
                    if (dx2 > span.x2) dx2 = span.x2;

                    /* All pixels from dx1 to dx2 have centers in bounds */
                    while (dx1 < dx2) {
                        /* Can process at most one buffer full at a time */
                        jint numpix = dx2 - dx1;
                        if (numpix > maxlinepix) {
                            numpix = maxlinepix;
                        }

                        /*
                         * Recompute the start coordinates from the row
                         * origin for every chunk rather than accumulating.
                         */
                        xlong =
                            rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong);
                        ylong =
                            rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong);

                        /* Get IntArgbPre pixel data from source */
                        (*pHelperFunc)(&srcInfo,
                                       rgb.data, numpix,
                                       xlong, dxdxlong,
                                       ylong, dydxlong);

                        /* Interpolate result pixels if needed */
                        if (pInterpFunc) {
                            (*pInterpFunc)(rgb.data, numpix,
                                           FractOfLong(xlong-LongOneHalf),
                                           FractOfLong(dxdxlong),
                                           FractOfLong(ylong-LongOneHalf),
                                           FractOfLong(dydxlong));
                        }

                        /* Store/Composite interpolated pixels into dest */
                        pDst = PtrCoord(dstInfo.rasBase,
                                        dx1, dstInfo.pixelStride,
                                        dy1, dstInfo.scanStride);
                        (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data,
                                                         0, 0, 0,
                                                         numpix, 1,
                                                         &dstInfo, &srcInfo,
                                                         pMaskBlitPrim,
                                                         &compInfo);

                        /* Increment to next buffer worth of input pixels */
                        dx1 += maxlinepix;
                    }

                    /* Increment to next scanline */
                    rowxlong += dxdylong;
                    rowylong += dydylong;
                    dy1++;
                }
            }
            Region_EndIteration(env, &clipInfo);
        }
        /* Release pairs with the GetRasInfo calls made above. */
        SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
        SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
    } else {
        /* Empty clip: report an empty y range. */
        pEdges[0] = pEdges[1] = 0;
    }

    /* Give back the edge storage according to where it came from. */
    if (!JNU_IsNull(env, edgeArray)) {
        (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0);
    } else if (pEdges != edgebuf) {
        free(pEdges);
    }
    SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
    SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
}
543
544static void
545Transform_SafeHelper(JNIEnv *env,
546 SurfaceDataOps *srcOps,
547 SurfaceDataOps *dstOps,
548 SurfaceDataRasInfo *pSrcInfo,
549 SurfaceDataRasInfo *pDstInfo,
550 NativePrimitive *pMaskBlitPrim,
551 CompositeInfo *pCompInfo,
552 TransformHelperFunc *pHelperFunc,
553 TransformInterpFunc *pInterpFunc,
554 RegionData *pClipInfo, TransformInfo *pItxInfo,
555 jint *pData, jint *pEdges,
556 jint dxoff, jint dyoff, jint sw, jint sh)
557{
558 SurfaceDataBounds span;
559 jint dx1, dx2;
560 jint dy1, dy2;
561 jint i, iy;
562
563 dy1 = pDstInfo->bounds.y1;
564 dy2 = pDstInfo->bounds.y2;
565 dx1 = pDstInfo->bounds.x1;
566 dx2 = pDstInfo->bounds.x2;
567 pEdges[0] = dy1;
568 pEdges[1] = dy2;
569 for (iy = dy1; iy < dy2; iy++) {
570 jint i = (iy - dy1) * 2;
571 /* row spans are set to max,min until we find a pixel in range below */
572 pEdges[i + 2] = dx2;
573 pEdges[i + 3] = dx1;
574 }
575
576 Region_StartIteration(env, pClipInfo);
577 while (Region_NextIteration(pClipInfo, &span)) {
578 dy1 = span.y1;
579 dy2 = span.y2;
580 while (dy1 < dy2) {
581 dx1 = span.x1;
582 dx2 = span.x2;
583 i = (dy1 - pDstInfo->bounds.y1) * 2;
584 while (dx1 < dx2) {
585 jdouble x, y;
586 jlong xlong, ylong;
587
588 x = dxoff + dx1 + 0.5;
589 y = dyoff + dy1 + 0.5;
590 Transform_transform(pItxInfo, &x, &y);
591 xlong = DblToLong(x);
592 ylong = DblToLong(y);
593
594 /* Process only pixels with centers in bounds
595 * Test double values to avoid overflow in conversion
596 * to long values and then also test the long values
597 * in case they rounded up and out of bounds during
598 * the conversion.
599 */
600 if (x >= 0 && y >= 0 && x < sw && y < sh &&
601 WholeOfLong(xlong) < sw &&
602 WholeOfLong(ylong) < sh)
603 {
604 void *pDst;
605
606 if (pEdges[i + 2] > dx1) {
607 pEdges[i + 2] = dx1;
608 }
609 if (pEdges[i + 3] <= dx1) {
610 pEdges[i + 3] = dx1 + 1;
611 }
612
613 /* Get IntArgbPre pixel data from source */
614 (*pHelperFunc)(pSrcInfo,
615 pData, 1,
616 xlong, 0,
617 ylong, 0);
618
619 /* Interpolate result pixels if needed */
620 if (pInterpFunc) {
621 (*pInterpFunc)(pData, 1,
622 FractOfLong(xlong-LongOneHalf), 0,
623 FractOfLong(ylong-LongOneHalf), 0);
624 }
625
626 /* Store/Composite interpolated pixels into dest */
627 pDst = PtrCoord(pDstInfo->rasBase,
628 dx1, pDstInfo->pixelStride,
629 dy1, pDstInfo->scanStride);
630 (*pMaskBlitPrim->funcs.maskblit)(pDst, pData,
631 0, 0, 0,
632 1, 1,
633 pDstInfo, pSrcInfo,
634 pMaskBlitPrim,
635 pCompInfo);
636 }
637
638 /* Increment to next input pixel */
639 dx1++;
640 }
641
642 /* Increment to next scanline */
643 dy1++;
644 }
645 }
646 Region_EndIteration(env, pClipInfo);
647}
648
649#define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \
650 (((v1)<<8) + ((v2)-(v1))*(f))
651
652#define BL_ACCUM(comp) \
653 do { \
654 jint c1 = ((jubyte *) pRGB)[comp]; \
655 jint c2 = ((jubyte *) pRGB)[comp+4]; \
656 jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
657 c1 = ((jubyte *) pRGB)[comp+8]; \
658 c2 = ((jubyte *) pRGB)[comp+12]; \
659 c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \
660 cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \
661 ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \
662 } while (0)
663
664static void
665BilinearInterp(jint *pRGB, jint numpix,
666 jint xfract, jint dxfract,
667 jint yfract, jint dyfract)
668{
669 jint j;
670 jint *pRes = pRGB;
671
672 for (j = 0; j < numpix; j++) {
673 jint xfactor;
674 jint yfactor;
675 xfactor = URShift(xfract, 32-8);
676 yfactor = URShift(yfract, 32-8);
677 BL_ACCUM(0);
678 BL_ACCUM(1);
679 BL_ACCUM(2);
680 BL_ACCUM(3);
681 pRes++;
682 pRGB += 4;
683 xfract += dxfract;
684 yfract += dyfract;
685 }
686}
687
/*
 * Clamp the integer variable 'val' to the range [0, max] in place,
 * without branching.
 *
 * 'val' is assigned to and both arguments are expanded several times
 * without parentheses, so pass plain variables or constants only.
 * NOTE(review): the trick relies on (val >> 31) yielding all one bits for
 * a negative val and zero otherwise, i.e. on a 32-bit operand and a sign
 * propagating right shift - confirm for any new platform.
 */
#define SAT(val, max) \
    do { \
        val &= ~(val >> 31);  /* negatives become 0 */ \
        val -= max;           /* only overflows are now positive */ \
        val &= (val >> 31);   /* positives become 0 */ \
        val += max;           /* range is now [0 -> max] */ \
    } while (0)
695
/*
 * Select which set of BC_* macros (defined below) the bicubic code uses.
 * Exactly one of BICUBIC_USE_DBL_LUT or BICUBIC_USE_INT_MATH is defined
 * here.  The BICUBIC_USE_DBL_CAST variant below is never selected by this
 * logic; it is only compiled if that macro is defined some other way.
 */
#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* (this branch is taken for every platform that is not sparc) */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH
#endif
710
/*
 * Each variant below supplies the same set of macros for the bicubic code:
 *   BC_TYPE          arithmetic type of the coefficients and accumulators
 *   BC_DblToCoeff(v) converts a double weight to a BC_TYPE coefficient
 *   BC_COEFF_ONE     the coefficient value representing a weight of 1.0
 *   BC_V_HALF        initial accumulator value (rounding bias)
 *   BC_CompToV(v)    converts an 8-bit component to BC_TYPE
 *   BC_STORE_COMPS   clamps accumA/R/G/B (alpha to 0..255, colors to
 *                    0..alpha) and stores the packed pixel in *pRes
 *
 * In every variant the pixel is packed using unsigned arithmetic because
 * shifting an alpha of 128 or more left by 24 bits overflows a signed
 * 32-bit int.
 */

#ifdef BICUBIC_USE_DBL_CAST

/* Double math; components are converted with a plain cast. */
#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | ((juint) r << 16) | \
                        ((juint) g << 8) | ((juint) b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

#ifdef BICUBIC_USE_DBL_LUT

/* Table of the doubles 0.0 .. 255.0 built at compile time. */
#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
#define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
#define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)

static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

/* Double math; components are converted through ItoD_table. */
#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | ((juint) r << 16) | \
                        ((juint) g << 8) | ((juint) b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

#ifdef BICUBIC_USE_INT_MATH

/*
 * Integer math; coefficients are 8.8 fixed point, so the product of two
 * coefficients and a component carries 16 fractional bits which
 * BC_STORE_COMPS shifts away.
 */
#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *pRes = (jint) (((juint) accumA << 24) | ((juint) accumR << 16) | \
                        ((juint) accumG << 8) | ((juint) accumB)); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */
785
/*
 * Add the contribution of sample pRGB[index] to the four accumulators.
 * The weight is the product of the table entries bicubic_coeff[xcindex]
 * and bicubic_coeff[ycindex].  Uses pRGB, accumA, accumR, accumG and
 * accumB from the enclosing scope, and declares a local named 'rgb'.
 */
#define BC_ACCUM(index, ycindex, xcindex) \
    do { \
        BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \
        int rgb; \
        rgb = pRGB[index]; \
        accumB += BC_CompToV((rgb >>  0) & 0xff) * factor; \
        accumG += BC_CompToV((rgb >>  8) & 0xff) * factor; \
        accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \
        accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \
    } while (0)

/*
 * Weight table filled in by init_bicubic_table(): entry i is the weight
 * for a distance of i/256 from the sample point, for i in 0..512.
 */
static BC_TYPE bicubic_coeff[513];
/* Set to JNI_TRUE once bicubic_coeff has been filled in. */
static jboolean bicubictableinited;
799
800static void
801init_bicubic_table(jdouble A)
802{
803 /*
804 * The following formulas are designed to give smooth
805 * results when 'A' is -0.5 or -1.0.
806 */
807 int i;
808 for (i = 0; i < 256; i++) {
809 /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */
810 jdouble x = i / 256.0;
811 x = ((A+2)*x - (A+3))*x*x + 1;
812 bicubic_coeff[i] = BC_DblToCoeff(x);
813 }
814
815 for (; i < 384; i++) {
816 /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */
817 jdouble x = i / 256.0;
818 x = ((A*x - 5*A)*x + 8*A)*x - 4*A;
819 bicubic_coeff[i] = BC_DblToCoeff(x);
820 }
821
822 bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2;
823
824 for (i++; i <= 512; i++) {
825 bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] +
826 bicubic_coeff[i-256] +
827 bicubic_coeff[768-i]);
828 }
829
830 bicubictableinited = JNI_TRUE;
831}
832
833static void
834BicubicInterp(jint *pRGB, jint numpix,
835 jint xfract, jint dxfract,
836 jint yfract, jint dyfract)
837{
838 jint i;
839 jint *pRes = pRGB;
840
841 if (!bicubictableinited) {
842 init_bicubic_table(-0.5);
843 }
844
845 for (i = 0; i < numpix; i++) {
846 BC_TYPE accumA, accumR, accumG, accumB;
847 jint xfactor, yfactor;
848
849 xfactor = URShift(xfract, 32-8);
850 yfactor = URShift(yfract, 32-8);
851 accumA = accumR = accumG = accumB = BC_V_HALF;
852 BC_ACCUM(0, yfactor+256, xfactor+256);
853 BC_ACCUM(1, yfactor+256, xfactor+ 0);
854 BC_ACCUM(2, yfactor+256, 256-xfactor);
855 BC_ACCUM(3, yfactor+256, 512-xfactor);
856 BC_ACCUM(4, yfactor+ 0, xfactor+256);
857 BC_ACCUM(5, yfactor+ 0, xfactor+ 0);
858 BC_ACCUM(6, yfactor+ 0, 256-xfactor);
859 BC_ACCUM(7, yfactor+ 0, 512-xfactor);
860 BC_ACCUM(8, 256-yfactor, xfactor+256);
861 BC_ACCUM(9, 256-yfactor, xfactor+ 0);
862 BC_ACCUM(10, 256-yfactor, 256-xfactor);
863 BC_ACCUM(11, 256-yfactor, 512-xfactor);
864 BC_ACCUM(12, 512-yfactor, xfactor+256);
865 BC_ACCUM(13, 512-yfactor, xfactor+ 0);
866 BC_ACCUM(14, 512-yfactor, 256-xfactor);
867 BC_ACCUM(15, 512-yfactor, 512-xfactor);
868 BC_STORE_COMPS(pRes);
869 pRes++;
870 pRGB += 16;
871 xfract += dxfract;
872 yfract += dyfract;
873 }
874}
875
876#ifdef MAKE_STUBS
877
878static void
879BilinearInterpStub(jint *pRGBbase, jint numpix,
880 jint xfract, jint dxfract,
881 jint yfract, jint dyfract)
882{
883 jint *pRGB = pRGBbase;
884 while (--numpix >= 0) {
885 *pRGBbase = *pRGB;
886 pRGBbase += 1;
887 pRGB += 4;
888 }
889}
890
891static void
892BicubicInterpStub(jint *pRGBbase, jint numpix,
893 jint xfract, jint dxfract,
894 jint yfract, jint dyfract)
895{
896 jint *pRGB = pRGBbase+5;
897 while (--numpix >= 0) {
898 *pRGBbase = *pRGB;
899 pRGBbase += 1;
900 pRGB += 16;
901 }
902}
903
904#endif /* MAKE_STUBS */
905