| 1 | /* |
| 2 | * Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Oracle designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Oracle in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 22 | * or visit www.oracle.com if you need additional information or have any |
| 23 | * questions. |
| 24 | */ |
| 25 | |
| 26 | #include <stdlib.h> |
| 27 | #include "jni_util.h" |
| 28 | #include "math.h" |
| 29 | |
| 30 | #include "GraphicsPrimitiveMgr.h" |
| 31 | #include "Region.h" |
| 32 | |
| 33 | #include "sun_java2d_loops_TransformHelper.h" |
| 34 | #include "java_awt_image_AffineTransformOp.h" |
| 35 | |
| 36 | /* |
| 37 | * The stub functions replace the bilinear and bicubic interpolation |
| 38 | * functions with NOP versions so that the performance of the helper |
| 39 | * functions that fetch the data can be more directly tested. They |
| 40 | * are not compiled or enabled by default. Change the following |
| 41 | * #undef to a #define to build the stub functions. |
| 42 | * |
| 43 | * When compiled, they are enabled by the environment variable TXSTUB. |
| 44 | * When compiled, there is also code to disable the VIS versions and |
| 45 | * use the C versions in this file in their place by defining the TXNOVIS |
| 46 | * environment variable. |
| 47 | */ |
| 48 | #undef MAKE_STUBS |
| 49 | |
| 50 | /* The number of IntArgbPre samples to store in the temporary buffer. */ |
| 51 | #define LINE_SIZE 2048 |
| 52 | |
| 53 | /* The size of a stack allocated buffer to hold edge coordinates (see below). */ |
| 54 | #define MAXEDGES 1024 |
| 55 | |
| 56 | /* Declare the software interpolation functions. */ |
| 57 | static TransformInterpFunc BilinearInterp; |
| 58 | static TransformInterpFunc BicubicInterp; |
| 59 | |
| 60 | #ifdef MAKE_STUBS |
| 61 | /* Optionally Declare the stub interpolation functions. */ |
| 62 | static TransformInterpFunc BilinearInterpStub; |
| 63 | static TransformInterpFunc BicubicInterpStub; |
| 64 | #endif /* MAKE_STUBS */ |
| 65 | |
| 66 | /* |
| 67 | * Initially choose the software interpolation functions. |
| 68 | * These choices can be overridden by platform code that runs during the |
| 69 | * primitive registration phase of initialization by storing pointers to |
| 70 | * better functions in these pointers. |
| 71 | * Compiling the stubs also turns on code below that can re-install the |
| 72 | * software functions or stub functions on the first call to this primitive. |
| 73 | */ |
| 74 | TransformInterpFunc *pBilinearFunc = BilinearInterp; |
| 75 | TransformInterpFunc *pBicubicFunc = BicubicInterp; |
| 76 | |
| 77 | /* |
| 78 | * The dxydxy parameters of the inverse transform determine how |
| 79 | * quickly we step through the source image. For tiny scale |
| 80 | * factors (on the order of 1E-16 or so) the stepping distances |
| 81 | * are huge. The image has been scaled so small that stepping |
| 82 | * a single pixel in device space moves the sampling point by |
| 83 | * billions (or more) pixels in the source image space. These |
| 84 | * huge stepping values can overflow the whole part of the longs |
| 85 | * we use for the fixed point stepping equations and so we need |
| 86 | * a more robust solution. We could simply iterate over every |
| 87 | * device pixel, use the inverse transform to transform it back |
| 88 | * into the source image coordinate system and then test it for |
| 89 | * being in range and sample pixel-by-pixel, but that is quite |
| 90 | * a bit more expensive. Fortunately, if the scale factors are |
| 91 | * so tiny that we overflow our long values then the number of |
| 92 | * pixels we are planning to visit should be very tiny. The only |
| 93 | * exception to that rule is if the scale factor along one |
| 94 | * dimension is tiny (creating the huge stepping values), and |
| 95 | * the scale factor along the other dimension is fairly regular |
| 96 | * or an up-scale. In that case we have a lot of pixels along |
| 97 | * the direction of the larger axis to sample, but few along the |
| 98 | * smaller axis. Though, pessimally, with an added shear factor |
| 99 | * such a linearly tiny image could have bounds that cover a large |
| 100 | * number of pixels. Such odd transformations should be very |
| 101 | * rare and the absolute limit on calculations would involve a |
| 102 | * single reverse transform of every pixel in the output image |
| 103 | * which is not fast, but it should not cause an undue stall |
| 104 | * of the rendering software. |
| 105 | * |
| 106 | * The specific test we will use is to calculate the inverse |
| 107 | * transformed values of every corner of the destination bounds |
| 108 | * (in order to be user-clip independent) and if we can |
| 109 | * perform a fixed-point-long inverse transform of all of |
| 110 | * those points without overflowing we will use the fast |
| 111 | * fixed point algorithm. Otherwise we will use the safe |
| 112 | * per-pixel transform algorithm. |
| 113 | * The 4 corners are 0,0, 0,dsth, dstw,0, dstw,dsth |
| 114 | * Transformed they are: |
| 115 | * tx, ty |
| 116 | * tx +dxdy*H, ty +dydy*H |
| 117 | * tx+dxdx*W, ty+dydx*W |
| 118 | * tx+dxdx*W+dxdy*H, ty+dydx*W+dydy*H |
| 119 | */ |
| 120 | /* We reject coordinates not less than 1<<30 so that the distance between */ |
| 121 | /* any 2 of them is less than 1<<31 which would overflow into the sign */ |
| 122 | /* bit of a signed long value used to represent fixed point coordinates. */ |
| 123 | #define TX_FIXED_UNSAFE(v) (fabs(v) >= (1<<30)) |
| 124 | static jboolean |
| 125 | checkOverflow(jint dxoff, jint dyoff, |
| 126 | SurfaceDataBounds *pBounds, |
| 127 | TransformInfo *pItxInfo, |
| 128 | jdouble *retx, jdouble *rety) |
| 129 | { |
| 130 | jdouble x, y; |
| 131 | |
| 132 | x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */ |
| 133 | y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */ |
| 134 | Transform_transform(pItxInfo, &x, &y); |
| 135 | *retx = x; |
| 136 | *rety = y; |
| 137 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
| 138 | return JNI_TRUE; |
| 139 | } |
| 140 | |
| 141 | x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */ |
| 142 | y = dyoff+pBounds->y1+0.5; /* Center of pixel y1 */ |
| 143 | Transform_transform(pItxInfo, &x, &y); |
| 144 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
| 145 | return JNI_TRUE; |
| 146 | } |
| 147 | |
| 148 | x = dxoff+pBounds->x1+0.5; /* Center of pixel x1 */ |
| 149 | y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */ |
| 150 | Transform_transform(pItxInfo, &x, &y); |
| 151 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
| 152 | return JNI_TRUE; |
| 153 | } |
| 154 | |
| 155 | x = dxoff+pBounds->x2-0.5; /* Center of pixel x2-1 */ |
| 156 | y = dyoff+pBounds->y2-0.5; /* Center of pixel y2-1 */ |
| 157 | Transform_transform(pItxInfo, &x, &y); |
| 158 | if (TX_FIXED_UNSAFE(x) || TX_FIXED_UNSAFE(y)) { |
| 159 | return JNI_TRUE; |
| 160 | } |
| 161 | |
| 162 | return JNI_FALSE; |
| 163 | } |
| 164 | |
| 165 | /* |
| 166 | * Fill the edge buffer with pairs of coordinates representing the maximum |
| 167 | * left and right pixels of the destination surface that should be processed |
| 168 | * on each scanline, clipped to the bounds parameter. |
| 169 | * The number of scanlines to calculate is implied by the bounds parameter. |
| 170 | * Only pixels that map back through the specified (inverse) transform to a |
| 171 | * source coordinate that falls within the (0, 0, sw, sh) bounds of the |
| 172 | * source image should be processed. |
| 173 | * pEdges points to an array of jints that holds 2 + numedges*2 values where |
| 174 | * numedges should match (pBounds->y2 - pBounds->y1). |
| 175 | * The first two jints in pEdges should be set to y1 and y2 and every pair |
| 176 | * of jints after that represent the xmin,xmax of all pixels in range of |
| 177 | * the transformed blit for the corresponding scanline. |
| 178 | */ |
| 179 | static void |
| 180 | calculateEdges(jint *pEdges, |
| 181 | SurfaceDataBounds *pBounds, |
| 182 | TransformInfo *pItxInfo, |
| 183 | jlong xbase, jlong ybase, |
| 184 | juint sw, juint sh) |
| 185 | { |
| 186 | jlong dxdxlong, dydxlong; |
| 187 | jlong dxdylong, dydylong; |
| 188 | jlong drowxlong, drowylong; |
| 189 | jint dx1, dy1, dx2, dy2; |
| 190 | |
| 191 | dxdxlong = DblToLong(pItxInfo->dxdx); |
| 192 | dydxlong = DblToLong(pItxInfo->dydx); |
| 193 | dxdylong = DblToLong(pItxInfo->dxdy); |
| 194 | dydylong = DblToLong(pItxInfo->dydy); |
| 195 | |
| 196 | dx1 = pBounds->x1; |
| 197 | dy1 = pBounds->y1; |
| 198 | dx2 = pBounds->x2; |
| 199 | dy2 = pBounds->y2; |
| 200 | *pEdges++ = dy1; |
| 201 | *pEdges++ = dy2; |
| 202 | |
| 203 | drowxlong = (dx2-dx1-1) * dxdxlong; |
| 204 | drowylong = (dx2-dx1-1) * dydxlong; |
| 205 | |
| 206 | while (dy1 < dy2) { |
| 207 | jlong xlong, ylong; |
| 208 | |
| 209 | dx1 = pBounds->x1; |
| 210 | dx2 = pBounds->x2; |
| 211 | |
| 212 | xlong = xbase; |
| 213 | ylong = ybase; |
| 214 | while (dx1 < dx2 && |
| 215 | (((juint) WholeOfLong(ylong)) >= sh || |
| 216 | ((juint) WholeOfLong(xlong)) >= sw)) |
| 217 | { |
| 218 | dx1++; |
| 219 | xlong += dxdxlong; |
| 220 | ylong += dydxlong; |
| 221 | } |
| 222 | |
| 223 | xlong = xbase + drowxlong; |
| 224 | ylong = ybase + drowylong; |
| 225 | while (dx2 > dx1 && |
| 226 | (((juint) WholeOfLong(ylong)) >= sh || |
| 227 | ((juint) WholeOfLong(xlong)) >= sw)) |
| 228 | { |
| 229 | dx2--; |
| 230 | xlong -= dxdxlong; |
| 231 | ylong -= dydxlong; |
| 232 | } |
| 233 | |
| 234 | *pEdges++ = dx1; |
| 235 | *pEdges++ = dx2; |
| 236 | |
| 237 | /* Increment to next scanline */ |
| 238 | xbase += dxdylong; |
| 239 | ybase += dydylong; |
| 240 | dy1++; |
| 241 | } |
| 242 | } |
| 243 | |
| 244 | static void |
| 245 | Transform_SafeHelper(JNIEnv *env, |
| 246 | SurfaceDataOps *srcOps, |
| 247 | SurfaceDataOps *dstOps, |
| 248 | SurfaceDataRasInfo *pSrcInfo, |
| 249 | SurfaceDataRasInfo *pDstInfo, |
| 250 | NativePrimitive *pMaskBlitPrim, |
| 251 | CompositeInfo *pCompInfo, |
| 252 | TransformHelperFunc *pHelperFunc, |
| 253 | TransformInterpFunc *pInterpFunc, |
| 254 | RegionData *pClipInfo, TransformInfo *pItxInfo, |
| 255 | jint *pData, jint *pEdges, |
| 256 | jint dxoff, jint dyoff, jint sw, jint sh); |
| 257 | |
| 258 | /* |
| 259 | * Class: sun_java2d_loops_TransformHelper |
| 260 | * Method: Transform |
| 261 | * Signature: (Lsun/java2d/loops/MaskBlit;Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;Ljava/awt/geom/AffineTransform;IIIIIIIII[I)V |
| 262 | */ |
| 263 | JNIEXPORT void JNICALL |
| 264 | Java_sun_java2d_loops_TransformHelper_Transform |
| 265 | (JNIEnv *env, jobject self, |
| 266 | jobject maskblit, |
| 267 | jobject srcData, jobject dstData, |
| 268 | jobject comp, jobject clip, |
| 269 | jobject itxform, jint txtype, |
| 270 | jint sx1, jint sy1, jint sx2, jint sy2, |
| 271 | jint dx1, jint dy1, jint dx2, jint dy2, |
| 272 | jintArray edgeArray, jint dxoff, jint dyoff) |
| 273 | { |
| 274 | SurfaceDataOps *srcOps; |
| 275 | SurfaceDataOps *dstOps; |
| 276 | SurfaceDataRasInfo srcInfo; |
| 277 | SurfaceDataRasInfo dstInfo; |
| 278 | NativePrimitive *pHelperPrim; |
| 279 | NativePrimitive *pMaskBlitPrim; |
| 280 | CompositeInfo compInfo; |
| 281 | RegionData clipInfo; |
| 282 | TransformInfo itxInfo; |
| 283 | jint maxlinepix; |
| 284 | TransformHelperFunc *pHelperFunc; |
| 285 | TransformInterpFunc *pInterpFunc; |
| 286 | jdouble xorig, yorig; |
| 287 | jlong numedges; |
| 288 | jint *pEdges; |
| 289 | jint edgebuf[2 + MAXEDGES * 2]; |
| 290 | union { |
| 291 | jlong align; |
| 292 | jint data[LINE_SIZE]; |
| 293 | } rgb; |
| 294 | |
| 295 | #ifdef MAKE_STUBS |
| 296 | static int th_initialized; |
| 297 | |
| 298 | /* For debugging only - used to swap in alternate funcs for perf testing */ |
| 299 | if (!th_initialized) { |
| 300 | if (getenv("TXSTUB" ) != 0) { |
| 301 | pBilinearFunc = BilinearInterpStub; |
| 302 | pBicubicFunc = BicubicInterpStub; |
| 303 | } else if (getenv("TXNOVIS" ) != 0) { |
| 304 | pBilinearFunc = BilinearInterp; |
| 305 | pBicubicFunc = BicubicInterp; |
| 306 | } |
| 307 | th_initialized = 1; |
| 308 | } |
| 309 | #endif /* MAKE_STUBS */ |
| 310 | |
| 311 | pHelperPrim = GetNativePrim(env, self); |
| 312 | if (pHelperPrim == NULL) { |
| 313 | /* Should never happen... */ |
| 314 | return; |
| 315 | } |
| 316 | pMaskBlitPrim = GetNativePrim(env, maskblit); |
| 317 | if (pMaskBlitPrim == NULL) { |
| 318 | /* Exception was thrown by GetNativePrim */ |
| 319 | return; |
| 320 | } |
| 321 | if (pMaskBlitPrim->pCompType->getCompInfo != NULL) { |
| 322 | (*pMaskBlitPrim->pCompType->getCompInfo)(env, &compInfo, comp); |
| 323 | } |
| 324 | if (Region_GetInfo(env, clip, &clipInfo)) { |
| 325 | return; |
| 326 | } |
| 327 | |
| 328 | srcOps = SurfaceData_GetOps(env, srcData); |
| 329 | if (srcOps == 0) { |
| 330 | return; |
| 331 | } |
| 332 | dstOps = SurfaceData_GetOps(env, dstData); |
| 333 | if (dstOps == 0) { |
| 334 | return; |
| 335 | } |
| 336 | |
| 337 | /* |
| 338 | * Grab the appropriate pointer to the helper and interpolation |
| 339 | * routines and calculate the maximum number of destination pixels |
| 340 | * that can be processed in one intermediate buffer based on the |
| 341 | * size of the buffer and the number of samples needed per pixel. |
| 342 | */ |
| 343 | switch (txtype) { |
| 344 | case java_awt_image_AffineTransformOp_TYPE_NEAREST_NEIGHBOR: |
| 345 | pHelperFunc = pHelperPrim->funcs.transformhelpers->nnHelper; |
| 346 | pInterpFunc = NULL; |
| 347 | maxlinepix = LINE_SIZE; |
| 348 | break; |
| 349 | case java_awt_image_AffineTransformOp_TYPE_BILINEAR: |
| 350 | pHelperFunc = pHelperPrim->funcs.transformhelpers->blHelper; |
| 351 | pInterpFunc = pBilinearFunc; |
| 352 | maxlinepix = LINE_SIZE / 4; |
| 353 | break; |
| 354 | case java_awt_image_AffineTransformOp_TYPE_BICUBIC: |
| 355 | pHelperFunc = pHelperPrim->funcs.transformhelpers->bcHelper; |
| 356 | pInterpFunc = pBicubicFunc; |
| 357 | maxlinepix = LINE_SIZE / 16; |
| 358 | break; |
| 359 | default: |
| 360 | // Should not happen, but just in case. |
| 361 | return; |
| 362 | } |
| 363 | |
| 364 | srcInfo.bounds.x1 = sx1; |
| 365 | srcInfo.bounds.y1 = sy1; |
| 366 | srcInfo.bounds.x2 = sx2; |
| 367 | srcInfo.bounds.y2 = sy2; |
| 368 | dstInfo.bounds.x1 = dx1; |
| 369 | dstInfo.bounds.y1 = dy1; |
| 370 | dstInfo.bounds.x2 = dx2; |
| 371 | dstInfo.bounds.y2 = dy2; |
| 372 | SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds); |
| 373 | if (srcOps->Lock(env, srcOps, &srcInfo, pHelperPrim->srcflags) |
| 374 | != SD_SUCCESS) |
| 375 | { |
| 376 | /* edgeArray should already contain zeros for min/maxy */ |
| 377 | return; |
| 378 | } |
| 379 | if (dstOps->Lock(env, dstOps, &dstInfo, pMaskBlitPrim->dstflags) |
| 380 | != SD_SUCCESS) |
| 381 | { |
| 382 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
| 383 | /* edgeArray should already contain zeros for min/maxy */ |
| 384 | return; |
| 385 | } |
| 386 | Region_IntersectBounds(&clipInfo, &dstInfo.bounds); |
| 387 | Transform_GetInfo(env, itxform, &itxInfo); |
| 388 | |
| 389 | numedges = (((jlong) dstInfo.bounds.y2) - ((jlong) dstInfo.bounds.y1)); |
| 390 | if (numedges <= 0) { |
| 391 | pEdges = NULL; |
| 392 | } else if (!JNU_IsNull(env, edgeArray)) { |
| 393 | /* |
| 394 | * Ideally Java should allocate an array large enough, but if |
| 395 | * we ever have a miscommunication about the number of edge |
| 396 | * lines, or if the Java array calculation should overflow to |
| 397 | * a positive number and succeed in allocating an array that |
| 398 | * is too small, we need to verify that it can still hold the |
| 399 | * number of integers that we plan to store to be safe. |
| 400 | */ |
| 401 | jsize edgesize = (*env)->GetArrayLength(env, edgeArray); |
| 402 | /* (edgesize/2 - 1) should avoid any overflow or underflow. */ |
| 403 | pEdges = (((edgesize / 2) - 1) >= numedges) |
| 404 | ? (*env)->GetPrimitiveArrayCritical(env, edgeArray, NULL) |
| 405 | : NULL; |
| 406 | } else if (numedges > MAXEDGES) { |
| 407 | /* numedges variable (jlong) can be at most ((1<<32)-1) */ |
| 408 | /* memsize can overflow a jint, but not a jlong */ |
| 409 | jlong memsize = ((numedges * 2) + 2) * sizeof(*pEdges); |
| 410 | pEdges = (memsize == ((size_t) memsize)) |
| 411 | ? malloc((size_t) memsize) |
| 412 | : NULL; |
| 413 | } else { |
| 414 | pEdges = edgebuf; |
| 415 | } |
| 416 | |
| 417 | if (pEdges == NULL) { |
| 418 | if (!(*env)->ExceptionCheck(env) && numedges > 0) { |
| 419 | JNU_ThrowInternalError(env, "Unable to allocate edge list" ); |
| 420 | } |
| 421 | SurfaceData_InvokeUnlock(env, dstOps, &dstInfo); |
| 422 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
| 423 | /* edgeArray should already contain zeros for min/maxy */ |
| 424 | return; |
| 425 | } |
| 426 | |
| 427 | |
| 428 | if (!Region_IsEmpty(&clipInfo)) { |
| 429 | srcOps->GetRasInfo(env, srcOps, &srcInfo); |
| 430 | dstOps->GetRasInfo(env, dstOps, &dstInfo); |
| 431 | if (srcInfo.rasBase == NULL || dstInfo.rasBase == NULL) { |
| 432 | pEdges[0] = pEdges[1] = 0; |
| 433 | } else if (checkOverflow(dxoff, dyoff, &dstInfo.bounds, |
| 434 | &itxInfo, &xorig, &yorig)) |
| 435 | { |
| 436 | Transform_SafeHelper(env, srcOps, dstOps, |
| 437 | &srcInfo, &dstInfo, |
| 438 | pMaskBlitPrim, &compInfo, |
| 439 | pHelperFunc, pInterpFunc, |
| 440 | &clipInfo, &itxInfo, rgb.data, pEdges, |
| 441 | dxoff, dyoff, sx2-sx1, sy2-sy1); |
| 442 | } else { |
| 443 | SurfaceDataBounds span; |
| 444 | jlong dxdxlong, dydxlong; |
| 445 | jlong dxdylong, dydylong; |
| 446 | jlong xbase, ybase; |
| 447 | |
| 448 | dxdxlong = DblToLong(itxInfo.dxdx); |
| 449 | dydxlong = DblToLong(itxInfo.dydx); |
| 450 | dxdylong = DblToLong(itxInfo.dxdy); |
| 451 | dydylong = DblToLong(itxInfo.dydy); |
| 452 | xbase = DblToLong(xorig); |
| 453 | ybase = DblToLong(yorig); |
| 454 | |
| 455 | calculateEdges(pEdges, &dstInfo.bounds, &itxInfo, |
| 456 | xbase, ybase, sx2-sx1, sy2-sy1); |
| 457 | |
| 458 | Region_StartIteration(env, &clipInfo); |
| 459 | while (Region_NextIteration(&clipInfo, &span)) { |
| 460 | jlong rowxlong, rowylong; |
| 461 | void *pDst; |
| 462 | |
| 463 | dy1 = span.y1; |
| 464 | dy2 = span.y2; |
| 465 | rowxlong = xbase + (dy1 - dstInfo.bounds.y1) * dxdylong; |
| 466 | rowylong = ybase + (dy1 - dstInfo.bounds.y1) * dydylong; |
| 467 | |
| 468 | while (dy1 < dy2) { |
| 469 | jlong xlong, ylong; |
| 470 | |
| 471 | /* Note - process at most one scanline at a time. */ |
| 472 | |
| 473 | dx1 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 2]; |
| 474 | dx2 = pEdges[(dy1 - dstInfo.bounds.y1) * 2 + 3]; |
| 475 | if (dx1 < span.x1) dx1 = span.x1; |
| 476 | if (dx2 > span.x2) dx2 = span.x2; |
| 477 | |
| 478 | /* All pixels from dx1 to dx2 have centers in bounds */ |
| 479 | while (dx1 < dx2) { |
| 480 | /* Can process at most one buffer full at a time */ |
| 481 | jint numpix = dx2 - dx1; |
| 482 | if (numpix > maxlinepix) { |
| 483 | numpix = maxlinepix; |
| 484 | } |
| 485 | |
| 486 | xlong = |
| 487 | rowxlong + ((dx1 - dstInfo.bounds.x1) * dxdxlong); |
| 488 | ylong = |
| 489 | rowylong + ((dx1 - dstInfo.bounds.x1) * dydxlong); |
| 490 | |
| 491 | /* Get IntArgbPre pixel data from source */ |
| 492 | (*pHelperFunc)(&srcInfo, |
| 493 | rgb.data, numpix, |
| 494 | xlong, dxdxlong, |
| 495 | ylong, dydxlong); |
| 496 | |
| 497 | /* Interpolate result pixels if needed */ |
| 498 | if (pInterpFunc) { |
| 499 | (*pInterpFunc)(rgb.data, numpix, |
| 500 | FractOfLong(xlong-LongOneHalf), |
| 501 | FractOfLong(dxdxlong), |
| 502 | FractOfLong(ylong-LongOneHalf), |
| 503 | FractOfLong(dydxlong)); |
| 504 | } |
| 505 | |
| 506 | /* Store/Composite interpolated pixels into dest */ |
| 507 | pDst = PtrCoord(dstInfo.rasBase, |
| 508 | dx1, dstInfo.pixelStride, |
| 509 | dy1, dstInfo.scanStride); |
| 510 | (*pMaskBlitPrim->funcs.maskblit)(pDst, rgb.data, |
| 511 | 0, 0, 0, |
| 512 | numpix, 1, |
| 513 | &dstInfo, &srcInfo, |
| 514 | pMaskBlitPrim, |
| 515 | &compInfo); |
| 516 | |
| 517 | /* Increment to next buffer worth of input pixels */ |
| 518 | dx1 += maxlinepix; |
| 519 | } |
| 520 | |
| 521 | /* Increment to next scanline */ |
| 522 | rowxlong += dxdylong; |
| 523 | rowylong += dydylong; |
| 524 | dy1++; |
| 525 | } |
| 526 | } |
| 527 | Region_EndIteration(env, &clipInfo); |
| 528 | } |
| 529 | SurfaceData_InvokeRelease(env, dstOps, &dstInfo); |
| 530 | SurfaceData_InvokeRelease(env, srcOps, &srcInfo); |
| 531 | } else { |
| 532 | pEdges[0] = pEdges[1] = 0; |
| 533 | } |
| 534 | |
| 535 | if (!JNU_IsNull(env, edgeArray)) { |
| 536 | (*env)->ReleasePrimitiveArrayCritical(env, edgeArray, pEdges, 0); |
| 537 | } else if (pEdges != edgebuf) { |
| 538 | free(pEdges); |
| 539 | } |
| 540 | SurfaceData_InvokeUnlock(env, dstOps, &dstInfo); |
| 541 | SurfaceData_InvokeUnlock(env, srcOps, &srcInfo); |
| 542 | } |
| 543 | |
| 544 | static void |
| 545 | Transform_SafeHelper(JNIEnv *env, |
| 546 | SurfaceDataOps *srcOps, |
| 547 | SurfaceDataOps *dstOps, |
| 548 | SurfaceDataRasInfo *pSrcInfo, |
| 549 | SurfaceDataRasInfo *pDstInfo, |
| 550 | NativePrimitive *pMaskBlitPrim, |
| 551 | CompositeInfo *pCompInfo, |
| 552 | TransformHelperFunc *pHelperFunc, |
| 553 | TransformInterpFunc *pInterpFunc, |
| 554 | RegionData *pClipInfo, TransformInfo *pItxInfo, |
| 555 | jint *pData, jint *pEdges, |
| 556 | jint dxoff, jint dyoff, jint sw, jint sh) |
| 557 | { |
| 558 | SurfaceDataBounds span; |
| 559 | jint dx1, dx2; |
| 560 | jint dy1, dy2; |
| 561 | jint i, iy; |
| 562 | |
| 563 | dy1 = pDstInfo->bounds.y1; |
| 564 | dy2 = pDstInfo->bounds.y2; |
| 565 | dx1 = pDstInfo->bounds.x1; |
| 566 | dx2 = pDstInfo->bounds.x2; |
| 567 | pEdges[0] = dy1; |
| 568 | pEdges[1] = dy2; |
| 569 | for (iy = dy1; iy < dy2; iy++) { |
| 570 | jint i = (iy - dy1) * 2; |
| 571 | /* row spans are set to max,min until we find a pixel in range below */ |
| 572 | pEdges[i + 2] = dx2; |
| 573 | pEdges[i + 3] = dx1; |
| 574 | } |
| 575 | |
| 576 | Region_StartIteration(env, pClipInfo); |
| 577 | while (Region_NextIteration(pClipInfo, &span)) { |
| 578 | dy1 = span.y1; |
| 579 | dy2 = span.y2; |
| 580 | while (dy1 < dy2) { |
| 581 | dx1 = span.x1; |
| 582 | dx2 = span.x2; |
| 583 | i = (dy1 - pDstInfo->bounds.y1) * 2; |
| 584 | while (dx1 < dx2) { |
| 585 | jdouble x, y; |
| 586 | jlong xlong, ylong; |
| 587 | |
| 588 | x = dxoff + dx1 + 0.5; |
| 589 | y = dyoff + dy1 + 0.5; |
| 590 | Transform_transform(pItxInfo, &x, &y); |
| 591 | xlong = DblToLong(x); |
| 592 | ylong = DblToLong(y); |
| 593 | |
| 594 | /* Process only pixels with centers in bounds |
| 595 | * Test double values to avoid overflow in conversion |
| 596 | * to long values and then also test the long values |
| 597 | * in case they rounded up and out of bounds during |
| 598 | * the conversion. |
| 599 | */ |
| 600 | if (x >= 0 && y >= 0 && x < sw && y < sh && |
| 601 | WholeOfLong(xlong) < sw && |
| 602 | WholeOfLong(ylong) < sh) |
| 603 | { |
| 604 | void *pDst; |
| 605 | |
| 606 | if (pEdges[i + 2] > dx1) { |
| 607 | pEdges[i + 2] = dx1; |
| 608 | } |
| 609 | if (pEdges[i + 3] <= dx1) { |
| 610 | pEdges[i + 3] = dx1 + 1; |
| 611 | } |
| 612 | |
| 613 | /* Get IntArgbPre pixel data from source */ |
| 614 | (*pHelperFunc)(pSrcInfo, |
| 615 | pData, 1, |
| 616 | xlong, 0, |
| 617 | ylong, 0); |
| 618 | |
| 619 | /* Interpolate result pixels if needed */ |
| 620 | if (pInterpFunc) { |
| 621 | (*pInterpFunc)(pData, 1, |
| 622 | FractOfLong(xlong-LongOneHalf), 0, |
| 623 | FractOfLong(ylong-LongOneHalf), 0); |
| 624 | } |
| 625 | |
| 626 | /* Store/Composite interpolated pixels into dest */ |
| 627 | pDst = PtrCoord(pDstInfo->rasBase, |
| 628 | dx1, pDstInfo->pixelStride, |
| 629 | dy1, pDstInfo->scanStride); |
| 630 | (*pMaskBlitPrim->funcs.maskblit)(pDst, pData, |
| 631 | 0, 0, 0, |
| 632 | 1, 1, |
| 633 | pDstInfo, pSrcInfo, |
| 634 | pMaskBlitPrim, |
| 635 | pCompInfo); |
| 636 | } |
| 637 | |
| 638 | /* Increment to next input pixel */ |
| 639 | dx1++; |
| 640 | } |
| 641 | |
| 642 | /* Increment to next scanline */ |
| 643 | dy1++; |
| 644 | } |
| 645 | } |
| 646 | Region_EndIteration(env, pClipInfo); |
| 647 | } |
| 648 | |
| 649 | #define BL_INTERP_V1_to_V2_by_F(v1, v2, f) \ |
| 650 | (((v1)<<8) + ((v2)-(v1))*(f)) |
| 651 | |
| 652 | #define BL_ACCUM(comp) \ |
| 653 | do { \ |
| 654 | jint c1 = ((jubyte *) pRGB)[comp]; \ |
| 655 | jint c2 = ((jubyte *) pRGB)[comp+4]; \ |
| 656 | jint cR = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \ |
| 657 | c1 = ((jubyte *) pRGB)[comp+8]; \ |
| 658 | c2 = ((jubyte *) pRGB)[comp+12]; \ |
| 659 | c2 = BL_INTERP_V1_to_V2_by_F(c1, c2, xfactor); \ |
| 660 | cR = BL_INTERP_V1_to_V2_by_F(cR, c2, yfactor); \ |
| 661 | ((jubyte *)pRes)[comp] = (jubyte) ((cR + (1<<15)) >> 16); \ |
| 662 | } while (0) |
| 663 | |
| 664 | static void |
| 665 | BilinearInterp(jint *pRGB, jint numpix, |
| 666 | jint xfract, jint dxfract, |
| 667 | jint yfract, jint dyfract) |
| 668 | { |
| 669 | jint j; |
| 670 | jint *pRes = pRGB; |
| 671 | |
| 672 | for (j = 0; j < numpix; j++) { |
| 673 | jint xfactor; |
| 674 | jint yfactor; |
| 675 | xfactor = URShift(xfract, 32-8); |
| 676 | yfactor = URShift(yfract, 32-8); |
| 677 | BL_ACCUM(0); |
| 678 | BL_ACCUM(1); |
| 679 | BL_ACCUM(2); |
| 680 | BL_ACCUM(3); |
| 681 | pRes++; |
| 682 | pRGB += 4; |
| 683 | xfract += dxfract; |
| 684 | yfract += dyfract; |
| 685 | } |
| 686 | } |
| 687 | |
/*
 * Branch free clamp of the integer lvalue 'val' to the range [0, max].
 * Relies on '>> 31' of a negative 32-bit value producing all one bits.
 */
#define SAT(val, max) \
    do { \
        val &= ~(val >> 31);  /* negatives become 0 */ \
        val -= max;           /* only overflows are now positive */ \
        val &= (val >> 31);   /* positives become 0 */ \
        val += max;           /* range is now [0 -> max] */ \
    } while (0)

#ifdef __sparc
/* For sparc, floating point multiplies are faster than integer */
#define BICUBIC_USE_DBL_LUT
#else
/* For x86, integer multiplies are faster than floating point */
/* Note that on x86 Linux the choice of best algorithm varies
 * depending on the compiler optimization and the processor type.
 * Currently, the sun/awt x86 Linux builds are not optimized so
 * all the variations produce mediocre performance.
 * For now we will use the choice that works best for the Windows
 * build until the (lack of) optimization issues on Linux are resolved.
 */
#define BICUBIC_USE_INT_MATH
#endif

/*
 * Each variant below supplies the same set of definitions for the
 * bicubic code:
 *   BC_TYPE         type of the coefficients and the accumulators
 *   BC_DblToCoeff   converts a jdouble weight to a coefficient
 *   BC_COEFF_ONE    the coefficient that represents a weight of 1.0
 *   BC_V_HALF       initial accumulator value (0.5, for rounding)
 *   BC_CompToV      converts an 8-bit component to an accumulator value
 *   BC_STORE_COMPS  clamps accumA/R/G/B (alpha to [0, 255], the color
 *                   components to [0, alpha]) and stores the packed
 *                   pixel through pRes
 *
 * The pixel is packed using unsigned arithmetic because shifting an
 * alpha value of 128 or more left by 24 bits as a signed int would
 * overflow into the sign bit, which is undefined behavior in C.
 */

#ifdef BICUBIC_USE_DBL_CAST

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ((jdouble) (v))
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | ((juint) r << 16) | \
                        ((juint) g << 8) | ((juint) b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_CAST */

#ifdef BICUBIC_USE_DBL_LUT

#define ItoD1(v)    ((jdouble) (v))
#define ItoD4(v)    ItoD1(v),  ItoD1(v+1),   ItoD1(v+2),   ItoD1(v+3)
#define ItoD16(v)   ItoD4(v),  ItoD4(v+4),   ItoD4(v+8),   ItoD4(v+12)
#define ItoD64(v)   ItoD16(v), ItoD16(v+16), ItoD16(v+32), ItoD16(v+48)

/* Lookup table holding the values 0..255 as jdoubles. */
static jdouble ItoD_table[] = {
    ItoD64(0), ItoD64(64), ItoD64(128), ItoD64(192)
};

#define BC_DblToCoeff(v)        (v)
#define BC_COEFF_ONE            1.0
#define BC_TYPE                 jdouble
#define BC_V_HALF               0.5
#define BC_CompToV(v)           ItoD_table[v]
#define BC_STORE_COMPS(pRes) \
    do { \
        jint a = (jint) accumA; \
        jint r = (jint) accumR; \
        jint g = (jint) accumG; \
        jint b = (jint) accumB; \
        SAT(a, 255); \
        SAT(r, a); \
        SAT(g, a); \
        SAT(b, a); \
        *pRes = (jint) (((juint) a << 24) | ((juint) r << 16) | \
                        ((juint) g << 8) | ((juint) b)); \
    } while (0)

#endif /* BICUBIC_USE_DBL_LUT */

#ifdef BICUBIC_USE_INT_MATH

/* Coefficients have 8 fractional bits; accumulators therefore have 16. */
#define BC_DblToCoeff(v)        ((jint) ((v) * 256))
#define BC_COEFF_ONE            256
#define BC_TYPE                 jint
#define BC_V_HALF               (1 << 15)
#define BC_CompToV(v)           ((jint) v)
#define BC_STORE_COMPS(pRes) \
    do { \
        accumA >>= 16; \
        accumR >>= 16; \
        accumG >>= 16; \
        accumB >>= 16; \
        SAT(accumA, 255); \
        SAT(accumR, accumA); \
        SAT(accumG, accumA); \
        SAT(accumB, accumA); \
        *pRes = (jint) (((juint) accumA << 24) | ((juint) accumR << 16) | \
                        ((juint) accumG << 8) | ((juint) accumB)); \
    } while (0)

#endif /* BICUBIC_USE_INT_MATH */
| 785 | |
| 786 | #define BC_ACCUM(index, ycindex, xcindex) \ |
| 787 | do { \ |
| 788 | BC_TYPE factor = bicubic_coeff[xcindex] * bicubic_coeff[ycindex]; \ |
| 789 | int rgb; \ |
| 790 | rgb = pRGB[index]; \ |
| 791 | accumB += BC_CompToV((rgb >> 0) & 0xff) * factor; \ |
| 792 | accumG += BC_CompToV((rgb >> 8) & 0xff) * factor; \ |
| 793 | accumR += BC_CompToV((rgb >> 16) & 0xff) * factor; \ |
| 794 | accumA += BC_CompToV((rgb >> 24) & 0xff) * factor; \ |
| 795 | } while (0) |
| 796 | |
| 797 | static BC_TYPE bicubic_coeff[513]; |
| 798 | static jboolean bicubictableinited; |
| 799 | |
| 800 | static void |
| 801 | init_bicubic_table(jdouble A) |
| 802 | { |
| 803 | /* |
| 804 | * The following formulas are designed to give smooth |
| 805 | * results when 'A' is -0.5 or -1.0. |
| 806 | */ |
| 807 | int i; |
| 808 | for (i = 0; i < 256; i++) { |
| 809 | /* r(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1 , 0 <= |x| < 1 */ |
| 810 | jdouble x = i / 256.0; |
| 811 | x = ((A+2)*x - (A+3))*x*x + 1; |
| 812 | bicubic_coeff[i] = BC_DblToCoeff(x); |
| 813 | } |
| 814 | |
| 815 | for (; i < 384; i++) { |
| 816 | /* r(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A , 1 <= |x| < 2 */ |
| 817 | jdouble x = i / 256.0; |
| 818 | x = ((A*x - 5*A)*x + 8*A)*x - 4*A; |
| 819 | bicubic_coeff[i] = BC_DblToCoeff(x); |
| 820 | } |
| 821 | |
| 822 | bicubic_coeff[384] = (BC_COEFF_ONE - bicubic_coeff[128]*2) / 2; |
| 823 | |
| 824 | for (i++; i <= 512; i++) { |
| 825 | bicubic_coeff[i] = BC_COEFF_ONE - (bicubic_coeff[512-i] + |
| 826 | bicubic_coeff[i-256] + |
| 827 | bicubic_coeff[768-i]); |
| 828 | } |
| 829 | |
| 830 | bicubictableinited = JNI_TRUE; |
| 831 | } |
| 832 | |
| 833 | static void |
| 834 | BicubicInterp(jint *pRGB, jint numpix, |
| 835 | jint xfract, jint dxfract, |
| 836 | jint yfract, jint dyfract) |
| 837 | { |
| 838 | jint i; |
| 839 | jint *pRes = pRGB; |
| 840 | |
| 841 | if (!bicubictableinited) { |
| 842 | init_bicubic_table(-0.5); |
| 843 | } |
| 844 | |
| 845 | for (i = 0; i < numpix; i++) { |
| 846 | BC_TYPE accumA, accumR, accumG, accumB; |
| 847 | jint xfactor, yfactor; |
| 848 | |
| 849 | xfactor = URShift(xfract, 32-8); |
| 850 | yfactor = URShift(yfract, 32-8); |
| 851 | accumA = accumR = accumG = accumB = BC_V_HALF; |
| 852 | BC_ACCUM(0, yfactor+256, xfactor+256); |
| 853 | BC_ACCUM(1, yfactor+256, xfactor+ 0); |
| 854 | BC_ACCUM(2, yfactor+256, 256-xfactor); |
| 855 | BC_ACCUM(3, yfactor+256, 512-xfactor); |
| 856 | BC_ACCUM(4, yfactor+ 0, xfactor+256); |
| 857 | BC_ACCUM(5, yfactor+ 0, xfactor+ 0); |
| 858 | BC_ACCUM(6, yfactor+ 0, 256-xfactor); |
| 859 | BC_ACCUM(7, yfactor+ 0, 512-xfactor); |
| 860 | BC_ACCUM(8, 256-yfactor, xfactor+256); |
| 861 | BC_ACCUM(9, 256-yfactor, xfactor+ 0); |
| 862 | BC_ACCUM(10, 256-yfactor, 256-xfactor); |
| 863 | BC_ACCUM(11, 256-yfactor, 512-xfactor); |
| 864 | BC_ACCUM(12, 512-yfactor, xfactor+256); |
| 865 | BC_ACCUM(13, 512-yfactor, xfactor+ 0); |
| 866 | BC_ACCUM(14, 512-yfactor, 256-xfactor); |
| 867 | BC_ACCUM(15, 512-yfactor, 512-xfactor); |
| 868 | BC_STORE_COMPS(pRes); |
| 869 | pRes++; |
| 870 | pRGB += 16; |
| 871 | xfract += dxfract; |
| 872 | yfract += dyfract; |
| 873 | } |
| 874 | } |
| 875 | |
#ifdef MAKE_STUBS

/*
 * NOP stand-in for BilinearInterp: output pixel k is simply a copy of
 * the first of the 4 samples fetched for it.  The fraction arguments
 * are ignored.
 */
static void
BilinearInterpStub(jint *pRGBbase, jint numpix,
                   jint xfract, jint dxfract,
                   jint yfract, jint dyfract)
{
    jint k;

    for (k = 0; k < numpix; k++) {
        pRGBbase[k] = pRGBbase[k * 4];
    }
}

/*
 * NOP stand-in for BicubicInterp: output pixel k is simply a copy of
 * the sample at index 5 of the 16 samples fetched for it.  The fraction
 * arguments are ignored.
 */
static void
BicubicInterpStub(jint *pRGBbase, jint numpix,
                  jint xfract, jint dxfract,
                  jint yfract, jint dyfract)
{
    jint k;

    for (k = 0; k < numpix; k++) {
        pRGBbase[k] = pRGBbase[k * 16 + 5];
    }
}

#endif /* MAKE_STUBS */
| 905 | |