1/*
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26#include <math.h>
27
28#include "jni_util.h"
29#include "GraphicsPrimitiveMgr.h"
30#include "Region.h"
31
32#include "sun_java2d_loops_ScaledBlit.h"
33
34/*
35 * The scaling loops used inside the helper functions are based on the
36 * following pseudocode for stepping through the source image:
37 *
38 * shift - number of bits of sub-pixel precision in scaled values
39 * srcxorig, srcyorig - scaled location of first pixel
40 * srcxinc, srcyinc - scaled x and y increments
41 * dstwidth, dstheight - number of pixels to process across and down
42 *
43 * 1. srcy = srcyorig;
44 * 2. for (dstheight) {
45 * 3. srcx = srcxorig;
46 * 4. for (dstwidth) {
47 * 5. fetch and process pixel for (srcx >> shift, srcy >> shift)
48 * 6. srcx += srcxinc;
49 * 7. }
50 * 8. srcy += srcyinc;
51 * 9. }
52 *
53 * Note that each execution of line 6 or 8 accumulates error of
54 * +/- 1 into the scaled coordinate variables. These lines are
55 * each executed once per pixel across or once per pixel down
56 * the region being iterated over, thus the error can accumulate
57 * up to a magnitude of dstwidth in the horizontal direction and
58 * dstheight in the vertical direction.
59 *
60 * If the error ever reaches a magnitude of (1 << shift) then we
61 * will be off by at least 1 source pixel in our mapping.
62 *
63 * Note that we increment the source coordinates by the srcxinc
64 * and srcyinc variables in each step. Thus, if our error ever
65 * accumulates to a magnitude equal to srcxinc or srcyinc then
66 * we will be ahead or behind of "where we should be" by at least
67 * one iteration. Since each iteration is a destination pixel,
68 * this means that our actual location will be off by at least
69 * one destination pixel.
70 *
71 * This means that all of the values:
72 *
73 * - (1 << shift)
74 * - srcxinc
75 * - srcyinc
76 *
77 * all represent a maximum bound on how much error we can accumulate
78 * before we are off by a source or a destination pixel. Thus,
79 * we should make sure that we never process more than that many
80 * pixels if we want to maintain single pixel accuracy. Even
81 * better would be to process many fewer pixels than those bounds
82 * to ensure that our accumulated error is much smaller than a
83 * pixel.
84 */
85
86/*
87 * Find and return the largest tile size that is a power of 2 and
88 * which is small enough to yield some reassuring degree of subpixel
89 * accuracy. The degree of subpixel accuracy that will be preserved
90 * by the tile size it chooses will vary and the details on how
91 * it makes this decision are detailed in the comments below.
92 */
93static jint
94findpow2tilesize(jint shift, jint sxinc, jint syinc)
95{
96 /*
97 * The initial value of shift is our first estimate for
98 * the power of 2 for our tilesize since it ensures
99 * less than 1 source pixel of error.
100 *
101 * Reducing it until (1 << shift) is not larger than the
102 * smallest of our increments ensures we will have no more
103 * than 1 destination pixel of error as well.
104 */
105 if (sxinc > syinc) {
106 sxinc = syinc;
107 }
108 if (sxinc == 0) {
109 /* Degenerate case will cause infinite loop in next loop... */
110 return 1;
111 }
112 while ((1 << shift) > sxinc) {
113 shift--;
114 }
115 /*
116 * shift is now the largest it can be for less than 1 pixel
117 * of error in either source or destination spaces.
118 *
119 * Now we will try for at least 8 bits of subpixel accuracy
120 * with a tile size of at least 256x256 and reduce our subpixel
121 * accuracy on a sliding scale down to a tilesize of 1x1 when
122 * we have no bits of sub-pixel accuracy.
123 */
124 if (shift >= 16) {
125 /* Subtracting 8 asks for 8 bits of subpixel accuracy. */
126 shift -= 8;
127 } else {
128 /* Ask for half of the remaining bits to be subpixel accuracy. */
129 /* Rounding is in favor of subpixel accuracy over tile size. */
130 /* Worst case, shift == 0 and tilesize == (1 << 0) == 1 */
131 shift /= 2;
132 }
133 return (1 << shift);
134}
135
136/*
137 * For a given integer destination pixel coordinate "id", calculate the
138 * integer destination coordinate of the start of the "ts" sized tile
139 * in which it resides.
140 * Tiles all start at even multiples of the tile size from the integer
141 * destination origin "io".
142 *
143 * id == integer destination coordinate
144 * io == integer destination operation origin
145 * ts == tilesize (must be power of 2)
146 */
147#define TILESTART(id, io, ts) ((io) + (((id)-(io)) & (~((ts)-1))))
148
149/*
150 * For a given integer destination pixel coordinate "id", calculate the
151 * sub-pixel accurate source coordinate from which its sample comes.
152 * The returned source coordinate is expressed in a shifted fractional
153 * arithmetic number system.
154 *
155 * id == integer destination coordinate
156 * fo == floating point destination operation origin,
157 * sf == source coordinate scale factor per destination pixel
158 * (multiplied by fractional arithmetic "shift")
159 *
160 * The caller is required to cast this value to the appropriate
161 * integer type for the needed precision. The rendering code which
162 * deals only with valid coordinates within the bounds of the source
163 * rectangle uses jint. The setup code, which occasionally deals
164 * with coordinates that run out of bounds, uses jlong.
165 *
166 * Note that the rounding in this calculation is at a fraction of a
167 * source pixel of (1.0 / (1<<shift)) since the scale factor includes
168 * the fractional shift. As a result, the type of rounding used is
169 * not very significant (floor, floor(x+.5), or ceil(x-.5)), but the
170 * ceil(x-.5) version is used for consistency with the way that pixel
171 * coordinates are rounded to assign the ".5" value to the lower
172 * integer.
173 */
174#define SRCLOC(id, fo, sf) (ceil((((id) + 0.5) - (fo)) * (sf) - 0.5))
175
176/*
177 * Reverse map a srctarget coordinate into device space and refine the
178 * answer. More specifically, what we are looking for is the smallest
179 * destination coordinate that maps to a source coordinate that is
180 * greater than or equal to the given target source coordinate.
181 *
182 * Note that since the inner loops use math that maps a destination
183 * coordinate into source space and that, even though the equation
184 * we use below is the theoretical inverse of the dst->src mapping,
185 * we cannot rely on floating point math to guarantee that applying
186 * both of these equations in sequence will give us an exact mapping
187 * of src->dst->src. Thus, we must search back and forth to see if
188 * we really map back to the given source coordinate and that we are
189 * the smallest destination coordinate that does so.
190 *
191 * Note that, in practice, the answer from the initial guess tends to
192 * be the right answer most of the time and the loop ends up finding
193 * one iteration to be ">= srctarget" and the next to be "< srctarget"
194 * and thus finds the answer in 2 iterations. A small number of
195 * times, the initial guess is 1 too low and so we do one iteration
196 * at "< srctarget" and the next at ">= srctarget" and again find the
197 * answer in 2 iterations. All cases encountered during testing ended
198 * up falling into one of those 2 categories and so the loop was always
199 * executed exactly twice.
200 *
201 * Note also that the calculation of srcloc below may attempt to calculate
202 * the src location of the destination pixel which is "1 beyond" the
203 * end of the source image. Since our shift calculation code in the
204 * main function only guaranteed that "srcw << shift" did not overflow
205 * a 32-bit signed integer, we cannot guarantee that "(srcw+1) << shift"
206 * or, more generally, "(srcw << shift)+srcinc" does not overflow.
207 * As a result, we perform our calculations here with jlong values
208 * so that we aren't affected by this overflow. Since srcw (shifted)
209 * and srcinc are both 32-bit values, their sum cannot possibly overflow
210 * a jlong. In fact, we can step up to a couple of billion steps of
211 * size "srcinc" past the end of the image before we have to worry
212 * about overflow - in practice, though, the search never steps more
213 * than 1 past the end of the image so this buffer is more than enough.
214 */
215static jint
216refine(jint intorigin, jdouble dblorigin, jint tilesize,
217 jdouble scale, jint srctarget, jint srcinc)
218{
219 /* Make a first estimate of dest coordinate from srctarget */
220 jint dstloc = (jint) ceil(dblorigin + srctarget / scale - 0.5);
221 /* Loop until we get at least one value < and one >= the target */
222 jboolean wasneg = JNI_FALSE;
223 jboolean waspos = JNI_FALSE;
224 jlong lsrcinc = srcinc;
225 jlong lsrctarget = srctarget;
226
227 while (JNI_TRUE) {
228 /*
229 * Find src coordinate from dest coordinate using the same
230 * math we will use below when iterating over tiles.
231 */
232 jint tilestart = TILESTART(dstloc, intorigin, tilesize);
233 jlong lsrcloc = (jlong) SRCLOC(tilestart, dblorigin, scale);
234 if (dstloc > tilestart) {
235 lsrcloc += lsrcinc * ((jlong) dstloc - tilestart);
236 }
237 if (lsrcloc >= lsrctarget) {
238 /*
239 * If we were previously less than target, then the current
240 * dstloc is the smallest dst which maps >= the target.
241 */
242 if (wasneg) break;
243 dstloc--;
244 waspos = JNI_TRUE;
245 } else {
246 /*
247 * If we were previously greater than target, then this must
248 * be the first dstloc which maps to < the target. Since we
249 * want the smallest which maps >= the target, increment it
250 * first before returning.
251 */
252 dstloc++;
253 if (waspos) break;
254 wasneg = JNI_TRUE;
255 }
256 }
257 return dstloc;
258}
259
260/*
261 * Class: sun_java2d_loops_ScaledBlit
262 * Method: Scale
263 * Signature: (Lsun/java2d/SurfaceData;Lsun/java2d/SurfaceData;Ljava/awt/Composite;Lsun/java2d/pipe/Region;IIIIDDDD)V
264 */
265JNIEXPORT void JNICALL
266Java_sun_java2d_loops_ScaledBlit_Scale
267 (JNIEnv *env, jobject self,
268 jobject srcData, jobject dstData,
269 jobject comp, jobject clip,
270 jint sx1, jint sy1, jint sx2, jint sy2,
271 jdouble ddx1, jdouble ddy1, jdouble ddx2, jdouble ddy2)
272{
273 SurfaceDataOps *srcOps;
274 SurfaceDataOps *dstOps;
275 SurfaceDataRasInfo srcInfo;
276 SurfaceDataRasInfo dstInfo;
277 NativePrimitive *pPrim;
278 CompositeInfo compInfo;
279 jint sxinc, syinc, shift;
280 jint tilesize;
281 jint idx1, idy1;
282 jdouble scalex, scaley;
283 RegionData clipInfo;
284 jint dstFlags;
285 jboolean xunderflow, yunderflow;
286
287 pPrim = GetNativePrim(env, self);
288 if (pPrim == NULL) {
289 return;
290 }
291 if (pPrim->pCompType->getCompInfo != NULL) {
292 (*pPrim->pCompType->getCompInfo)(env, &compInfo, comp);
293 }
294 if (Region_GetInfo(env, clip, &clipInfo)) {
295 return;
296 }
297
298 srcOps = SurfaceData_GetOps(env, srcData);
299 if (srcOps == 0) {
300 return;
301 }
302 dstOps = SurfaceData_GetOps(env, dstData);
303 if (dstOps == 0) {
304 return;
305 }
306
307 /*
308 * Determine the precision to use for the fixed point math
309 * for the coordinate scaling.
310 * - OR together srcw and srch to get the MSB between the two
311 * - Next shift it up until it goes negative
312 * - Count the shifts and that will be the most accurate
313 * precision available for the fixed point math
314 * - a source coordinate of 1.0 will be (1 << shift)
315 * - srcw & srch will be (srcw << shift) and (srch << shift)
316 * and will not overflow
317 * Note that if srcw or srch are so large that they are
318 * negative numbers before shifting, then:
319 * - shift will be 0
320 * - tilesize will end up being 1x1 tiles
321 * - we will brute force calculate the source location
322 * of every destination pixel using the TILESTART and
323 * SRCLOC macros in this function and then call the
324 * scale helper function to copy one pixel at a time.
325 * - TILESTART involves mostly jdouble calculations so
326 * it should not have integer overflow problems.
327 */
328 sxinc = (sx2 - sx1) | (sy2 - sy1);
329 shift = 0;
330 if (sxinc > 0) {
331 while ((sxinc <<= 1) > 0) {
332 shift++;
333 }
334 }
335 /*
336 * Now determine the scaled integer increments used to traverse
337 * the source image for each destination pixel. Our shift value
338 * has been calculated above so that any location within the
339 * destination image can be represented as a scaled integer
340 * without incurring integer overflow.
341 *
342 * But we also need to worry about overflow of the sxinc and syinc
343 * parameters. We already know that "srcw<<shift" and "srch<<shift"
344 * cannot overflow a jint, and the only time that sxinc and syinc
345 * can be larger than those two values is if ddy2-ddy1 or ddx2-ddx1
346 * are smaller than 1. Since this situation implies that the
347 * output area is no more than one pixel wide or tall, then we are
348 * stepping by distances that are at least the size of the image
349 * and only one destination pixel will ever be rendered - thus the
350 * amount by which we step is largely irrelevant since after
351 * drawing the first "in bounds" pixel, we will step completely
352 * out of the source image and render nothing more. As a result,
353 * we assign the appropriate "size of image" stepping parameter
354 * for any scale to smaller than one device pixel.
355 */
356 yunderflow = (ddy2 - ddy1) < 1.0;
357 scaley = (((jdouble) (sy2 - sy1)) / (ddy2 - ddy1)) * (1 << shift);
358 syinc = (yunderflow ? ((sy2 - sy1) << shift) : (jint) scaley);
359 xunderflow = (ddx2 - ddx1) < 1.0;
360 scalex = (((jdouble) (sx2 - sx1)) / (ddx2 - ddx1)) * (1 << shift);
361 sxinc = (xunderflow ? ((sx2 - sx1) << shift) : (jint) scalex);
362 tilesize = findpow2tilesize(shift, sxinc, syinc);
363
364
365 srcInfo.bounds.x1 = sx1;
366 srcInfo.bounds.y1 = sy1;
367 srcInfo.bounds.x2 = sx2;
368 srcInfo.bounds.y2 = sy2;
369 if (srcOps->Lock(env, srcOps, &srcInfo, pPrim->srcflags) != SD_SUCCESS) {
370 return;
371 }
372 if (srcInfo.bounds.x2 <= srcInfo.bounds.x1 ||
373 srcInfo.bounds.y2 <= srcInfo.bounds.y1)
374 {
375 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
376 return;
377 }
378
379 /*
380 * Only refine lower bounds if lower source coordinate was clipped
381 * because the math will work out to be exactly idx1, idy1 if not.
382 * Always refine upper bounds since we want to make sure not to
383 * overstep the source bounds based on the tiled iteration math.
384 *
385 * For underflow cases, simply check if the SRCLOC for the single
386 * destination pixel maps inside the source bounds. If it does,
387 * we render that pixel row or column (and only that pixel row
388 * or column). If it does not, we render nothing.
389 */
390 idx1 = (jint) ceil(ddx1 - 0.5);
391 idy1 = (jint) ceil(ddy1 - 0.5);
392 if (xunderflow) {
393 jdouble x = sx1 + (SRCLOC(idx1, ddx1, scalex) / (1 << shift));
394 dstInfo.bounds.x1 = dstInfo.bounds.x2 = idx1;
395 if (x >= srcInfo.bounds.x1 && x < srcInfo.bounds.x2) {
396 dstInfo.bounds.x2++;
397 }
398 } else {
399 dstInfo.bounds.x1 = ((srcInfo.bounds.x1 <= sx1)
400 ? idx1
401 : refine(idx1, ddx1, tilesize, scalex,
402 (srcInfo.bounds.x1-sx1) << shift, sxinc));
403 dstInfo.bounds.x2 = refine(idx1, ddx1, tilesize, scalex,
404 (srcInfo.bounds.x2-sx1) << shift, sxinc);
405 }
406 if (yunderflow) {
407 jdouble y = sy1 + (SRCLOC(idy1, ddy1, scaley) / (1 << shift));
408 dstInfo.bounds.y1 = dstInfo.bounds.y2 = idy1;
409 if (y >= srcInfo.bounds.y1 && y < srcInfo.bounds.y2) {
410 dstInfo.bounds.y2++;
411 }
412 } else {
413 dstInfo.bounds.y1 = ((srcInfo.bounds.y1 <= sy1)
414 ? idy1
415 : refine(idy1, ddy1, tilesize, scaley,
416 (srcInfo.bounds.y1-sy1) << shift, syinc));
417 dstInfo.bounds.y2 = refine(idy1, ddy1, tilesize, scaley,
418 (srcInfo.bounds.y2-sy1) << shift, syinc);
419 }
420
421 SurfaceData_IntersectBounds(&dstInfo.bounds, &clipInfo.bounds);
422 dstFlags = pPrim->dstflags;
423 if (!Region_IsRectangular(&clipInfo)) {
424 dstFlags |= SD_LOCK_PARTIAL_WRITE;
425 }
426 if (dstOps->Lock(env, dstOps, &dstInfo, dstFlags) != SD_SUCCESS) {
427 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
428 return;
429 }
430
431 if (dstInfo.bounds.x2 > dstInfo.bounds.x1 &&
432 dstInfo.bounds.y2 > dstInfo.bounds.y1)
433 {
434 srcOps->GetRasInfo(env, srcOps, &srcInfo);
435 dstOps->GetRasInfo(env, dstOps, &dstInfo);
436 if (srcInfo.rasBase && dstInfo.rasBase) {
437 SurfaceDataBounds span;
438 void *pSrc = PtrCoord(srcInfo.rasBase,
439 sx1, srcInfo.pixelStride,
440 sy1, srcInfo.scanStride);
441
442 Region_IntersectBounds(&clipInfo, &dstInfo.bounds);
443 Region_StartIteration(env, &clipInfo);
444 if (tilesize >= (ddx2 - ddx1) &&
445 tilesize >= (ddy2 - ddy1))
446 {
447 /* Do everything in one tile */
448 jint sxloc = (jint) SRCLOC(idx1, ddx1, scalex);
449 jint syloc = (jint) SRCLOC(idy1, ddy1, scaley);
450 while (Region_NextIteration(&clipInfo, &span)) {
451 jint tsxloc = sxloc;
452 jint tsyloc = syloc;
453 void *pDst;
454
455 if (span.y1 > idy1) {
456 tsyloc += syinc * (span.y1 - idy1);
457 }
458 if (span.x1 > idx1) {
459 tsxloc += sxinc * (span.x1 - idx1);
460 }
461
462 pDst = PtrCoord(dstInfo.rasBase,
463 span.x1, dstInfo.pixelStride,
464 span.y1, dstInfo.scanStride);
465 (*pPrim->funcs.scaledblit)(pSrc, pDst,
466 span.x2-span.x1, span.y2-span.y1,
467 tsxloc, tsyloc,
468 sxinc, syinc, shift,
469 &srcInfo, &dstInfo,
470 pPrim, &compInfo);
471 }
472 } else {
473 /* Break each clip span into tiles for better accuracy. */
474 while (Region_NextIteration(&clipInfo, &span)) {
475 jint tilex, tiley;
476 jint sxloc, syloc;
477 jint x1, y1, x2, y2;
478 void *pDst;
479
480 for (tiley = TILESTART(span.y1, idy1, tilesize);
481 tiley < span.y2;
482 tiley += tilesize)
483 {
484 /* Clip span to Y range of current tile */
485 y1 = tiley;
486 y2 = tiley + tilesize;
487 if (y1 < span.y1) y1 = span.y1;
488 if (y2 > span.y2) y2 = span.y2;
489
490 /* Find scaled source coordinate of first pixel */
491 syloc = (jint) SRCLOC(tiley, ddy1, scaley);
492 if (y1 > tiley) {
493 syloc += syinc * (y1 - tiley);
494 }
495
496 for (tilex = TILESTART(span.x1, idx1, tilesize);
497 tilex < span.x2;
498 tilex += tilesize)
499 {
500 /* Clip span to X range of current tile */
501 x1 = tilex;
502 x2 = tilex + tilesize;
503 if (x1 < span.x1) x1 = span.x1;
504 if (x2 > span.x2) x2 = span.x2;
505
506 /* Find scaled source coordinate of first pixel */
507 sxloc = (jint) SRCLOC(tilex, ddx1, scalex);
508 if (x1 > tilex) {
509 sxloc += sxinc * (x1 - tilex);
510 }
511
512 pDst = PtrCoord(dstInfo.rasBase,
513 x1, dstInfo.pixelStride,
514 y1, dstInfo.scanStride);
515 (*pPrim->funcs.scaledblit)(pSrc, pDst, x2-x1, y2-y1,
516 sxloc, syloc,
517 sxinc, syinc, shift,
518 &srcInfo, &dstInfo,
519 pPrim, &compInfo);
520 }
521 }
522 }
523 }
524 Region_EndIteration(env, &clipInfo);
525 }
526 SurfaceData_InvokeRelease(env, dstOps, &dstInfo);
527 SurfaceData_InvokeRelease(env, srcOps, &srcInfo);
528 }
529 SurfaceData_InvokeUnlock(env, dstOps, &dstInfo);
530 SurfaceData_InvokeUnlock(env, srcOps, &srcInfo);
531}
532