/*
This code does smooth scaling of a pixmap.

This function returns a new pixmap representing the area starting at (0,0)
given by taking the source pixmap src, scaling it to width w, and height h,
and then positioning it at (frac(x),frac(y)).

This is a cut-down version of draw_scale.c that only copes with filters
that return values strictly in the 0..1 range, and uses bytes for
intermediate results rather than ints.
*/

#include "mupdf/fitz.h"
#include "draw-imp.h"

#include <math.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

/* Do we special case handling of single pixel high/wide images? The
 * 'purest' handling is given by not special casing them, but certain
 * files that use such images 'stack' them to give full images. Not
 * special casing them results in them being fainter and giving noticeable
 * rounding errors.
 */
#define SINGLE_PIXEL_SPECIALS

/*
Consider a row of source samples, src, of width src_w, positioned at x,
scaled to width dst_w.

src[i] is centred at: x + (i + 0.5)*dst_w/src_w

Therefore the distance between the centre of the jth output pixel and
the centre of the ith source sample is:

	dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)

When scaling up, therefore:

	dst[j] = SUM(filter(dist[j,i]) * src[i])
		(for all ints i)

This can be simplified by noticing that filters are only non zero within
a given filter width (henceforth called W). So:

	dst[j] = SUM(filter(dist[j,i]) * src[i])
		(for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)

When scaling down, each filtered source sample is stretched to be wider
to avoid aliasing issues. This effectively reduces the distance between
centres.

	dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
		(where F = dst_w/src_w)
		(for ints i, s.t. (j-W)/F < i < (j+W)/F)

*/
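
/*
For example (illustrative numbers only): scaling a 2 sample row up to 4
samples at x = 0 gives dst_w/src_w = 2, so src[0] is centred at
0.5*2 = 1 in destination space, and dst[1] picks up a contribution of
filter(1 + 0.5 - 1) = filter(0.5) from it. Scaling the same row down to
1 sample gives F = 1/2: each distance is scaled by F before filtering
(widening the filter's reach) and each filtered value is scaled by F
(conserving overall brightness).
*/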

typedef struct fz_scale_filter_s fz_scale_filter;

struct fz_scale_filter_s
{
	int width;
	float (*fn)(fz_scale_filter *, float);
};

/* Image scale filters */

static float
triangle(fz_scale_filter *filter, float f)
{
	if (f >= 1)
		return 0;
	return 1-f;
}

static float
box(fz_scale_filter *filter, float f)
{
	if (f >= 0.5f)
		return 0;
	return 1;
}

static float
simple(fz_scale_filter *filter, float x)
{
	if (x >= 1)
		return 0;
	return 1 + (2*x - 3)*x*x;
}

fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };
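
/*
Any other filter whose values stay within the 0..1 range could be slotted
in here. As an illustration only (this is hypothetical, not part of the
file), a gaussian-like filter of width 2 might look like:

	static float
	gauss(fz_scale_filter *filter, float f)
	{
		if (f >= 2)
			return 0;
		return expf(-2*f*f);
	}

	fz_scale_filter fz_scale_filter_gauss = { 2, gauss };

Filters with negative lobes (lanczos, mitchell etc) fall outside the
0..1 range that this cut-down file supports and would overflow its byte
arithmetic.
*/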

/*
We build ourselves a set of tables to contain the precalculated weights
for a given set of scale settings.

The first dst_w entries in index are the offsets (within index) of the
sets of weights for each destination pixel.

Each of the sets of weights is a set of values consisting of:
	the minimum source pixel index used for this destination pixel
	the number of weights used for this destination pixel
	the weights themselves

So to calculate dst[i] we do the following:

	weights = &index[index[i]];
	min = *weights++;
	len = *weights++;
	dst[i] = 0;
	while (len-- > 0)
		dst[i] += src[min++] * *weights++

In addition, we guarantee that at the end of this process weights will now
point to the weights value for dst pixel i+1.

In the simplest version of this algorithm, we would scale the whole image
horizontally first into a temporary buffer, then scale that temporary
buffer again vertically to give us our result. Using such a simple
algorithm would mean that we could use the same style of weights for both
horizontal and vertical scaling.

Unfortunately, this would also require a large temporary buffer,
particularly in the case where we are scaling up.

We therefore modify the algorithm as follows; we scale scanlines from the
source image horizontally into a temporary buffer, until we have all the
contributors for a given output scanline. We then produce that output
scanline from the temporary buffer. In this way we restrict the height
of the temporary buffer to a small fraction of the final size.

Unfortunately, this means that the pseudo code for recombining a
scanline of fully scaled pixels is as follows:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x=0 to dst_w)
		min2 = min
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++

i.e. it requires a % operation for every source pixel - this is typically
expensive.

To avoid this, we alter the order in which vertical weights are stored,
so that they are ordered in the same order as the temporary buffer lines
would appear. This simplifies the algorithm to:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x=0 to dst_w)
		min2 = 0
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][min2++] * *weights2++

This means that len may be larger than it needs to be (due to the
possible inclusion of a zero weight row or two), but in practice this
is only an increase of 1 or 2 at worst.

We implement this by generating the weights as normal (but ensuring we
leave enough space) and then reordering afterwards.

*/
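
/*
As a concrete (hypothetical) illustration of the layout: suppose dst_w = 2,
dst[0] uses src[3..4] with weights 100,156 and dst[1] uses src[4..5] with
weights 128,128. Then:

	index[0] = 2	(offset of the weights for dst[0])
	index[1] = 6	(offset of the weights for dst[1])
	index[2] = 3	(min for dst[0])
	index[3] = 2	(len for dst[0])
	index[4] = 100
	index[5] = 156
	index[6] = 4	(min for dst[1])
	index[7] = 2	(len for dst[1])
	index[8] = 128
	index[9] = 128
*/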

typedef struct fz_weights_s fz_weights;

/* This structure is accessed from ARM code - bear this in mind before
 * altering it! */
struct fz_weights_s
{
	int flip;	/* true if outputting reversed */
	int count;	/* number of output pixels we have records for in this table */
	int max_len;	/* Maximum number of weights for any one output pixel */
	int n;		/* number of components (src->n) */
	int new_line;	/* True if no weights for the current output pixel */
	int patch_l;	/* How many output pixels we skip over */
	int index[1];
};

struct fz_scale_cache_s
{
	int src_w;
	float x;
	float dst_w;
	fz_scale_filter *filter;
	int vertical;
	int dst_w_int;
	int patch_l;
	int patch_r;
	int n;
	int flip;
	fz_weights *weights;
};

static fz_weights *
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
{
	int max_len;
	fz_weights *weights;

	if (src_w > dst_w)
	{
		/* Scaling down, so there will be a maximum of
		 * 2*filterwidth*src_w/dst_w src pixels
		 * contributing to each dst pixel. */
		max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
		if (max_len > src_w)
			max_len = src_w;
	}
	else
	{
		/* Scaling up, so there will be a maximum of
		 * 2*filterwidth src pixels contributing to each dst pixel.
		 */
		max_len = 2 * filter->width;
	}
	/* We need the size of the struct,
	 * plus patch_w*sizeof(int) for the index
	 * plus (2+max_len)*sizeof(int) for the weights
	 * plus room for an extra set of weights for reordering.
	 */
	weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(patch_w+1)*sizeof(int));
	if (!weights)
		return NULL;
	weights->count = -1;
	weights->max_len = max_len;
	weights->index[0] = patch_w;
	weights->n = n;
	weights->patch_l = patch_l;
	weights->flip = flip;
	return weights;
}
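
/* For example (illustrative numbers only): scaling a 300 pixel wide row
 * down to 100 with the box filter (width 1) gives
 * max_len = ceil(2*1*300/100) = 6, so each output pixel reserves room
 * for a min, a len, and up to 6 weights.
 */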

/* j is destination pixel in the patch_l..patch_l+patch_w range */
static void
init_weights(fz_weights *weights, int j)
{
	int index;

	j -= weights->patch_l;
	assert(weights->count == j-1);
	weights->count++;
	weights->new_line = 1;
	if (j == 0)
		index = weights->index[0];
	else
	{
		index = weights->index[j-1];
		index += 2 + weights->index[index+1];
	}
	weights->index[j] = index; /* row pointer */
	weights->index[index] = 0; /* min */
	weights->index[index+1] = 0; /* len */
}

static void
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
	float x, float F, float G, int src_w, float dst_w)
{
	float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
	float f;
	int min, len, index, weight;

	dist *= G;
	if (dist < 0)
		dist = -dist;
	f = filter->fn(filter, dist)*F;
	weight = (int)(256*f+0.5f);

	/* Ensure i is in range */
	if (i < 0 || i >= src_w)
		return;
	if (weight == 0)
	{
		/* We add a fudge factor here to allow for extreme downscales
		 * where all the weights round to 0. Ensure that at least one
		 * (arbitrarily the first one) is non zero. */
		if (weights->new_line && f > 0)
			weight = 1;
		else
			return;
	}

	/* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
	j -= weights->patch_l;
	if (weights->new_line)
	{
		/* New line */
		weights->new_line = 0;
		index = weights->index[j]; /* row pointer */
		weights->index[index] = i; /* min */
		weights->index[index+1] = 0; /* len */
	}
	index = weights->index[j];
	min = weights->index[index++];
	len = weights->index[index++];
	while (i < min)
	{
		/* This only happens in rare cases, but we need to insert
		 * one earlier. In exceedingly rare cases we may need to
		 * insert more than one earlier. */
		int k;

		for (k = len; k > 0; k--)
		{
			weights->index[index+k] = weights->index[index+k-1];
		}
		weights->index[index] = 0;
		min--;
		len++;
		weights->index[index-2] = min;
		weights->index[index-1] = len;
	}
	if (i-min >= len)
	{
		/* The usual case */
		while (i-min >= ++len)
		{
			weights->index[index+len-1] = 0;
		}
		assert(len-1 == i-min);
		weights->index[index+i-min] = weight;
		weights->index[index-1] = len;
		assert(len <= weights->max_len);
	}
	else
	{
		/* Infrequent case */
		weights->index[index+i-min] += weight;
	}
}
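
/* Note that the weights are 8.8 fixed point: a filter value of 0.5
 * becomes (int)(256*0.5 + 0.5) = 128. The scaling loops below seed
 * their accumulators with 128 (0.5 in 8.8) so that the final >>8
 * rounds to nearest rather than truncating.
 */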

static void
reorder_weights(fz_weights *weights, int j, int src_w)
{
	int idx = weights->index[j - weights->patch_l];
	int min = weights->index[idx++];
	int len = weights->index[idx++];
	int max = weights->max_len;
	int tmp = idx+max;
	int i, off;

	/* Copy into the temporary area */
	memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);

	/* Pad out if required */
	assert(len <= max);
	assert(min+len <= src_w);
	off = 0;
	if (len < max)
	{
		memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
		len = max;
		if (min + len > src_w)
		{
			off = min + len - src_w;
			min = src_w - len;
			weights->index[idx-2] = min;
		}
		weights->index[idx-1] = len;
	}

	/* Copy back into the proper places */
	for (i = 0; i < len; i++)
	{
		weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
	}
}
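
/* For example (illustrative numbers, assuming the row sits well inside
 * the source): with max_len = 4, a row with min = 5 and weights
 * w0,w1,w2 is padded with a zero w3 and stored at positions 5%4, 6%4,
 * 7%4, 8%4, i.e. in the order w3,w0,w1,w2 - exactly the order in which
 * source lines 5,6,7,8 will sit in the rotating temporary buffer.
 */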

/* Due to rounding and edge effects, the sums for the weights sometimes don't
 * add up to 256. This causes visible rendering effects. Therefore, we take
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
 * 256 for all pixels that are completely covered. See bug #691629. */
static void
check_weights(fz_weights *weights, int j, int w, float x, float wf)
{
	int idx, len;
	int sum = 0;
	int max = -256;
	int maxidx = 0;
	int i;

	idx = weights->index[j - weights->patch_l];
	idx++; /* min */
	len = weights->index[idx++];

	for (i=0; i < len; i++)
	{
		int v = weights->index[idx++];
		sum += v;
		if (v > max)
		{
			max = v;
			maxidx = idx;
		}
	}
	/* If we aren't the first or last pixel, OR if the sum is too big
	 * then adjust it. */
	if (((j != 0) && (j != w-1)) || (sum > 256))
		weights->index[maxidx-1] += 256-sum;
	/* Otherwise, if we are the first pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == 0) && (x < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
	/* Finally, if we are the last pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
}
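
/* For example, if rounding gives an interior pixel the weights
 * 100,57,100 (sum 257), the first largest weight is trimmed to 99 so
 * that the row sums to exactly 256 and fully covered pixels stay
 * fully covered. */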

static fz_weights *
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
{
	fz_weights *weights;
	float F, G;
	float window;
	int j;

	if (cache)
	{
		if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
			cache->filter == filter && cache->vertical == vertical &&
			cache->dst_w_int == dst_w_int &&
			cache->patch_l == patch_l && cache->patch_r == patch_r &&
			cache->n == n && cache->flip == flip)
		{
			return cache->weights;
		}
		cache->src_w = src_w;
		cache->x = x;
		cache->dst_w = dst_w;
		cache->filter = filter;
		cache->vertical = vertical;
		cache->dst_w_int = dst_w_int;
		cache->patch_l = patch_l;
		cache->patch_r = patch_r;
		cache->n = n;
		cache->flip = flip;
		fz_free(ctx, cache->weights);
		cache->weights = NULL;
	}

	if (dst_w < src_w)
	{
		/* Scaling down */
		F = dst_w / src_w;
		G = 1;
	}
	else
	{
		/* Scaling up */
		F = 1;
		G = src_w / dst_w;
	}
	window = filter->width / F;
	weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
	if (!weights)
		return NULL;
	for (j = patch_l; j < patch_r; j++)
	{
		/* find the position of the centre of dst[j] in src space */
		float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
		int l, r;
		l = ceilf(centre - window);
		r = floorf(centre + window);
		init_weights(weights, j);
		for (; l <= r; l++)
		{
			add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
		}
		check_weights(weights, j, dst_w_int, x, dst_w);
		if (vertical)
		{
			reorder_weights(weights, j, src_w);
		}
	}
	weights->count++; /* weights->count = dst_w_int now */
	if (cache)
	{
		cache->weights = weights;
	}
	return weights;
}

static void
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i, j, n;
	const unsigned char *min;
	int tmp[FZ_MAX_COLORS];
	int *t = tmp;

	n = weights->n;
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
			dst -= n*2;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
		}
	}
}

#ifdef ARCH_ARM

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	".syntax unified\n"
	"stmfd r13!,{r4-r7,r9,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 5f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3 @ dst += count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 3f @ while (len >= 0) \n"
	"2: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6, r7, r5 @ g += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ g += r14 * r12 \n"
	"bge 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"strb r5,[r0, #-1]! @ *--dst=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	"5:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"6:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 9f @ while (len > 0) \n"
	"7: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ a += r14 * r12 \n"
	"bge 7b @ } \n"
	"9: \n"
	"mov r5, r5, LSR #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 6b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r6,r9-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #1 @ dst += 2*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0, #-2]! @ *--dst=a \n"
	"strb r6, [r0, #1] @ *--dst=g \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	"4:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r14,[r2], #4 @ r10 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=g \n"
	"strb r6, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #1 @ \n"
	"add r0, r0, r3 @ dst += 3*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = r = 128 \n"
	"mov r6, #128 @ r6 = g = 128 \n"
	"add r7, r1, r4, LSL #1 @ \n"
	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
	"mov r7, #128 @ r7 = b = 128 \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ r >>= 8 \n"
	"mov r6, r6, lsr #8 @ g >>= 8 \n"
	"mov r7, r7, lsr #8 @ b >>= 8 \n"
	"strb r5, [r0, #-3]! @ *--dst=r \n"
	"strb r6, [r0, #1] @ *--dst=g \n"
	"strb r7, [r0, #2] @ *--dst=b \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	"4:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = r = 128 \n"
	"mov r6, #128 @ r6 = g = 128 \n"
	"add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n"
	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
	"mov r7, #128 @ r7 = b = 128 \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r14,[r2], #4 @ r10 = *contrib++ \n"
	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r5, r5, lsr #8 @ r >>= 8 \n"
	"mov r6, r6, lsr #8 @ g >>= 8 \n"
	"mov r7, r7, lsr #8 @ b >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=r \n"
	"strb r6, [r0], #1 @ *dst++=g \n"
	"strb r7, [r0], #1 @ *dst++=b \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"ldr r5,=0x00800080 @ r5 = rounding \n"
	"ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #2 @ dst += 4*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r14 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0, #-4]! @ *--dst=r \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	"4: \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r14 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0], #4 @ *dst++=r \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r12= row \n"
	"ldr r14,[r13,#4*9] @ r14= n \n"
	"ldr r12,[r13,#4*10] @ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"mul r3, r14, r3 @ r3 = width *= n \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"subs r6, r3, #4 @ r6 = x = width-4 \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"blt 4f @ while (x >= 0) { \n"
#ifndef ARCH_UNALIGNED_OK
	"tst r3, #3 @ if ((r3 & 3) \n"
	"tsteq r1, #3 @ || (r1 & 3)) \n"
	"bne 4f @ can't do fast code \n"
#endif
	"ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
	"1: \n"
	"ldr r7, =0x00800080 @ r5 = val0 = round \n"
	"stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n"
	" @ r1 = min = src \n"
	" @ r2 = contrib2-4 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"mov r5, r7 @ r7 = val1 = round \n"
	"ble 3f @ while (len2-- > 0) { \n"
	"2: \n"
	"ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n"
	"ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
	"subs r8, r8, #1 @ len2-- \n"
	"and r11,r9, r12 @ r11= __22__00 \n"
	"and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
	"mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
	"mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
	"bgt 2b @ } \n"
	"and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
	"and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
	"orr r5, r5, r7 @ r5 = 33221100 \n"
	"3: \n"
	"ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
	"subs r6, r6, #4 @ x-- \n"
	"add r1, r1, #4 @ src++ \n"
	"str r5, [r0], #4 @ *dst++ = val \n"
	"bge 1b @ \n"
	"4: @ } (Less than 4 to go) \n"
	"adds r6, r6, #4 @ r6 = x += 4 \n"
	"beq 8f @ if (x == 0) done \n"
	"5: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r6, r6, #1 @ x-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 5b @ \n"
	"8: \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"mov r11,#255 @ r11= 255 \n"
	"ldr r12,[r13,#4*10] @ r12= row \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r11= 255 \n"
	"@ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"mov r6, r3 @ r6 = x = width \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"5: \n"
	"ldr r4,[r13,#4*9] @ r10= nn = n \n"
	"1: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r4, r4, #1 @ r4 = nn-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 1b @ \n"
	"subs r6, r6, #1 @ x-- \n"
	"strb r11,[r0], #1 @ *dst++ = 255 \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}
#else

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 1);
	if (weights->flip)
	{
		dst += weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(val>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(val>>8);
		}
	}
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 2);
	if (weights->flip)
	{
		dst += 2*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(c2>>8);
			*--dst = (unsigned char)(c1>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(c1>>8);
			*dst++ = (unsigned char)(c2>>8);
		}
	}
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 3);
	if (weights->flip)
	{
		dst += 3*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			int c3 = 128;
			min = &src[3 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				int c = *contrib++;
				c1 += *min++ * c;
				c2 += *min++ * c;
				c3 += *min++ * c;
			}
			*--dst = (unsigned char)(c3>>8);
			*--dst = (unsigned char)(c2>>8);
			*--dst = (unsigned char)(c1>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			int c3 = 128;
			min = &src[3 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				int c = *contrib++;
				c1 += *min++ * c;
				c2 += *min++ * c;
				c3 += *min++ * c;
			}
			*dst++ = (unsigned char)(c1>>8);
			*dst++ = (unsigned char)(c2>>8);
			*dst++ = (unsigned char)(c3>>8);
		}
	}
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 4);
	if (weights->flip)
	{
		dst += 4*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(a>>8);
			*--dst = (unsigned char)(b>>8);
			*--dst = (unsigned char)(g>>8);
			*--dst = (unsigned char)(r>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(r>>8);
			*dst++ = (unsigned char)(g>>8);
			*dst++ = (unsigned char)(b>>8);
			*dst++ = (unsigned char)(a>>8);
		}
	}
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
	const int *contrib = &weights->index[weights->index[row]];
	int len, x;
	int width = w * n;

	contrib++; /* Skip min */
	len = *contrib++;
	for (x=width; x > 0; x--)
	{
		const unsigned char *min = src;
		int val = 128;
		int len2 = len;
		const int *contrib2 = contrib;

		while (len2-- > 0)
		{
			val += *min * *contrib2++;
			min += width;
		}
		*dst++ = (unsigned char)(val>>8);
		src++;
	}
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
	const int *contrib = &weights->index[weights->index[row]];
	int len, x;
	int width = w * n;

	contrib++; /* Skip min */
	len = *contrib++;
	for (x=w; x > 0; x--)
	{
		int nn;
		for (nn = n; nn > 0; nn--)
		{
			const unsigned char *min = src;
			int val = 128;
			int len2 = len;
			const int *contrib2 = contrib;

			while (len2-- > 0)
			{
				val += *min * *contrib2++;
				min += width;
			}
			*dst++ = (unsigned char)(val>>8);
			src++;
		}
		*dst++ = 255;
	}
}
#endif

#ifdef SINGLE_PIXEL_SPECIALS
static void
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
{
	int i;

	for (i = n; i > 0; i--)
		*dst++ = *src++;
	if (forcealpha)
		*dst++ = 255;
	n += forcealpha;
	for (i = w-1; i > 0; i--)
	{
		memcpy(dst, dst-n, n);
		dst += n;
	}
	w *= n;
	dst -= w;
	h--;
	while (h--)
	{
		memcpy(dst+stride, dst, w);
		dst += stride;
	}
}

static void
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j, n, nf;
	int tmp[FZ_MAX_COLORS];

	n = weights->n;
	nf = n + forcealpha;
	/* Scale a single row */
	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*nf;
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			dst -= 2*nf;
		}
		dst += nf + dstride;
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
		}
		dst += dstride - weights->count * nf;
	}
	/* And then duplicate it h times */
	nf *= weights->count;
	while (--h > 0)
	{
		memcpy(dst, dst-dstride, nf);
		dst += dstride;
	}
}

static void
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];
	int nf = n + forcealpha;

	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		src_w = (src_w-1)*sstride;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min = src_w-min*sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
				min -= sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
				min += sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf;
		}
	}
}
#endif /* SINGLE_PIXEL_SPECIALS */

static void
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
{
	const int *contrib = &rows->index[rows->index[0]];
	int len, i, t, b;

	/* Calculate the edge alpha values */
	contrib++; /* Skip min */
	len = *contrib++;
	t = 0;
	while (len--)
		t += *contrib++;
	for (i=rows->count-2; i > 0; i--)
	{
		contrib++; /* Skip min */
		len = *contrib++;
		contrib += len;
	}
	b = 0;
	if (i == 0)
	{
		contrib++;
		len = *contrib++;
		while (len--)
			b += *contrib++;
	}
	if (rows->flip && i == 0)
	{
		*tp = b;
		*bp = t;
	}
	else
	{
		*tp = t;
		*bp = b;
	}
}

static void
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
{
	int t, l, r, b, tl, tr, bl, br, x, y;
	unsigned char *dp = pix->samples;
	int w = pix->w;
	int n = pix->n;
	int span = w >= 2 ? (w-1)*n : 0;
	int stride = pix->stride;

	get_alpha_edge_values(rows, &t, &b);
	get_alpha_edge_values(cols, &l, &r);

	l = (255 * l + 128)>>8;
	r = (255 * r + 128)>>8;
	tl = (l * t + 128)>>8;
	tr = (r * t + 128)>>8;
	bl = (l * b + 128)>>8;
	br = (r * b + 128)>>8;
	t = (255 * t + 128)>>8;
	b = (255 * b + 128)>>8;
	dp += n-1;
	*dp = tl;
	dp += n;
	for (x = w-2; x > 0; x--)
	{
		*dp = t;
		dp += n;
	}
	if (x == 0)
	{
		*dp = tr;
		dp += n;
	}
	dp += stride - w*n;
	for (y = pix->h-2; y > 0; y--)
	{
		dp[span] = r;
		*dp = l;
		dp += stride;
	}
	if (y == 0)
	{
		*dp = bl;
		dp += n;
		for (x = w-2; x > 0; x--)
		{
			*dp = b;
			dp += n;
		}
		if (x == 0)
		{
			*dp = br;
		}
	}
}

fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
{
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}
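
/*
Typical use (a sketch only - error handling omitted): scale a pixmap to
200x300 with its top left corner pinned at the origin:

	fz_pixmap *scaled = fz_scale_pixmap(ctx, src, 0, 0, 200, 300, NULL);

A NULL return means the scale was degenerate (an empty clipped patch, or
an extreme scale factor that risks overflow).
*/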

fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	fz_scale_filter *filter = &fz_scale_filter_simple;
	fz_weights *contrib_rows = NULL;
	fz_weights *contrib_cols = NULL;
	fz_pixmap *output = NULL;
	unsigned char *temp = NULL;
	int max_row, temp_span, temp_rows, row;
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
	int flip_x, flip_y, forcealpha;
	fz_rect patch;

	fz_var(contrib_cols);
	fz_var(contrib_rows);

	/* Avoid extreme scales where overflows become problematic. */
	if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
		return NULL;
	if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
		return NULL;

	/* Clamp small ranges of w and h */
	if (w <= -1)
	{
	}
	else if (w < 0)
	{
		w = -1;
	}
	else if (w < 1)
	{
		w = 1;
	}
	if (h <= -1)
	{
	}
	else if (h < 0)
	{
		h = -1;
	}
	else if (h < 1)
	{
		h = 1;
	}

	/* If the src has an alpha, we'll make the dst have an alpha automatically.
	 * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
	forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);

	/* Find the destination bbox, width/height, and sub pixel offset,
	 * allowing for whether we're flipping or not. */
	/* The (x,y) position given describes where the top left corner
	 * of the source image should be mapped to (i.e. where (0,0) in image
	 * space ends up). Also there are differences in the way we scale
	 * horizontally and vertically. When scaling rows horizontally, we
	 * always read forwards through the source, and store either forwards
	 * or in reverse as required. When scaling vertically, we always store
	 * out forwards, but may feed in source rows in a different order.
	 *
	 * Consider the image rectangle 'r' to which the image is mapped,
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
	 * complete pixels.
	 *
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
	 * the image is x flipped or not. Whatever happens 0 <= x < 1.
	 * y is always R.ymax - r.ymax.
	 */
	/* dst_x_int is calculated to be the left of the scaled image, and
	 * x (the sub pixel offset) is the distance in from either the left
	 * or right pixel expanded edge. */
	flip_x = (w < 0);
	if (flip_x)
	{
		float tmp;
		w = -w;
		dst_x_int = floorf(x-w);
		tmp = ceilf(x);
		dst_w_int = (int)tmp;
		x = tmp - x;
		dst_w_int -= dst_x_int;
	}
	else
	{
		dst_x_int = floorf(x);
		x -= dst_x_int;
		dst_w_int = (int)ceilf(x + w);
	}
	/* dst_y_int is calculated to be the top of the scaled image, and
	 * y (the sub pixel offset) is the distance in from either the top
	 * or bottom pixel expanded edge.
	 */
	flip_y = (h < 0);
	if (flip_y)
	{
		float tmp;
		h = -h;
		dst_y_int = floorf(y-h);
		tmp = ceilf(y);
		dst_h_int = (int)tmp;
		y = tmp - y;
		dst_h_int -= dst_y_int;
	}
	else
	{
		dst_y_int = floorf(y);
		y -= dst_y_int;
		dst_h_int = (int)ceilf(y + h);
	}
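
	/* For example, an unflipped image placed at x = 3.4 with w = 2.2
	 * gives dst_x_int = 3, sub pixel offset x = 0.4 and
	 * dst_w_int = ceil(0.4 + 2.2) = 3: the 2.2 pixel wide image touches
	 * 3 destination pixel columns. */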

	fz_valgrind_pixmap(src);

	/* Step 0: Calculate the patch */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		if (flip_x)
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL;

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x);
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y);

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
	}
	fz_catch(ctx)
	{
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
			fz_valgrind_pixmap(output);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
			fz_valgrind_pixmap(output);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
		fz_valgrind_pixmap(output);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
		void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);

		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup;
		fz_try(ctx)
		{
			temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		switch (src->n)
		{
		default:
			row_scale_in = scale_row_to_temp;
			break;
		case 1: /* Image mask case or Greyscale case */
			row_scale_in = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale_in = scale_row_to_temp2;
			break;
		case 3: /* RGB case */
			row_scale_in = scale_row_to_temp3;
			break;
		case 4: /* RGBA or CMYK case */
			row_scale_in = scale_row_to_temp4;
			break;
		}
		row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				(*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row) : max_row)*src->stride], contrib_cols);
				max_row++;
			}

			(*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
		}
		fz_free(ctx, temp);

		if (forcealpha)
			adjust_alpha_edges(output, contrib_rows, contrib_cols);

		fz_valgrind_pixmap(output);
	}

cleanup:
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);

	return output;
}

void
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
{
	if (!sc)
		return;
	fz_free(ctx, sc->weights);
	fz_free(ctx, sc);
}

fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}
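
/*
When the same source is scaled repeatedly with the same parameters (for
instance when re-rendering tiles of the same page), the weight tables can
be reused via the cached variant. A minimal sketch (error handling
omitted):

	fz_scale_cache *cx = fz_new_scale_cache(ctx);
	fz_scale_cache *cy = fz_new_scale_cache(ctx);
	...
	dst = fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, cx, cy);
	...
	fz_drop_scale_cache(ctx, cx);
	fz_drop_scale_cache(ctx, cy);
*/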
| 1845 | |