1 | /* |
2 | This code does smooth scaling of a pixmap. |
3 | |
The main entry point, fz_scale_pixmap, returns a new pixmap representing
the area starting at (0,0) given by taking the source pixmap src,
scaling it to width w and height h,
6 | and then positioning it at (frac(x),frac(y)). |
7 | |
8 | This is a cut-down version of draw_scale.c that only copes with filters |
9 | that return values strictly in the 0..1 range, and uses bytes for |
10 | intermediate results rather than ints. |
11 | */ |
12 | |
13 | #include "mupdf/fitz.h" |
14 | #include "draw-imp.h" |
15 | |
16 | #include <math.h> |
17 | #include <string.h> |
18 | #include <assert.h> |
19 | #include <limits.h> |
20 | |
21 | /* Do we special case handling of single pixel high/wide images? The |
22 | * 'purest' handling is given by not special casing them, but certain |
23 | * files that use such images 'stack' them to give full images. Not |
 * special casing them results in them being fainter and giving noticeable
25 | * rounding errors. |
26 | */ |
27 | #define SINGLE_PIXEL_SPECIALS |
28 | |
29 | /* |
30 | Consider a row of source samples, src, of width src_w, positioned at x, |
31 | scaled to width dst_w. |
32 | |
33 | src[i] is centred at: x + (i + 0.5)*dst_w/src_w |
34 | |
35 | Therefore the distance between the centre of the jth output pixel and |
36 | the centre of the ith source sample is: |
37 | |
38 | dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w) |
39 | |
40 | When scaling up, therefore: |
41 | |
42 | dst[j] = SUM(filter(dist[j,i]) * src[i]) |
43 | (for all ints i) |
44 | |
This can be simplified by noticing that filters are only non-zero within
46 | a given filter width (henceforth called W). So: |
47 | |
48 | dst[j] = SUM(filter(dist[j,i]) * src[i]) |
49 | (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W) |
50 | |
51 | When scaling down, each filtered source sample is stretched to be wider |
52 | to avoid aliasing issues. This effectively reduces the distance between |
53 | centres. |
54 | |
55 | dst[j] = SUM(filter(dist[j,i] * F) * F * src[i]) |
56 | (where F = dst_w/src_w) |
57 | (for ints i, s.t. (j-W)/F < i < (j+W)/F) |
58 | |
59 | */ |
60 | |
61 | typedef struct fz_scale_filter_s fz_scale_filter; |
62 | |
63 | struct fz_scale_filter_s |
64 | { |
65 | int width; |
66 | float (*fn)(fz_scale_filter *, float); |
67 | }; |
68 | |
69 | /* Image scale filters */ |
70 | |
71 | static float |
72 | triangle(fz_scale_filter *filter, float f) |
73 | { |
74 | if (f >= 1) |
75 | return 0; |
76 | return 1-f; |
77 | } |
78 | |
79 | static float |
80 | box(fz_scale_filter *filter, float f) |
81 | { |
82 | if (f >= 0.5f) |
83 | return 0; |
84 | return 1; |
85 | } |
86 | |
87 | static float |
88 | simple(fz_scale_filter *filter, float x) |
89 | { |
90 | if (x >= 1) |
91 | return 0; |
92 | return 1 + (2*x - 3)*x*x; |
93 | } |
94 | |
95 | fz_scale_filter fz_scale_filter_box = { 1, box }; |
96 | fz_scale_filter fz_scale_filter_triangle = { 1, triangle }; |
97 | fz_scale_filter fz_scale_filter_simple = { 1, simple }; |
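
/* A quick sanity check of the filters above (all of width 1):
 * triangle(0.25) = 0.75, box(0.25) = 1, and simple(x) = 1 - 3x^2 + 2x^3
 * (a smoothstep), so simple(0) = 1, simple(0.5) = 0.5 and simple(1) = 0.
 * All return values lie in the 0..1 range that the header comment
 * requires. */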
98 | |
99 | /* |
100 | We build ourselves a set of tables to contain the precalculated weights |
101 | for a given set of scale settings. |
102 | |
The first dst_w entries in index are the indexes into index of the
sets of weights for each destination pixel.
105 | |
106 | Each of the sets of weights is a set of values consisting of: |
107 | the minimum source pixel index used for this destination pixel |
108 | the number of weights used for this destination pixel |
109 | the weights themselves |
110 | |
111 | So to calculate dst[i] we do the following: |
112 | |
113 | weights = &index[index[i]]; |
114 | min = *weights++; |
115 | len = *weights++; |
116 | dst[i] = 0; |
	while (len-- > 0)
		dst[i] += src[min++] * *weights++

In addition, we guarantee that at the end of this process weights will
point to the weights value for dst pixel i+1.
122 | |
123 | In the simplest version of this algorithm, we would scale the whole image |
124 | horizontally first into a temporary buffer, then scale that temporary |
125 | buffer again vertically to give us our result. Using such a simple |
algorithm would mean that we could use the same style of weights for both
127 | horizontal and vertical scaling. |
128 | |
129 | Unfortunately, this would also require a large temporary buffer, |
130 | particularly in the case where we are scaling up. |
131 | |
132 | We therefore modify the algorithm as follows; we scale scanlines from the |
133 | source image horizontally into a temporary buffer, until we have all the |
134 | contributors for a given output scanline. We then produce that output |
135 | scanline from the temporary buffer. In this way we restrict the height |
136 | of the temporary buffer to a small fraction of the final size. |
137 | |
138 | Unfortunately, this means that the pseudo code for recombining a |
139 | scanline of fully scaled pixels is as follows: |
140 | |
141 | weights = &index[index[y]]; |
142 | min = *weights++; |
143 | len = *weights++; |
144 | for (x=0 to dst_w) |
145 | min2 = min |
146 | len2 = len |
147 | weights2 = weights |
148 | dst[x] = 0; |
		while (len2-- > 0)
			dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
151 | |
i.e. it requires a % operation for every source pixel, which is typically
expensive.
154 | |
155 | To avoid this, we alter the order in which vertical weights are stored, |
156 | so that they are ordered in the same order as the temporary buffer lines |
157 | would appear. This simplifies the algorithm to: |
158 | |
159 | weights = &index[index[y]]; |
160 | min = *weights++; |
161 | len = *weights++; |
162 | for (x=0 to dst_w) |
163 | min2 = 0 |
164 | len2 = len |
165 | weights2 = weights |
166 | dst[x] = 0; |
		while (len2-- > 0)
			dst[x] += temp[x][min2++] * *weights2++
169 | |
170 | This means that len may be larger than it needs to be (due to the |
possible inclusion of a zero weight row or two), but in practice this
172 | is only an increase of 1 or 2 at worst. |
173 | |
174 | We implement this by generating the weights as normal (but ensuring we |
175 | leave enough space) and then reordering afterwards. |
176 | |
177 | */ |
178 | |
179 | typedef struct fz_weights_s fz_weights; |
180 | |
/* This structure is accessed from ARM code - bear this in mind before
 * altering it! The assembly below assumes flip at offset 0, count at
 * offset 4, and index[] starting at offset 24. */
183 | struct fz_weights_s |
184 | { |
185 | int flip; /* true if outputting reversed */ |
186 | int count; /* number of output pixels we have records for in this table */ |
187 | int max_len; /* Maximum number of weights for any one output pixel */ |
188 | int n; /* number of components (src->n) */ |
189 | int new_line; /* True if no weights for the current output pixel */ |
190 | int patch_l; /* How many output pixels we skip over */ |
191 | int index[1]; |
192 | }; |
193 | |
194 | struct fz_scale_cache_s |
195 | { |
196 | int src_w; |
197 | float x; |
198 | float dst_w; |
199 | fz_scale_filter *filter; |
200 | int vertical; |
201 | int dst_w_int; |
202 | int patch_l; |
203 | int patch_r; |
204 | int n; |
205 | int flip; |
206 | fz_weights *weights; |
207 | }; |
208 | |
209 | static fz_weights * |
210 | new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l) |
211 | { |
212 | int max_len; |
213 | fz_weights *weights; |
214 | |
215 | if (src_w > dst_w) |
216 | { |
217 | /* Scaling down, so there will be a maximum of |
218 | * 2*filterwidth*src_w/dst_w src pixels |
219 | * contributing to each dst pixel. */ |
220 | max_len = (int)ceilf((2 * filter->width * src_w)/dst_w); |
221 | if (max_len > src_w) |
222 | max_len = src_w; |
223 | } |
224 | else |
225 | { |
226 | /* Scaling up, so there will be a maximum of |
227 | * 2*filterwidth src pixels contributing to each dst pixel. |
228 | */ |
229 | max_len = 2 * filter->width; |
230 | } |
231 | /* We need the size of the struct, |
232 | * plus patch_w*sizeof(int) for the index |
233 | * plus (2+max_len)*sizeof(int) for the weights |
234 | * plus room for an extra set of weights for reordering. |
235 | */ |
236 | weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(patch_w+1)*sizeof(int)); |
237 | if (!weights) |
238 | return NULL; |
239 | weights->count = -1; |
240 | weights->max_len = max_len; |
241 | weights->index[0] = patch_w; |
242 | weights->n = n; |
243 | weights->patch_l = patch_l; |
244 | weights->flip = flip; |
245 | return weights; |
246 | } |
247 | |
248 | /* j is destination pixel in the patch_l..patch_l+patch_w range */ |
249 | static void |
250 | init_weights(fz_weights *weights, int j) |
251 | { |
252 | int index; |
253 | |
254 | j -= weights->patch_l; |
255 | assert(weights->count == j-1); |
256 | weights->count++; |
257 | weights->new_line = 1; |
258 | if (j == 0) |
259 | index = weights->index[0]; |
260 | else |
261 | { |
262 | index = weights->index[j-1]; |
263 | index += 2 + weights->index[index+1]; |
264 | } |
265 | weights->index[j] = index; /* row pointer */ |
266 | weights->index[index] = 0; /* min */ |
267 | weights->index[index+1] = 0; /* len */ |
268 | } |
269 | |
270 | static void |
271 | add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter, |
272 | float x, float F, float G, int src_w, float dst_w) |
273 | { |
274 | float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w); |
275 | float f; |
276 | int min, len, index, weight; |
277 | |
278 | dist *= G; |
279 | if (dist < 0) |
280 | dist = -dist; |
281 | f = filter->fn(filter, dist)*F; |
282 | weight = (int)(256*f+0.5f); |
283 | |
284 | /* Ensure i is in range */ |
285 | if (i < 0 || i >= src_w) |
286 | return; |
287 | if (weight == 0) |
288 | { |
289 | /* We add a fudge factor here to allow for extreme downscales |
290 | * where all the weights round to 0. Ensure that at least one |
291 | * (arbitrarily the first one) is non zero. */ |
292 | if (weights->new_line && f > 0) |
293 | weight = 1; |
294 | else |
295 | return; |
296 | } |
297 | |
298 | /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */ |
299 | j -= weights->patch_l; |
300 | if (weights->new_line) |
301 | { |
302 | /* New line */ |
303 | weights->new_line = 0; |
304 | index = weights->index[j]; /* row pointer */ |
305 | weights->index[index] = i; /* min */ |
306 | weights->index[index+1] = 0; /* len */ |
307 | } |
308 | index = weights->index[j]; |
309 | min = weights->index[index++]; |
310 | len = weights->index[index++]; |
311 | while (i < min) |
312 | { |
313 | /* This only happens in rare cases, but we need to insert |
314 | * one earlier. In exceedingly rare cases we may need to |
315 | * insert more than one earlier. */ |
316 | int k; |
317 | |
318 | for (k = len; k > 0; k--) |
319 | { |
320 | weights->index[index+k] = weights->index[index+k-1]; |
321 | } |
322 | weights->index[index] = 0; |
323 | min--; |
324 | len++; |
325 | weights->index[index-2] = min; |
326 | weights->index[index-1] = len; |
327 | } |
328 | if (i-min >= len) |
329 | { |
330 | /* The usual case */ |
331 | while (i-min >= ++len) |
332 | { |
333 | weights->index[index+len-1] = 0; |
334 | } |
335 | assert(len-1 == i-min); |
336 | weights->index[index+i-min] = weight; |
337 | weights->index[index-1] = len; |
338 | assert(len <= weights->max_len); |
339 | } |
340 | else |
341 | { |
342 | /* Infrequent case */ |
343 | weights->index[index+i-min] += weight; |
344 | } |
345 | } |
346 | |
347 | static void |
348 | reorder_weights(fz_weights *weights, int j, int src_w) |
349 | { |
350 | int idx = weights->index[j - weights->patch_l]; |
351 | int min = weights->index[idx++]; |
352 | int len = weights->index[idx++]; |
353 | int max = weights->max_len; |
354 | int tmp = idx+max; |
355 | int i, off; |
356 | |
357 | /* Copy into the temporary area */ |
358 | memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len); |
359 | |
360 | /* Pad out if required */ |
361 | assert(len <= max); |
362 | assert(min+len <= src_w); |
363 | off = 0; |
364 | if (len < max) |
365 | { |
366 | memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len)); |
367 | len = max; |
368 | if (min + len > src_w) |
369 | { |
370 | off = min + len - src_w; |
371 | min = src_w - len; |
372 | weights->index[idx-2] = min; |
373 | } |
374 | weights->index[idx-1] = len; |
375 | } |
376 | |
377 | /* Copy back into the proper places */ |
378 | for (i = 0; i < len; i++) |
379 | { |
380 | weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i]; |
381 | } |
382 | } |
383 | |
384 | /* Due to rounding and edge effects, the sums for the weights sometimes don't |
385 | * add up to 256. This causes visible rendering effects. Therefore, we take |
386 | * pains to ensure that they 1) never exceed 256, and 2) add up to exactly |
387 | * 256 for all pixels that are completely covered. See bug #691629. */ |
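/* For example, if an interior pixel ends up with weights { 127, 128 },
 * sum = 255, so 256-sum = 1 is added to the largest entry, giving
 * { 127, 129 }. (maxidx is recorded after idx has been incremented,
 * hence the maxidx-1 in the adjustments below.) */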
388 | static void |
389 | check_weights(fz_weights *weights, int j, int w, float x, float wf) |
390 | { |
391 | int idx, len; |
392 | int sum = 0; |
393 | int max = -256; |
394 | int maxidx = 0; |
395 | int i; |
396 | |
397 | idx = weights->index[j - weights->patch_l]; |
398 | idx++; /* min */ |
399 | len = weights->index[idx++]; |
400 | |
	for (i = 0; i < len; i++)
402 | { |
403 | int v = weights->index[idx++]; |
404 | sum += v; |
405 | if (v > max) |
406 | { |
407 | max = v; |
408 | maxidx = idx; |
409 | } |
410 | } |
411 | /* If we aren't the first or last pixel, OR if the sum is too big |
412 | * then adjust it. */ |
413 | if (((j != 0) && (j != w-1)) || (sum > 256)) |
414 | weights->index[maxidx-1] += 256-sum; |
415 | /* Otherwise, if we are the first pixel, and it's fully covered, then |
416 | * adjust it. */ |
417 | else if ((j == 0) && (x < 0.0001f) && (sum != 256)) |
418 | weights->index[maxidx-1] += 256-sum; |
419 | /* Finally, if we are the last pixel, and it's fully covered, then |
420 | * adjust it. */ |
421 | else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256)) |
422 | weights->index[maxidx-1] += 256-sum; |
423 | } |
424 | |
425 | static fz_weights * |
426 | make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache) |
427 | { |
428 | fz_weights *weights; |
429 | float F, G; |
430 | float window; |
431 | int j; |
432 | |
433 | if (cache) |
434 | { |
435 | if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w && |
436 | cache->filter == filter && cache->vertical == vertical && |
437 | cache->dst_w_int == dst_w_int && |
438 | cache->patch_l == patch_l && cache->patch_r == patch_r && |
439 | cache->n == n && cache->flip == flip) |
440 | { |
441 | return cache->weights; |
442 | } |
443 | cache->src_w = src_w; |
444 | cache->x = x; |
445 | cache->dst_w = dst_w; |
446 | cache->filter = filter; |
447 | cache->vertical = vertical; |
448 | cache->dst_w_int = dst_w_int; |
449 | cache->patch_l = patch_l; |
450 | cache->patch_r = patch_r; |
451 | cache->n = n; |
452 | cache->flip = flip; |
453 | fz_free(ctx, cache->weights); |
454 | cache->weights = NULL; |
455 | } |
456 | |
457 | if (dst_w < src_w) |
458 | { |
459 | /* Scaling down */ |
460 | F = dst_w / src_w; |
461 | G = 1; |
462 | } |
463 | else |
464 | { |
465 | /* Scaling up */ |
466 | F = 1; |
467 | G = src_w / dst_w; |
468 | } |
469 | window = filter->width / F; |
470 | weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l); |
471 | if (!weights) |
472 | return NULL; |
473 | for (j = patch_l; j < patch_r; j++) |
474 | { |
475 | /* find the position of the centre of dst[j] in src space */ |
476 | float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f; |
477 | int l, r; |
478 | l = ceilf(centre - window); |
479 | r = floorf(centre + window); |
480 | init_weights(weights, j); |
481 | for (; l <= r; l++) |
482 | { |
483 | add_weight(weights, j, l, filter, x, F, G, src_w, dst_w); |
484 | } |
485 | check_weights(weights, j, dst_w_int, x, dst_w); |
486 | if (vertical) |
487 | { |
488 | reorder_weights(weights, j, src_w); |
489 | } |
490 | } |
	weights->count++; /* weights->count = patch_r - patch_l now */
492 | if (cache) |
493 | { |
494 | cache->weights = weights; |
495 | } |
496 | return weights; |
497 | } |
498 | |
499 | static void |
500 | scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
501 | { |
502 | const int *contrib = &weights->index[weights->index[0]]; |
503 | int len, i, j, n; |
504 | const unsigned char *min; |
505 | int tmp[FZ_MAX_COLORS]; |
506 | int *t = tmp; |
507 | |
508 | n = weights->n; |
509 | for (j = 0; j < n; j++) |
510 | tmp[j] = 128; |
511 | if (weights->flip) |
512 | { |
513 | dst += (weights->count-1)*n; |
514 | for (i=weights->count; i > 0; i--) |
515 | { |
516 | min = &src[n * *contrib++]; |
517 | len = *contrib++; |
518 | while (len-- > 0) |
519 | { |
520 | for (j = n; j > 0; j--) |
521 | *t++ += *min++ * *contrib; |
522 | t -= n; |
523 | contrib++; |
524 | } |
525 | for (j = n; j > 0; j--) |
526 | { |
527 | *dst++ = (unsigned char)(*t>>8); |
528 | *t++ = 128; |
529 | } |
530 | t -= n; |
531 | dst -= n*2; |
532 | } |
533 | } |
534 | else |
535 | { |
536 | for (i=weights->count; i > 0; i--) |
537 | { |
538 | min = &src[n * *contrib++]; |
539 | len = *contrib++; |
540 | while (len-- > 0) |
541 | { |
542 | for (j = n; j > 0; j--) |
543 | *t++ += *min++ * *contrib; |
544 | t -= n; |
545 | contrib++; |
546 | } |
547 | for (j = n; j > 0; j--) |
548 | { |
549 | *dst++ = (unsigned char)(*t>>8); |
550 | *t++ = 128; |
551 | } |
552 | t -= n; |
553 | } |
554 | } |
555 | } |
556 | |
557 | #ifdef ARCH_ARM |
558 | |
559 | static void |
560 | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
561 | __attribute__((naked)); |
562 | |
563 | static void |
564 | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
565 | __attribute__((naked)); |
566 | |
567 | static void |
568 | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
569 | __attribute__((naked)); |
570 | |
571 | static void |
572 | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
573 | __attribute__((naked)); |
574 | |
575 | static void |
576 | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
577 | __attribute__((naked)); |
578 | |
579 | static void |
580 | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
581 | __attribute__((naked)); |
582 | |
583 | static void |
584 | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
585 | { |
586 | asm volatile( |
587 | ENTER_ARM |
588 | ".syntax unified\n" |
589 | "stmfd r13!,{r4-r7,r9,r14} \n" |
590 | "@ r0 = dst \n" |
591 | "@ r1 = src \n" |
592 | "@ r2 = weights \n" |
593 | "ldr r12,[r2],#4 @ r12= flip \n" |
594 | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
595 | "ldr r4, [r2] @ r4 = index[0] \n" |
596 | "cmp r12,#0 @ if (flip) \n" |
597 | "beq 5f @ { \n" |
598 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
599 | "add r0, r0, r3 @ dst += count \n" |
600 | "1: \n" |
601 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
602 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
603 | "mov r5, #128 @ r5 = a = 128 \n" |
604 | "add r4, r1, r4 @ r4 = min = &src[r4] \n" |
605 | "subs r9, r9, #1 @ len-- \n" |
606 | "blt 3f @ while (len >= 0) \n" |
607 | "2: @ { \n" |
608 | "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" |
609 | "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" |
610 | "ldr r12,[r2], #4 @ r12 = *contrib++ \n" |
611 | "ldrb r14,[r4], #1 @ r14 = *min++ \n" |
612 | "mlagt r5, r6, r7, r5 @ g += r6 * r7 \n" |
613 | "subs r9, r9, #2 @ r9 = len -= 2 \n" |
614 | "mla r5, r12,r14,r5 @ g += r14 * r12 \n" |
615 | "bge 2b @ } \n" |
616 | "3: \n" |
617 | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
618 | "strb r5,[r0, #-1]! @ *--dst=a \n" |
619 | "subs r3, r3, #1 @ i-- \n" |
620 | "bgt 1b @ \n" |
621 | "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" |
622 | "5:" |
623 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
624 | "6:" |
625 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
626 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
627 | "mov r5, #128 @ r5 = a = 128 \n" |
628 | "add r4, r1, r4 @ r4 = min = &src[r4] \n" |
629 | "subs r9, r9, #1 @ len-- \n" |
630 | "blt 9f @ while (len > 0) \n" |
631 | "7: @ { \n" |
632 | "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n" |
633 | "ldrbgt r7, [r4], #1 @ r7 = *min++ \n" |
634 | "ldr r12,[r2], #4 @ r12 = *contrib++ \n" |
635 | "ldrb r14,[r4], #1 @ r14 = *min++ \n" |
636 | "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n" |
637 | "subs r9, r9, #2 @ r9 = len -= 2 \n" |
638 | "mla r5, r12,r14,r5 @ a += r14 * r12 \n" |
639 | "bge 7b @ } \n" |
640 | "9: \n" |
641 | "mov r5, r5, LSR #8 @ a >>= 8 \n" |
642 | "strb r5, [r0], #1 @ *dst++=a \n" |
643 | "subs r3, r3, #1 @ i-- \n" |
644 | "bgt 6b @ \n" |
645 | "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n" |
646 | ENTER_THUMB |
647 | ); |
648 | } |
649 | |
650 | static void |
651 | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
652 | { |
653 | asm volatile( |
654 | ENTER_ARM |
655 | "stmfd r13!,{r4-r6,r9-r11,r14} \n" |
656 | "@ r0 = dst \n" |
657 | "@ r1 = src \n" |
658 | "@ r2 = weights \n" |
659 | "ldr r12,[r2],#4 @ r12= flip \n" |
660 | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
661 | "ldr r4, [r2] @ r4 = index[0] \n" |
662 | "cmp r12,#0 @ if (flip) \n" |
663 | "beq 4f @ { \n" |
664 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
665 | "add r0, r0, r3, LSL #1 @ dst += 2*count \n" |
666 | "1: \n" |
667 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
668 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
669 | "mov r5, #128 @ r5 = g = 128 \n" |
670 | "mov r6, #128 @ r6 = a = 128 \n" |
671 | "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" |
672 | "cmp r9, #0 @ while (len-- > 0) \n" |
673 | "beq 3f @ { \n" |
674 | "2: \n" |
675 | "ldr r14,[r2], #4 @ r14 = *contrib++ \n" |
676 | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
677 | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
678 | "subs r9, r9, #1 @ r9 = len-- \n" |
679 | "mla r5, r14,r11,r5 @ g += r11 * r14 \n" |
680 | "mla r6, r14,r12,r6 @ a += r12 * r14 \n" |
681 | "bgt 2b @ } \n" |
682 | "3: \n" |
683 | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
684 | "mov r6, r6, lsr #8 @ a >>= 8 \n" |
685 | "strb r5, [r0, #-2]! @ *--dst=a \n" |
686 | "strb r6, [r0, #1] @ *--dst=g \n" |
687 | "subs r3, r3, #1 @ i-- \n" |
688 | "bgt 1b @ \n" |
689 | "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" |
690 | "4:" |
691 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
692 | "5:" |
693 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
694 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
695 | "mov r5, #128 @ r5 = g = 128 \n" |
696 | "mov r6, #128 @ r6 = a = 128 \n" |
697 | "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n" |
698 | "cmp r9, #0 @ while (len-- > 0) \n" |
699 | "beq 7f @ { \n" |
700 | "6: \n" |
701 | "ldr r14,[r2], #4 @ r10 = *contrib++ \n" |
702 | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
703 | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
704 | "subs r9, r9, #1 @ r9 = len-- \n" |
705 | "mla r5, r14,r11,r5 @ g += r11 * r14 \n" |
706 | "mla r6, r14,r12,r6 @ a += r12 * r14 \n" |
707 | "bgt 6b @ } \n" |
708 | "7: \n" |
709 | "mov r5, r5, lsr #8 @ g >>= 8 \n" |
710 | "mov r6, r6, lsr #8 @ a >>= 8 \n" |
711 | "strb r5, [r0], #1 @ *dst++=g \n" |
712 | "strb r6, [r0], #1 @ *dst++=a \n" |
713 | "subs r3, r3, #1 @ i-- \n" |
714 | "bgt 5b @ \n" |
715 | "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n" |
716 | ENTER_THUMB |
717 | ); |
718 | } |
719 | |
720 | static void |
721 | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
722 | { |
723 | asm volatile( |
724 | ENTER_ARM |
725 | "stmfd r13!,{r4-r11,r14} \n" |
726 | "@ r0 = dst \n" |
727 | "@ r1 = src \n" |
728 | "@ r2 = weights \n" |
729 | "ldr r12,[r2],#4 @ r12= flip \n" |
730 | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
731 | "ldr r4, [r2] @ r4 = index[0] \n" |
732 | "cmp r12,#0 @ if (flip) \n" |
733 | "beq 4f @ { \n" |
734 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
735 | "add r0, r0, r3, LSL #1 @ \n" |
736 | "add r0, r0, r3 @ dst += 3*count \n" |
737 | "1: \n" |
738 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
739 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
740 | "mov r5, #128 @ r5 = r = 128 \n" |
741 | "mov r6, #128 @ r6 = g = 128 \n" |
742 | "add r7, r1, r4, LSL #1 @ \n" |
743 | "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" |
744 | "mov r7, #128 @ r7 = b = 128 \n" |
745 | "cmp r9, #0 @ while (len-- > 0) \n" |
746 | "beq 3f @ { \n" |
747 | "2: \n" |
748 | "ldr r14,[r2], #4 @ r14 = *contrib++ \n" |
749 | "ldrb r8, [r4], #1 @ r8 = *min++ \n" |
750 | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
751 | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
752 | "subs r9, r9, #1 @ r9 = len-- \n" |
753 | "mla r5, r14,r8, r5 @ r += r8 * r14 \n" |
754 | "mla r6, r14,r11,r6 @ g += r11 * r14 \n" |
755 | "mla r7, r14,r12,r7 @ b += r12 * r14 \n" |
756 | "bgt 2b @ } \n" |
757 | "3: \n" |
758 | "mov r5, r5, lsr #8 @ r >>= 8 \n" |
759 | "mov r6, r6, lsr #8 @ g >>= 8 \n" |
760 | "mov r7, r7, lsr #8 @ b >>= 8 \n" |
761 | "strb r5, [r0, #-3]! @ *--dst=r \n" |
762 | "strb r6, [r0, #1] @ *--dst=g \n" |
763 | "strb r7, [r0, #2] @ *--dst=b \n" |
764 | "subs r3, r3, #1 @ i-- \n" |
765 | "bgt 1b @ \n" |
766 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
767 | "4:" |
768 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
769 | "5:" |
770 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
771 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
772 | "mov r5, #128 @ r5 = r = 128 \n" |
773 | "mov r6, #128 @ r6 = g = 128 \n" |
774 | "add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n" |
775 | "add r4, r7, r4 @ r4 = min = &src[3*r4] \n" |
776 | "mov r7, #128 @ r7 = b = 128 \n" |
777 | "cmp r9, #0 @ while (len-- > 0) \n" |
778 | "beq 7f @ { \n" |
779 | "6: \n" |
780 | "ldr r14,[r2], #4 @ r10 = *contrib++ \n" |
781 | "ldrb r8, [r4], #1 @ r8 = *min++ \n" |
782 | "ldrb r11,[r4], #1 @ r11 = *min++ \n" |
783 | "ldrb r12,[r4], #1 @ r12 = *min++ \n" |
784 | "subs r9, r9, #1 @ r9 = len-- \n" |
785 | "mla r5, r14,r8, r5 @ r += r8 * r14 \n" |
786 | "mla r6, r14,r11,r6 @ g += r11 * r14 \n" |
787 | "mla r7, r14,r12,r7 @ b += r12 * r14 \n" |
788 | "bgt 6b @ } \n" |
789 | "7: \n" |
790 | "mov r5, r5, lsr #8 @ r >>= 8 \n" |
791 | "mov r6, r6, lsr #8 @ g >>= 8 \n" |
792 | "mov r7, r7, lsr #8 @ b >>= 8 \n" |
793 | "strb r5, [r0], #1 @ *dst++=r \n" |
794 | "strb r6, [r0], #1 @ *dst++=g \n" |
795 | "strb r7, [r0], #1 @ *dst++=b \n" |
796 | "subs r3, r3, #1 @ i-- \n" |
797 | "bgt 5b @ \n" |
798 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
799 | ENTER_THUMB |
800 | ); |
801 | } |
802 | |
803 | static void |
804 | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
805 | { |
806 | asm volatile( |
807 | ENTER_ARM |
808 | "stmfd r13!,{r4-r11,r14} \n" |
809 | "@ r0 = dst \n" |
810 | "@ r1 = src \n" |
811 | "@ r2 = weights \n" |
812 | "ldr r12,[r2],#4 @ r12= flip \n" |
813 | "ldr r3, [r2],#20 @ r3 = count r2 = &index\n" |
814 | "ldr r4, [r2] @ r4 = index[0] \n" |
815 | "ldr r5,=0x00800080 @ r5 = rounding \n" |
816 | "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n" |
817 | "cmp r12,#0 @ if (flip) \n" |
818 | "beq 4f @ { \n" |
819 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
820 | "add r0, r0, r3, LSL #2 @ dst += 4*count \n" |
821 | "1: \n" |
822 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
823 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
824 | "mov r7, r5 @ r7 = b = rounding \n" |
825 | "mov r8, r5 @ r8 = a = rounding \n" |
826 | "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" |
827 | "cmp r9, #0 @ while (len-- > 0) \n" |
828 | "beq 3f @ { \n" |
829 | "2: \n" |
830 | "ldr r11,[r4], #4 @ r11 = *min++ \n" |
831 | "ldr r10,[r2], #4 @ r10 = *contrib++ \n" |
832 | "subs r9, r9, #1 @ r9 = len-- \n" |
833 | "and r12,r6, r11 @ r12 = __22__00 \n" |
834 | "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" |
835 | "mla r7, r10,r12,r7 @ b += r14 * r10 \n" |
836 | "mla r8, r10,r11,r8 @ a += r11 * r10 \n" |
837 | "bgt 2b @ } \n" |
838 | "3: \n" |
839 | "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" |
840 | "bic r8, r8, r6 @ r8 = 33__11__ \n" |
841 | "orr r7, r7, r8 @ r7 = 33221100 \n" |
842 | "str r7, [r0, #-4]! @ *--dst=r \n" |
843 | "subs r3, r3, #1 @ i-- \n" |
844 | "bgt 1b @ \n" |
845 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
846 | "4: \n" |
847 | "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n" |
848 | "5: \n" |
849 | "ldr r4, [r2], #4 @ r4 = *contrib++ \n" |
850 | "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n" |
851 | "mov r7, r5 @ r7 = b = rounding \n" |
852 | "mov r8, r5 @ r8 = a = rounding \n" |
853 | "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n" |
854 | "cmp r9, #0 @ while (len-- > 0) \n" |
855 | "beq 7f @ { \n" |
856 | "6: \n" |
857 | "ldr r11,[r4], #4 @ r11 = *min++ \n" |
858 | "ldr r10,[r2], #4 @ r10 = *contrib++ \n" |
859 | "subs r9, r9, #1 @ r9 = len-- \n" |
860 | "and r12,r6, r11 @ r12 = __22__00 \n" |
861 | "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n" |
862 | "mla r7, r10,r12,r7 @ b += r14 * r10 \n" |
863 | "mla r8, r10,r11,r8 @ a += r11 * r10 \n" |
864 | "bgt 6b @ } \n" |
865 | "7: \n" |
866 | "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n" |
867 | "bic r8, r8, r6 @ r8 = 33__11__ \n" |
868 | "orr r7, r7, r8 @ r7 = 33221100 \n" |
869 | "str r7, [r0], #4 @ *dst++=r \n" |
870 | "subs r3, r3, #1 @ i-- \n" |
871 | "bgt 5b @ \n" |
872 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
873 | ENTER_THUMB |
874 | ); |
875 | } |
876 | |
877 | static void |
878 | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
879 | { |
880 | asm volatile( |
881 | ENTER_ARM |
882 | "stmfd r13!,{r4-r11,r14} \n" |
883 | "@ r0 = dst \n" |
884 | "@ r1 = src \n" |
885 | "@ r2 = &weights->index[0] \n" |
886 | "@ r3 = width \n" |
887 | "@ r12= row \n" |
888 | "ldr r14,[r13,#4*9] @ r14= n \n" |
889 | "ldr r12,[r13,#4*10] @ r12= row \n" |
890 | "add r2, r2, #24 @ r2 = weights->index \n" |
891 | "mul r3, r14, r3 @ r3 = width *= n \n" |
892 | "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" |
893 | "add r2, r2, #4 @ r2 = &index[1] \n" |
894 | "subs r6, r3, #4 @ r6 = x = width-4 \n" |
895 | "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" |
896 | " @ r14= len = *contrib \n" |
897 | "blt 4f @ while (x >= 0) { \n" |
898 | #ifndef ARCH_UNALIGNED_OK |
899 | "tst r3, #3 @ if ((r3 & 3) \n" |
900 | "tsteq r1, #3 @ || (r1 & 3)) \n" |
901 | "bne 4f @ can't do fast code \n" |
902 | #endif |
903 | "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n" |
904 | "1: \n" |
905 | "ldr r7, =0x00800080 @ r5 = val0 = round \n" |
906 | "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n" |
907 | " @ r1 = min = src \n" |
908 | " @ r2 = contrib2-4 \n" |
909 | "movs r8, r14 @ r8 = len2 = len \n" |
910 | "mov r5, r7 @ r7 = val1 = round \n" |
911 | "ble 3f @ while (len2-- > 0) { \n" |
912 | "2: \n" |
913 | "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n" |
914 | "ldr r10,[r2, #4]! @ r10 = *contrib2++ \n" |
915 | "subs r8, r8, #1 @ len2-- \n" |
916 | "and r11,r9, r12 @ r11= __22__00 \n" |
917 | "and r12,r9, r12,LSR #8 @ r12= __33__11 \n" |
918 | "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n" |
919 | "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n" |
920 | "bgt 2b @ } \n" |
921 | "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n" |
922 | "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n" |
923 | "orr r5, r5, r7 @ r5 = 33221100 \n" |
924 | "3: \n" |
925 | "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n" |
926 | "subs r6, r6, #4 @ x-- \n" |
927 | "add r1, r1, #4 @ src++ \n" |
928 | "str r5, [r0], #4 @ *dst++ = val \n" |
929 | "bge 1b @ \n" |
930 | "4: @ } (Less than 4 to go) \n" |
931 | "adds r6, r6, #4 @ r6 = x += 4 \n" |
932 | "beq 8f @ if (x == 0) done \n" |
933 | "5: \n" |
934 | "mov r5, r1 @ r5 = min = src \n" |
935 | "mov r7, #128 @ r7 = val = 128 \n" |
936 | "movs r8, r14 @ r8 = len2 = len \n" |
937 | "add r9, r2, #4 @ r9 = contrib2 \n" |
938 | "ble 7f @ while (len2-- > 0) { \n" |
939 | "6: \n" |
940 | "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" |
941 | "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" |
942 | "subs r8, r8, #1 @ len2-- \n" |
943 | "@ stall r12 \n" |
944 | "mla r7, r10,r12,r7 @ val += r12 * r10 \n" |
945 | "bgt 6b @ } \n" |
946 | "7: \n" |
947 | "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" |
948 | "subs r6, r6, #1 @ x-- \n" |
949 | "add r1, r1, #1 @ src++ \n" |
950 | "strb r7, [r0], #1 @ *dst++ = val \n" |
951 | "bgt 5b @ \n" |
952 | "8: \n" |
953 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
954 | ".ltorg \n" |
955 | ENTER_THUMB |
956 | ); |
957 | } |
958 | |
959 | static void |
960 | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row) |
961 | { |
962 | asm volatile( |
963 | ENTER_ARM |
964 | "stmfd r13!,{r4-r11,r14} \n" |
965 | "mov r11,#255 @ r11= 255 \n" |
966 | "ldr r12,[r13,#4*10] @ r12= row \n" |
967 | "@ r0 = dst \n" |
968 | "@ r1 = src \n" |
969 | "@ r2 = &weights->index[0] \n" |
970 | "@ r3 = width \n" |
971 | "@ r11= 255 \n" |
972 | "@ r12= row \n" |
973 | "add r2, r2, #24 @ r2 = weights->index \n" |
974 | "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n" |
975 | "add r2, r2, #4 @ r2 = &index[1] \n" |
976 | "mov r6, r3 @ r6 = x = width \n" |
977 | "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n" |
978 | " @ r14= len = *contrib \n" |
979 | "5: \n" |
980 | "ldr r4,[r13,#4*9] @ r10= nn = n \n" |
981 | "1: \n" |
982 | "mov r5, r1 @ r5 = min = src \n" |
983 | "mov r7, #128 @ r7 = val = 128 \n" |
984 | "movs r8, r14 @ r8 = len2 = len \n" |
985 | "add r9, r2, #4 @ r9 = contrib2 \n" |
986 | "ble 7f @ while (len2-- > 0) { \n" |
987 | "6: \n" |
988 | "ldr r10,[r9], #4 @ r10 = *contrib2++ \n" |
989 | "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n" |
990 | "subs r8, r8, #1 @ len2-- \n" |
991 | "@ stall r12 \n" |
992 | "mla r7, r10,r12,r7 @ val += r12 * r10 \n" |
993 | "bgt 6b @ } \n" |
994 | "7: \n" |
995 | "mov r7, r7, asr #8 @ r7 = val >>= 8 \n" |
996 | "subs r4, r4, #1 @ r4 = nn-- \n" |
997 | "add r1, r1, #1 @ src++ \n" |
998 | "strb r7, [r0], #1 @ *dst++ = val \n" |
999 | "bgt 1b @ \n" |
1000 | "subs r6, r6, #1 @ x-- \n" |
1001 | "strb r11,[r0], #1 @ *dst++ = 255 \n" |
1002 | "bgt 5b @ \n" |
1003 | "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n" |
1004 | ".ltorg \n" |
1005 | ENTER_THUMB |
1006 | ); |
1007 | } |
1008 | #else |
1009 | |
1010 | static void |
1011 | scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1012 | { |
1013 | const int *contrib = &weights->index[weights->index[0]]; |
1014 | int len, i; |
1015 | const unsigned char *min; |
1016 | |
1017 | assert(weights->n == 1); |
1018 | if (weights->flip) |
1019 | { |
1020 | dst += weights->count; |
1021 | for (i=weights->count; i > 0; i--) |
1022 | { |
1023 | int val = 128; |
1024 | min = &src[*contrib++]; |
1025 | len = *contrib++; |
1026 | while (len-- > 0) |
1027 | { |
1028 | val += *min++ * *contrib++; |
1029 | } |
1030 | *--dst = (unsigned char)(val>>8); |
1031 | } |
1032 | } |
1033 | else |
1034 | { |
1035 | for (i=weights->count; i > 0; i--) |
1036 | { |
1037 | int val = 128; |
1038 | min = &src[*contrib++]; |
1039 | len = *contrib++; |
1040 | while (len-- > 0) |
1041 | { |
1042 | val += *min++ * *contrib++; |
1043 | } |
1044 | *dst++ = (unsigned char)(val>>8); |
1045 | } |
1046 | } |
1047 | } |
1048 | |
1049 | static void |
1050 | scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1051 | { |
1052 | const int *contrib = &weights->index[weights->index[0]]; |
1053 | int len, i; |
1054 | const unsigned char *min; |
1055 | |
1056 | assert(weights->n == 2); |
1057 | if (weights->flip) |
1058 | { |
1059 | dst += 2*weights->count; |
1060 | for (i=weights->count; i > 0; i--) |
1061 | { |
1062 | int c1 = 128; |
1063 | int c2 = 128; |
1064 | min = &src[2 * *contrib++]; |
1065 | len = *contrib++; |
1066 | while (len-- > 0) |
1067 | { |
1068 | c1 += *min++ * *contrib; |
1069 | c2 += *min++ * *contrib++; |
1070 | } |
1071 | *--dst = (unsigned char)(c2>>8); |
1072 | *--dst = (unsigned char)(c1>>8); |
1073 | } |
1074 | } |
1075 | else |
1076 | { |
1077 | for (i=weights->count; i > 0; i--) |
1078 | { |
1079 | int c1 = 128; |
1080 | int c2 = 128; |
1081 | min = &src[2 * *contrib++]; |
1082 | len = *contrib++; |
1083 | while (len-- > 0) |
1084 | { |
1085 | c1 += *min++ * *contrib; |
1086 | c2 += *min++ * *contrib++; |
1087 | } |
1088 | *dst++ = (unsigned char)(c1>>8); |
1089 | *dst++ = (unsigned char)(c2>>8); |
1090 | } |
1091 | } |
1092 | } |
1093 | |
1094 | static void |
1095 | scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1096 | { |
1097 | const int *contrib = &weights->index[weights->index[0]]; |
1098 | int len, i; |
1099 | const unsigned char *min; |
1100 | |
1101 | assert(weights->n == 3); |
1102 | if (weights->flip) |
1103 | { |
1104 | dst += 3*weights->count; |
1105 | for (i=weights->count; i > 0; i--) |
1106 | { |
1107 | int c1 = 128; |
1108 | int c2 = 128; |
1109 | int c3 = 128; |
1110 | min = &src[3 * *contrib++]; |
1111 | len = *contrib++; |
1112 | while (len-- > 0) |
1113 | { |
1114 | int c = *contrib++; |
1115 | c1 += *min++ * c; |
1116 | c2 += *min++ * c; |
1117 | c3 += *min++ * c; |
1118 | } |
1119 | *--dst = (unsigned char)(c3>>8); |
1120 | *--dst = (unsigned char)(c2>>8); |
1121 | *--dst = (unsigned char)(c1>>8); |
1122 | } |
1123 | } |
1124 | else |
1125 | { |
1126 | for (i=weights->count; i > 0; i--) |
1127 | { |
1128 | int c1 = 128; |
1129 | int c2 = 128; |
1130 | int c3 = 128; |
1131 | min = &src[3 * *contrib++]; |
1132 | len = *contrib++; |
1133 | while (len-- > 0) |
1134 | { |
1135 | int c = *contrib++; |
1136 | c1 += *min++ * c; |
1137 | c2 += *min++ * c; |
1138 | c3 += *min++ * c; |
1139 | } |
1140 | *dst++ = (unsigned char)(c1>>8); |
1141 | *dst++ = (unsigned char)(c2>>8); |
1142 | *dst++ = (unsigned char)(c3>>8); |
1143 | } |
1144 | } |
1145 | } |
1146 | |
1147 | static void |
1148 | scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights) |
1149 | { |
1150 | const int *contrib = &weights->index[weights->index[0]]; |
1151 | int len, i; |
1152 | const unsigned char *min; |
1153 | |
1154 | assert(weights->n == 4); |
1155 | if (weights->flip) |
1156 | { |
1157 | dst += 4*weights->count; |
1158 | for (i=weights->count; i > 0; i--) |
1159 | { |
1160 | int r = 128; |
1161 | int g = 128; |
1162 | int b = 128; |
1163 | int a = 128; |
1164 | min = &src[4 * *contrib++]; |
1165 | len = *contrib++; |
1166 | while (len-- > 0) |
1167 | { |
1168 | r += *min++ * *contrib; |
1169 | g += *min++ * *contrib; |
1170 | b += *min++ * *contrib; |
1171 | a += *min++ * *contrib++; |
1172 | } |
1173 | *--dst = (unsigned char)(a>>8); |
1174 | *--dst = (unsigned char)(b>>8); |
1175 | *--dst = (unsigned char)(g>>8); |
1176 | *--dst = (unsigned char)(r>>8); |
1177 | } |
1178 | } |
1179 | else |
1180 | { |
1181 | for (i=weights->count; i > 0; i--) |
1182 | { |
1183 | int r = 128; |
1184 | int g = 128; |
1185 | int b = 128; |
1186 | int a = 128; |
1187 | min = &src[4 * *contrib++]; |
1188 | len = *contrib++; |
1189 | while (len-- > 0) |
1190 | { |
1191 | r += *min++ * *contrib; |
1192 | g += *min++ * *contrib; |
1193 | b += *min++ * *contrib; |
1194 | a += *min++ * *contrib++; |
1195 | } |
1196 | *dst++ = (unsigned char)(r>>8); |
1197 | *dst++ = (unsigned char)(g>>8); |
1198 | *dst++ = (unsigned char)(b>>8); |
1199 | *dst++ = (unsigned char)(a>>8); |
1200 | } |
1201 | } |
1202 | } |
1203 | |
1204 | static void |
1205 | scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) |
1206 | { |
1207 | const int *contrib = &weights->index[weights->index[row]]; |
1208 | int len, x; |
1209 | int width = w * n; |
1210 | |
1211 | contrib++; /* Skip min */ |
1212 | len = *contrib++; |
1213 | for (x=width; x > 0; x--) |
1214 | { |
1215 | const unsigned char *min = src; |
1216 | int val = 128; |
1217 | int len2 = len; |
1218 | const int *contrib2 = contrib; |
1219 | |
1220 | while (len2-- > 0) |
1221 | { |
1222 | val += *min * *contrib2++; |
1223 | min += width; |
1224 | } |
1225 | *dst++ = (unsigned char)(val>>8); |
1226 | src++; |
1227 | } |
1228 | } |
1229 | |
1230 | static void |
1231 | scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row) |
1232 | { |
1233 | const int *contrib = &weights->index[weights->index[row]]; |
1234 | int len, x; |
1235 | int width = w * n; |
1236 | |
1237 | contrib++; /* Skip min */ |
1238 | len = *contrib++; |
1239 | for (x=w; x > 0; x--) |
1240 | { |
1241 | int nn; |
1242 | for (nn = n; nn > 0; nn--) |
1243 | { |
1244 | const unsigned char *min = src; |
1245 | int val = 128; |
1246 | int len2 = len; |
1247 | const int *contrib2 = contrib; |
1248 | |
1249 | while (len2-- > 0) |
1250 | { |
1251 | val += *min * *contrib2++; |
1252 | min += width; |
1253 | } |
1254 | *dst++ = (unsigned char)(val>>8); |
1255 | src++; |
1256 | } |
1257 | *dst++ = 255; |
1258 | } |
1259 | } |
1260 | #endif |
1261 | |
1262 | #ifdef SINGLE_PIXEL_SPECIALS |
1263 | static void |
1264 | duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride) |
1265 | { |
1266 | int i; |
1267 | |
1268 | for (i = n; i > 0; i--) |
1269 | *dst++ = *src++; |
1270 | if (forcealpha) |
1271 | *dst++ = 255; |
1272 | n += forcealpha; |
1273 | for (i = w-1; i > 0; i--) |
1274 | { |
1275 | memcpy(dst, dst-n, n); |
1276 | dst += n; |
1277 | } |
1278 | w *= n; |
1279 | dst -= w; |
1280 | h--; |
1281 | while (h--) |
1282 | { |
1283 | memcpy(dst+stride, dst, w); |
1284 | dst += stride; |
1285 | } |
1286 | } |
1287 | |
1288 | static void |
1289 | scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha) |
1290 | { |
1291 | const int *contrib = &weights->index[weights->index[0]]; |
1292 | int min, len, i, j, n, nf; |
1293 | int tmp[FZ_MAX_COLORS]; |
1294 | |
1295 | n = weights->n; |
1296 | nf = n + forcealpha; |
1297 | /* Scale a single row */ |
1298 | for (j = 0; j < nf; j++) |
1299 | tmp[j] = 128; |
1300 | if (weights->flip) |
1301 | { |
1302 | dst += (weights->count-1)*nf; |
1303 | for (i=weights->count; i > 0; i--) |
1304 | { |
1305 | min = *contrib++; |
1306 | len = *contrib++; |
1307 | min *= n; |
1308 | while (len-- > 0) |
1309 | { |
1310 | int c = *contrib++; |
1311 | for (j = 0; j < n; j++) |
1312 | tmp[j] += src[min++] * c; |
1313 | if (forcealpha) |
1314 | tmp[j] += 255 * c; |
1315 | } |
1316 | for (j = 0; j < nf; j++) |
1317 | { |
1318 | *dst++ = (unsigned char)(tmp[j]>>8); |
1319 | tmp[j] = 128; |
1320 | } |
1321 | dst -= 2*nf; |
1322 | } |
1323 | dst += nf + dstride; |
1324 | } |
1325 | else |
1326 | { |
1327 | for (i=weights->count; i > 0; i--) |
1328 | { |
1329 | min = *contrib++; |
1330 | len = *contrib++; |
1331 | min *= n; |
1332 | while (len-- > 0) |
1333 | { |
1334 | int c = *contrib++; |
1335 | for (j = 0; j < n; j++) |
1336 | tmp[j] += src[min++] * c; |
1337 | if (forcealpha) |
1338 | tmp[j] += 255 * c; |
1339 | } |
1340 | for (j = 0; j < nf; j++) |
1341 | { |
1342 | *dst++ = (unsigned char)(tmp[j]>>8); |
1343 | tmp[j] = 128; |
1344 | } |
1345 | } |
1346 | dst += dstride - weights->count * nf; |
1347 | } |
1348 | /* And then duplicate it h times */ |
1349 | nf *= weights->count; |
1350 | while (--h > 0) |
1351 | { |
1352 | memcpy(dst, dst-dstride, nf); |
1353 | dst += dstride; |
1354 | } |
1355 | } |
1356 | |
1357 | static void |
1358 | scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha) |
1359 | { |
1360 | const int *contrib = &weights->index[weights->index[0]]; |
1361 | int min, len, i, j; |
1362 | int tmp[FZ_MAX_COLORS]; |
1363 | int nf = n + forcealpha; |
1364 | |
1365 | for (j = 0; j < nf; j++) |
1366 | tmp[j] = 128; |
1367 | if (weights->flip) |
1368 | { |
1369 | src_w = (src_w-1)*sstride; |
1370 | for (i=weights->count; i > 0; i--) |
1371 | { |
1372 | /* Scale the next pixel in the column */ |
1373 | min = *contrib++; |
1374 | len = *contrib++; |
1375 | min = src_w-min*sstride; |
1376 | while (len-- > 0) |
1377 | { |
1378 | int c = *contrib++; |
1379 | for (j = 0; j < n; j++) |
1380 | tmp[j] += src[min+j] * c; |
1381 | if (forcealpha) |
1382 | tmp[j] += 255 * c; |
1383 | min -= sstride; |
1384 | } |
1385 | for (j = 0; j < nf; j++) |
1386 | { |
1387 | *dst++ = (unsigned char)(tmp[j]>>8); |
1388 | tmp[j] = 128; |
1389 | } |
1390 | /* And then duplicate it across the row */ |
1391 | for (j = (w-1)*nf; j > 0; j--) |
1392 | { |
1393 | *dst = dst[-nf]; |
1394 | dst++; |
1395 | } |
1396 | dst += dstride - w*nf; |
1397 | } |
1398 | } |
1399 | else |
1400 | { |
1401 | for (i=weights->count; i > 0; i--) |
1402 | { |
1403 | /* Scale the next pixel in the column */ |
1404 | min = *contrib++; |
1405 | len = *contrib++; |
1406 | min *= sstride; |
1407 | while (len-- > 0) |
1408 | { |
1409 | int c = *contrib++; |
1410 | for (j = 0; j < n; j++) |
1411 | tmp[j] += src[min+j] * c; |
1412 | if (forcealpha) |
1413 | tmp[j] += 255 * c; |
1414 | min += sstride; |
1415 | } |
1416 | for (j = 0; j < nf; j++) |
1417 | { |
1418 | *dst++ = (unsigned char)(tmp[j]>>8); |
1419 | tmp[j] = 128; |
1420 | } |
1421 | /* And then duplicate it across the row */ |
1422 | for (j = (w-1)*nf; j > 0; j--) |
1423 | { |
1424 | *dst = dst[-nf]; |
1425 | dst++; |
1426 | } |
1427 | dst += dstride - w*nf; |
1428 | } |
1429 | } |
1430 | } |
1431 | #endif /* SINGLE_PIXEL_SPECIALS */ |
1432 | |
1433 | static void |
1434 | get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp) |
1435 | { |
1436 | const int *contrib = &rows->index[rows->index[0]]; |
1437 | int len, i, t, b; |
1438 | |
1439 | /* Calculate the edge alpha values */ |
1440 | contrib++; /* Skip min */ |
1441 | len = *contrib++; |
1442 | t = 0; |
1443 | while (len--) |
1444 | t += *contrib++; |
1445 | for (i=rows->count-2; i > 0; i--) |
1446 | { |
1447 | contrib++; /* Skip min */ |
1448 | len = *contrib++; |
1449 | contrib += len; |
1450 | } |
1451 | b = 0; |
1452 | if (i == 0) |
1453 | { |
1454 | contrib++; |
1455 | len = *contrib++; |
1456 | while (len--) |
1457 | b += *contrib++; |
1458 | } |
1459 | if (rows->flip && i == 0) |
1460 | { |
1461 | *tp = b; |
1462 | *bp = t; |
1463 | } |
1464 | else |
1465 | { |
1466 | *tp = t; |
1467 | *bp = b; |
1468 | } |
1469 | } |
1470 | |
1471 | static void |
1472 | adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols) |
1473 | { |
1474 | int t, l, r, b, tl, tr, bl, br, x, y; |
1475 | unsigned char *dp = pix->samples; |
1476 | int w = pix->w; |
1477 | int n = pix->n; |
1478 | int span = w >= 2 ? (w-1)*n : 0; |
1479 | int stride = pix->stride; |
1480 | |
1481 | get_alpha_edge_values(rows, &t, &b); |
1482 | get_alpha_edge_values(cols, &l, &r); |
1483 | |
1484 | l = (255 * l + 128)>>8; |
1485 | r = (255 * r + 128)>>8; |
1486 | tl = (l * t + 128)>>8; |
1487 | tr = (r * t + 128)>>8; |
1488 | bl = (l * b + 128)>>8; |
1489 | br = (r * b + 128)>>8; |
1490 | t = (255 * t + 128)>>8; |
1491 | b = (255 * b + 128)>>8; |
1492 | dp += n-1; |
1493 | *dp = tl; |
1494 | dp += n; |
1495 | for (x = w-2; x > 0; x--) |
1496 | { |
1497 | *dp = t; |
1498 | dp += n; |
1499 | } |
1500 | if (x == 0) |
1501 | { |
1502 | *dp = tr; |
1503 | dp += n; |
1504 | } |
1505 | dp += stride - w*n; |
1506 | for (y = pix->h-2; y > 0; y--) |
1507 | { |
1508 | dp[span] = r; |
1509 | *dp = l; |
1510 | dp += stride; |
1511 | } |
1512 | if (y == 0) |
1513 | { |
1514 | *dp = bl; |
1515 | dp += n; |
1516 | for (x = w-2; x > 0; x--) |
1517 | { |
1518 | *dp = b; |
1519 | dp += n; |
1520 | } |
1521 | if (x == 0) |
1522 | { |
1523 | *dp = br; |
1524 | } |
1525 | } |
1526 | } |
1527 | |
1528 | fz_pixmap * |
1529 | fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip) |
1530 | { |
1531 | return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL); |
1532 | } |
1533 | |
1534 | fz_pixmap * |
1535 | fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y) |
1536 | { |
1537 | fz_scale_filter *filter = &fz_scale_filter_simple; |
1538 | fz_weights *contrib_rows = NULL; |
1539 | fz_weights *contrib_cols = NULL; |
1540 | fz_pixmap *output = NULL; |
1541 | unsigned char *temp = NULL; |
1542 | int max_row, temp_span, temp_rows, row; |
1543 | int dst_w_int, dst_h_int, dst_x_int, dst_y_int; |
1544 | int flip_x, flip_y, forcealpha; |
1545 | fz_rect patch; |
1546 | |
1547 | fz_var(contrib_cols); |
1548 | fz_var(contrib_rows); |
1549 | |
1550 | /* Avoid extreme scales where overflows become problematic. */ |
1551 | if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24)) |
1552 | return NULL; |
1553 | if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24)) |
1554 | return NULL; |
1555 | |
1556 | /* Clamp small ranges of w and h */ |
1557 | if (w <= -1) |
1558 | { |
1559 | } |
1560 | else if (w < 0) |
1561 | { |
1562 | w = -1; |
1563 | } |
1564 | else if (w < 1) |
1565 | { |
1566 | w = 1; |
1567 | } |
1568 | if (h <= -1) |
1569 | { |
1570 | } |
1571 | else if (h < 0) |
1572 | { |
1573 | h = -1; |
1574 | } |
1575 | else if (h < 1) |
1576 | { |
1577 | h = 1; |
1578 | } |
1579 | |
1580 | /* If the src has an alpha, we'll make the dst have an alpha automatically. |
1581 | * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */ |
1582 | forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h); |
1583 | |
1584 | /* Find the destination bbox, width/height, and sub pixel offset, |
1585 | * allowing for whether we're flipping or not. */ |
1586 | /* The (x,y) position given describes where the top left corner |
1587 | * of the source image should be mapped to (i.e. where (0,0) in image |
1588 | * space ends up). Also there are differences in the way we scale |
1589 | * horizontally and vertically. When scaling rows horizontally, we |
1590 | * always read forwards through the source, and store either forwards |
1591 | * or in reverse as required. When scaling vertically, we always store |
	 * out forwards, but may feed the source rows in a different order.
1593 | * |
1594 | * Consider the image rectangle 'r' to which the image is mapped, |
1595 | * and the (possibly) larger rectangle 'R', given by expanding 'r' to |
1596 | * complete pixels. |
1597 | * |
1598 | * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether |
1599 | * the image is x flipped or not. Whatever happens 0 <= x < 1. |
1600 | * y is always R.ymax - r.ymax. |
1601 | */ |
1602 | /* dst_x_int is calculated to be the left of the scaled image, and |
1603 | * x (the sub pixel offset) is the distance in from either the left |
1604 | * or right pixel expanded edge. */ |
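	/* For example (illustrative numbers): x = 3.25, w = -2.5 maps the
	 * image to r = [0.75,3.25], so R = [0,4]: dst_x_int = 0,
	 * dst_w_int = 4, and the sub pixel offset becomes
	 * x = R.xmax - r.xmax = 4 - 3.25 = 0.75. */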
1605 | flip_x = (w < 0); |
1606 | if (flip_x) |
1607 | { |
1608 | float tmp; |
1609 | w = -w; |
1610 | dst_x_int = floorf(x-w); |
1611 | tmp = ceilf(x); |
1612 | dst_w_int = (int)tmp; |
1613 | x = tmp - x; |
1614 | dst_w_int -= dst_x_int; |
1615 | } |
1616 | else |
1617 | { |
1618 | dst_x_int = floorf(x); |
1619 | x -= dst_x_int; |
1620 | dst_w_int = (int)ceilf(x + w); |
1621 | } |
1622 | /* dst_y_int is calculated to be the top of the scaled image, and |
1623 | * y (the sub pixel offset) is the distance in from either the top |
1624 | * or bottom pixel expanded edge. |
1625 | */ |
1626 | flip_y = (h < 0); |
1627 | if (flip_y) |
1628 | { |
1629 | float tmp; |
1630 | h = -h; |
1631 | dst_y_int = floorf(y-h); |
1632 | tmp = ceilf(y); |
1633 | dst_h_int = (int)tmp; |
1634 | y = tmp - y; |
1635 | dst_h_int -= dst_y_int; |
1636 | } |
1637 | else |
1638 | { |
1639 | dst_y_int = floorf(y); |
1640 | y -= dst_y_int; |
1641 | dst_h_int = (int)ceilf(y + h); |
1642 | } |
1643 | |
1644 | fz_valgrind_pixmap(src); |
1645 | |
1646 | /* Step 0: Calculate the patch */ |
1647 | patch.x0 = 0; |
1648 | patch.y0 = 0; |
1649 | patch.x1 = dst_w_int; |
1650 | patch.y1 = dst_h_int; |
1651 | if (clip) |
1652 | { |
1653 | if (flip_x) |
1654 | { |
1655 | if (dst_x_int + dst_w_int > clip->x1) |
1656 | patch.x0 = dst_x_int + dst_w_int - clip->x1; |
1657 | if (clip->x0 > dst_x_int) |
1658 | { |
1659 | patch.x1 = dst_w_int - (clip->x0 - dst_x_int); |
1660 | dst_x_int = clip->x0; |
1661 | } |
1662 | } |
1663 | else |
1664 | { |
1665 | if (dst_x_int + dst_w_int > clip->x1) |
1666 | patch.x1 = clip->x1 - dst_x_int; |
1667 | if (clip->x0 > dst_x_int) |
1668 | { |
1669 | patch.x0 = clip->x0 - dst_x_int; |
1670 | dst_x_int += patch.x0; |
1671 | } |
1672 | } |
1673 | |
1674 | if (flip_y) |
1675 | { |
1676 | if (dst_y_int + dst_h_int > clip->y1) |
1677 | patch.y1 = clip->y1 - dst_y_int; |
1678 | if (clip->y0 > dst_y_int) |
1679 | { |
1680 | patch.y0 = clip->y0 - dst_y_int; |
1681 | dst_y_int = clip->y0; |
1682 | } |
1683 | } |
1684 | else |
1685 | { |
1686 | if (dst_y_int + dst_h_int > clip->y1) |
1687 | patch.y1 = clip->y1 - dst_y_int; |
1688 | if (clip->y0 > dst_y_int) |
1689 | { |
1690 | patch.y0 = clip->y0 - dst_y_int; |
1691 | dst_y_int += patch.y0; |
1692 | } |
1693 | } |
1694 | } |
1695 | if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1) |
1696 | return NULL; |
1697 | |
1698 | fz_try(ctx) |
1699 | { |
1700 | /* Step 1: Calculate the weights for columns and rows */ |
1701 | #ifdef SINGLE_PIXEL_SPECIALS |
1702 | if (src->w == 1) |
1703 | contrib_cols = NULL; |
1704 | else |
1705 | #endif /* SINGLE_PIXEL_SPECIALS */ |
1706 | contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x); |
1707 | #ifdef SINGLE_PIXEL_SPECIALS |
1708 | if (src->h == 1) |
1709 | contrib_rows = NULL; |
1710 | else |
1711 | #endif /* SINGLE_PIXEL_SPECIALS */ |
1712 | contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y); |
1713 | |
1714 | output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha); |
1715 | } |
1716 | fz_catch(ctx) |
1717 | { |
1718 | if (!cache_x) |
1719 | fz_free(ctx, contrib_cols); |
1720 | if (!cache_y) |
1721 | fz_free(ctx, contrib_rows); |
1722 | fz_rethrow(ctx); |
1723 | } |
1724 | output->x = dst_x_int; |
1725 | output->y = dst_y_int; |
1726 | |
1727 | /* Step 2: Apply the weights */ |
1728 | #ifdef SINGLE_PIXEL_SPECIALS |
1729 | if (!contrib_rows) |
1730 | { |
1731 | /* Only 1 source pixel high. */ |
1732 | if (!contrib_cols) |
1733 | { |
1734 | /* Only 1 pixel in the entire image! */ |
1735 | duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride); |
1736 | fz_valgrind_pixmap(output); |
1737 | } |
1738 | else |
1739 | { |
1740 | /* Scale the row once, then copy it. */ |
1741 | scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha); |
1742 | fz_valgrind_pixmap(output); |
1743 | } |
1744 | } |
1745 | else if (!contrib_cols) |
1746 | { |
1747 | /* Only 1 source pixel wide. Scale the col and duplicate. */ |
1748 | scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha); |
1749 | fz_valgrind_pixmap(output); |
1750 | } |
1751 | else |
1752 | #endif /* SINGLE_PIXEL_SPECIALS */ |
1753 | { |
1754 | void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights); |
1755 | void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row); |
1756 | |
1757 | temp_span = contrib_cols->count * src->n; |
1758 | temp_rows = contrib_rows->max_len; |
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
		{
			/* The temporary buffer size would overflow; give up
			 * and return NULL rather than an unfilled pixmap. */
			fz_drop_pixmap(ctx, output);
			output = NULL;
			goto cleanup;
		}
1761 | fz_try(ctx) |
1762 | { |
1763 | temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char)); |
1764 | } |
1765 | fz_catch(ctx) |
1766 | { |
1767 | fz_drop_pixmap(ctx, output); |
1768 | if (!cache_x) |
1769 | fz_free(ctx, contrib_cols); |
1770 | if (!cache_y) |
1771 | fz_free(ctx, contrib_rows); |
1772 | fz_rethrow(ctx); |
1773 | } |
1774 | switch (src->n) |
1775 | { |
1776 | default: |
1777 | row_scale_in = scale_row_to_temp; |
1778 | break; |
1779 | case 1: /* Image mask case or Greyscale case */ |
1780 | row_scale_in = scale_row_to_temp1; |
1781 | break; |
1782 | case 2: /* Greyscale with alpha case */ |
1783 | row_scale_in = scale_row_to_temp2; |
1784 | break; |
1785 | case 3: /* RGB case */ |
1786 | row_scale_in = scale_row_to_temp3; |
1787 | break; |
1788 | case 4: /* RGBA or CMYK case */ |
1789 | row_scale_in = scale_row_to_temp4; |
1790 | break; |
1791 | } |
1792 | row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp; |
1793 | max_row = contrib_rows->index[contrib_rows->index[0]]; |
1794 | for (row = 0; row < contrib_rows->count; row++) |
1795 | { |
1796 | /* |
1797 | Which source rows do we need to have scaled into the |
1798 | temporary buffer in order to be able to do the final |
1799 | scale? |
1800 | */ |
1801 | int row_index = contrib_rows->index[row]; |
1802 | int row_min = contrib_rows->index[row_index++]; |
1803 | int row_len = contrib_rows->index[row_index]; |
1804 | while (max_row < row_min+row_len) |
1805 | { |
1806 | /* Scale another row */ |
1807 | assert(max_row < src->h); |
1808 | (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols); |
1809 | max_row++; |
1810 | } |
1811 | |
1812 | (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row); |
1813 | } |
1814 | fz_free(ctx, temp); |
1815 | |
1816 | if (forcealpha) |
1817 | adjust_alpha_edges(output, contrib_rows, contrib_cols); |
1818 | |
1819 | fz_valgrind_pixmap(output); |
1820 | } |
1821 | |
1822 | cleanup: |
1823 | if (!cache_y) |
1824 | fz_free(ctx, contrib_rows); |
1825 | if (!cache_x) |
1826 | fz_free(ctx, contrib_cols); |
1827 | |
1828 | return output; |
1829 | } |
1830 | |
1831 | void |
1832 | fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc) |
1833 | { |
1834 | if (!sc) |
1835 | return; |
1836 | fz_free(ctx, sc->weights); |
1837 | fz_free(ctx, sc); |
1838 | } |
1839 | |
1840 | fz_scale_cache * |
1841 | fz_new_scale_cache(fz_context *ctx) |
1842 | { |
1843 | return fz_malloc_struct(ctx, fz_scale_cache); |
1844 | } |
1845 | |