/*
This code does smooth scaling of a pixmap.

This function returns a new pixmap representing the area starting at (0,0)
given by taking the source pixmap src, scaling it to width w and height h,
and then positioning it at (frac(x),frac(y)).

This is a cut-down version of draw_scale.c that only copes with filters
that return values strictly in the 0..1 range, and uses bytes for
intermediate results rather than ints.
*/

#include "mupdf/fitz.h"
#include "draw-imp.h"

#include <math.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

/* Do we special case handling of single pixel high/wide images? The
 * 'purest' handling is given by not special casing them, but certain
 * files that use such images 'stack' them to give full images. Not
 * special casing them results in them appearing fainter, and gives
 * noticeable rounding errors.
 */
#define SINGLE_PIXEL_SPECIALS

/*
Consider a row of source samples, src, of width src_w, positioned at x,
scaled to width dst_w.

src[i] is centred at: x + (i + 0.5)*dst_w/src_w

Therefore the distance between the centre of the jth output pixel and
the centre of the ith source sample is:

dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)

(The formulas below measure dist in source samples rather than destination
pixels; the code accounts for this with the G factor in add_weight.)

When scaling up, therefore:

dst[j] = SUM(filter(dist[j,i]) * src[i])
	(for all ints i)

This can be simplified by noticing that filters are only non-zero within
a given filter width (henceforth called W). So:

dst[j] = SUM(filter(dist[j,i]) * src[i])
	(for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)

When scaling down, each filtered source sample is stretched to be wider
to avoid aliasing issues. This effectively reduces the distance between
centres.

dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
	(where F = dst_w/src_w)
	(for ints i, s.t. (j-W)/F < i < (j+W)/F)

*/
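
/*
As a worked example (illustrative, not part of the original description):
scaling a src_w = 2 row up to dst_w = 4 with the triangle filter (W = 1)
at x = 0, the source centres sit at 0.5 and 1.5 in source sample units,
and the centre of dst[1] maps to 0.75, so

	dst[1] = triangle(0.25)*src[0] + triangle(0.75)*src[1]
	       = 0.75*src[0] + 0.25*src[1]

The code below quantises such weights to 256ths (here 192 and 64).
*/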

typedef struct fz_scale_filter_s fz_scale_filter;

struct fz_scale_filter_s
{
	int width;
	float (*fn)(fz_scale_filter *, float);
};

/* Image scale filters */

static float
triangle(fz_scale_filter *filter, float f)
{
	if (f >= 1)
		return 0;
	return 1-f;
}

static float
box(fz_scale_filter *filter, float f)
{
	if (f >= 0.5f)
		return 0;
	return 1;
}

static float
simple(fz_scale_filter *filter, float x)
{
	if (x >= 1)
		return 0;
	return 1 + (2*x - 3)*x*x;
}

fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };
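
/* Note that simple(x) = 1 + (2x - 3)x^2 = (1 - x)^2 (1 + 2x), the classic
 * smoothstep falloff: it is 1 at x = 0, falls to 0 at x = 1, and has zero
 * gradient at both ends. For instance, simple(0.5) = 1 - 2*0.25 = 0.5. */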

/*
We build ourselves a set of tables to contain the precalculated weights
for a given set of scale settings.

The first dst_w entries in index are the index into index of the
sets of weights for each destination pixel.

Each of the sets of weights is a set of values consisting of:
	the minimum source pixel index used for this destination pixel
	the number of weights used for this destination pixel
	the weights themselves

So to calculate dst[i] we do the following:

	weights = &index[index[i]];
	min = *weights++;
	len = *weights++;
	dst[i] = 0;
	while (len-- > 0)
		dst[i] += src[min++] * *weights++

In addition, we guarantee that at the end of this process weights will now
point to the weight values for dst pixel i+1.

In the simplest version of this algorithm, we would scale the whole image
horizontally first into a temporary buffer, then scale that temporary
buffer again vertically to give us our result. Using such a simple
algorithm would mean that we could use the same style of weights for both
horizontal and vertical scaling.

Unfortunately, this would also require a large temporary buffer,
particularly in the case where we are scaling up.

We therefore modify the algorithm as follows: we scale scanlines from the
source image horizontally into a temporary buffer, until we have all the
contributors for a given output scanline. We then produce that output
scanline from the temporary buffer. In this way we restrict the height
of the temporary buffer to a small fraction of the final size.

Unfortunately, this means that the pseudo code for recombining a
scanline of fully scaled pixels is as follows:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x=0 to dst_w)
		min2 = min
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++

i.e. it requires a % operation for every source pixel - this is typically
expensive.

To avoid this, we alter the order in which vertical weights are stored,
so that they are ordered in the same order as the temporary buffer lines
would appear. This simplifies the algorithm to:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x=0 to dst_w)
		min2 = 0
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][min2++] * *weights2++

This means that len may be larger than it needs to be (due to the
possible inclusion of a zero weight row or two), but in practice this
is only an increase of 1 or 2 at worst.

We implement this by generating the weights as normal (but ensuring we
leave enough space) and then reordering afterwards.

*/
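
/*
As a concrete (illustrative) sketch of the layout, for dst_w = 2 with two
weights per output pixel the index array would contain:

	index[0] = 2	(offset of the weight set for dst[0])
	index[1] = 6	(offset of the weight set for dst[1])
	index[2] = min0, index[3] = 2, index[4..5] = the weights for dst[0]
	index[6] = min1, index[7] = 2, index[8..9] = the weights for dst[1]
*/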

typedef struct fz_weights_s fz_weights;

/* This structure is accessed from ARM code - bear this in mind before
 * altering it! */
struct fz_weights_s
{
	int flip;	/* true if outputting reversed */
	int count;	/* number of output pixels we have records for in this table */
	int max_len;	/* Maximum number of weights for any one output pixel */
	int n;		/* number of components (src->n) */
	int new_line;	/* True if no weights for the current output pixel */
	int patch_l;	/* How many output pixels we skip over */
	int index[1];
};
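
/* In particular, the assembly later in this file assumes flip at byte
 * offset 0, count at offset 4, and index[] starting at offset 24. */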

struct fz_scale_cache_s
{
	int src_w;
	float x;
	float dst_w;
	fz_scale_filter *filter;
	int vertical;
	int dst_w_int;
	int patch_l;
	int patch_r;
	int n;
	int flip;
	fz_weights *weights;
};

static fz_weights *
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
{
	int max_len;
	fz_weights *weights;

	if (src_w > dst_w)
	{
		/* Scaling down, so there will be a maximum of
		 * 2*filterwidth*src_w/dst_w src pixels
		 * contributing to each dst pixel. */
		max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
		if (max_len > src_w)
			max_len = src_w;
	}
	else
	{
		/* Scaling up, so there will be a maximum of
		 * 2*filterwidth src pixels contributing to each dst pixel.
		 */
		max_len = 2 * filter->width;
	}
	/* We need the size of the struct,
	 * plus patch_w*sizeof(int) for the index
	 * plus (2+max_len)*sizeof(int) for the weights
	 * plus room for an extra set of weights for reordering.
	 */
	weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(patch_w+1)*sizeof(int));
	if (!weights)
		return NULL;
	weights->count = -1;
	weights->max_len = max_len;
	weights->index[0] = patch_w;
	weights->n = n;
	weights->patch_l = patch_l;
	weights->flip = flip;
	return weights;
}
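
/* For example, halving an image (src_w = 200, dst_w = 100) with a filter of
 * width 1 gives max_len = ceil(2*1*200/100) = 4 source pixels per output
 * pixel; scaling up with the same filter gives max_len = 2. */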

/* j is destination pixel in the patch_l..patch_l+patch_w range */
static void
init_weights(fz_weights *weights, int j)
{
	int index;

	j -= weights->patch_l;
	assert(weights->count == j-1);
	weights->count++;
	weights->new_line = 1;
	if (j == 0)
		index = weights->index[0];
	else
	{
		index = weights->index[j-1];
		index += 2 + weights->index[index+1];
	}
	weights->index[j] = index; /* row pointer */
	weights->index[index] = 0; /* min */
	weights->index[index+1] = 0; /* len */
}

static void
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
	float x, float F, float G, int src_w, float dst_w)
{
	float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
	float f;
	int min, len, index, weight;

	dist *= G;
	if (dist < 0)
		dist = -dist;
	f = filter->fn(filter, dist)*F;
	weight = (int)(256*f+0.5f);

	/* Ensure i is in range */
	if (i < 0 || i >= src_w)
		return;
	if (weight == 0)
	{
		/* We add a fudge factor here to allow for extreme downscales
		 * where all the weights round to 0. Ensure that at least one
		 * (arbitrarily the first one) is non zero. */
		if (weights->new_line && f > 0)
			weight = 1;
		else
			return;
	}

	/* Move j from patch_l..patch_l+patch_w range to 0..patch_w range */
	j -= weights->patch_l;
	if (weights->new_line)
	{
		/* New line */
		weights->new_line = 0;
		index = weights->index[j]; /* row pointer */
		weights->index[index] = i; /* min */
		weights->index[index+1] = 0; /* len */
	}
	index = weights->index[j];
	min = weights->index[index++];
	len = weights->index[index++];
	while (i < min)
	{
		/* This only happens in rare cases, but we need to insert
		 * one earlier. In exceedingly rare cases we may need to
		 * insert more than one earlier. */
		int k;

		for (k = len; k > 0; k--)
		{
			weights->index[index+k] = weights->index[index+k-1];
		}
		weights->index[index] = 0;
		min--;
		len++;
		weights->index[index-2] = min;
		weights->index[index-1] = len;
	}
	if (i-min >= len)
	{
		/* The usual case */
		while (i-min >= ++len)
		{
			weights->index[index+len-1] = 0;
		}
		assert(len-1 == i-min);
		weights->index[index+i-min] = weight;
		weights->index[index-1] = len;
		assert(len <= weights->max_len);
	}
	else
	{
		/* Infrequent case */
		weights->index[index+i-min] += weight;
	}
}
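
/* Weights are stored as 8.8 fixed point values (256 represents 1.0). The
 * accumulators in the scaling loops below are seeded with 128 (i.e. 0.5)
 * so that the final >>8 rounds to nearest rather than truncating. */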

static void
reorder_weights(fz_weights *weights, int j, int src_w)
{
	int idx = weights->index[j - weights->patch_l];
	int min = weights->index[idx++];
	int len = weights->index[idx++];
	int max = weights->max_len;
	int tmp = idx+max;
	int i, off;

	/* Copy into the temporary area */
	memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);

	/* Pad out if required */
	assert(len <= max);
	assert(min+len <= src_w);
	off = 0;
	if (len < max)
	{
		memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
		len = max;
		if (min + len > src_w)
		{
			off = min + len - src_w;
			min = src_w - len;
			weights->index[idx-2] = min;
		}
		weights->index[idx-1] = len;
	}

	/* Copy back into the proper places */
	for (i = 0; i < len; i++)
	{
		weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
	}
}
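
/* For example, with max_len = 4, a row with min = 5 and len = 2 is padded
 * out to len = 4 and its weights land in slots (5+i) % 4 = 1, 2, 3, 0 -
 * exactly the lines that source rows 5..8 occupy in a 4 line rolling
 * temporary buffer. */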

/* Due to rounding and edge effects, the sums for the weights sometimes don't
 * add up to 256. This causes visible rendering effects. Therefore, we take
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
 * 256 for all pixels that are completely covered. See bug #691629. */
static void
check_weights(fz_weights *weights, int j, int w, float x, float wf)
{
	int idx, len;
	int sum = 0;
	int max = -256;
	int maxidx = 0;
	int i;

	idx = weights->index[j - weights->patch_l];
	idx++; /* min */
	len = weights->index[idx++];

	for (i = 0; i < len; i++)
	{
		int v = weights->index[idx++];
		sum += v;
		if (v > max)
		{
			max = v;
			maxidx = idx;
		}
	}
	/* If we aren't the first or last pixel, OR if the sum is too big
	 * then adjust it. */
	if (((j != 0) && (j != w-1)) || (sum > 256))
		weights->index[maxidx-1] += 256-sum;
	/* Otherwise, if we are the first pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == 0) && (x < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
	/* Finally, if we are the last pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
}
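
/* For example, if quantisation leaves an interior pixel with weights
 * 96 + 96 + 32 = 224, the largest weight is bumped by 32 so that the set
 * sums to exactly 256 and fully covered pixels keep their brightness. */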

static fz_weights *
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
{
	fz_weights *weights;
	float F, G;
	float window;
	int j;

	if (cache)
	{
		if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
			cache->filter == filter && cache->vertical == vertical &&
			cache->dst_w_int == dst_w_int &&
			cache->patch_l == patch_l && cache->patch_r == patch_r &&
			cache->n == n && cache->flip == flip)
		{
			return cache->weights;
		}
		cache->src_w = src_w;
		cache->x = x;
		cache->dst_w = dst_w;
		cache->filter = filter;
		cache->vertical = vertical;
		cache->dst_w_int = dst_w_int;
		cache->patch_l = patch_l;
		cache->patch_r = patch_r;
		cache->n = n;
		cache->flip = flip;
		fz_free(ctx, cache->weights);
		cache->weights = NULL;
	}

	if (dst_w < src_w)
	{
		/* Scaling down */
		F = dst_w / src_w;
		G = 1;
	}
	else
	{
		/* Scaling up */
		F = 1;
		G = src_w / dst_w;
	}
	window = filter->width / F;
	weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
	if (!weights)
		return NULL;
	for (j = patch_l; j < patch_r; j++)
	{
		/* find the position of the centre of dst[j] in src space */
		float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
		int l, r;
		l = ceilf(centre - window);
		r = floorf(centre + window);
		init_weights(weights, j);
		for (; l <= r; l++)
		{
			add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
		}
		check_weights(weights, j, dst_w_int, x, dst_w);
		if (vertical)
		{
			reorder_weights(weights, j, src_w);
		}
	}
	weights->count++; /* weights->count = patch_r - patch_l now */
	if (cache)
	{
		cache->weights = weights;
	}
	return weights;
}

static void
scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i, j, n;
	const unsigned char *min;
	int tmp[FZ_MAX_COLORS];
	int *t = tmp;

	n = weights->n;
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
			dst -= n*2;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
		}
	}
}
#ifdef ARCH_ARM

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
__attribute__((naked));

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
__attribute__((naked));

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	".syntax unified\n"
	"stmfd r13!,{r4-r7,r9,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 5f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3 @ dst += count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 3f @ while (len >= 0) \n"
	"2: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6, r7, r5 @ a += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ a += r14 * r12 \n"
	"bge 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ a >>= 8 \n"
	"strb r5,[r0, #-1]! @ *--dst=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	"5:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"6:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 9f @ while (len >= 0) \n"
	"7: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ a += r14 * r12 \n"
	"bge 7b @ } \n"
	"9: \n"
	"mov r5, r5, LSR #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 6b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r6,r9-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #1 @ dst += 2*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0, #-2]! @ dst -= 2; dst[0]=g \n"
	"strb r6, [r0, #1] @ dst[1]=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	"4:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=g \n"
	"strb r6, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #1 @ \n"
	"add r0, r0, r3 @ dst += 3*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = r = 128 \n"
	"mov r6, #128 @ r6 = g = 128 \n"
	"add r7, r1, r4, LSL #1 @ \n"
	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
	"mov r7, #128 @ r7 = b = 128 \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ r >>= 8 \n"
	"mov r6, r6, lsr #8 @ g >>= 8 \n"
	"mov r7, r7, lsr #8 @ b >>= 8 \n"
	"strb r5, [r0, #-3]! @ dst -= 3; dst[0]=r \n"
	"strb r6, [r0, #1] @ dst[1]=g \n"
	"strb r7, [r0, #2] @ dst[2]=b \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	"4:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = r = 128 \n"
	"mov r6, #128 @ r6 = g = 128 \n"
	"add r7, r1, r4, LSL #1 @ r7 = &src[2*r4] \n"
	"add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
	"mov r7, #128 @ r7 = b = 128 \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r8, [r4], #1 @ r8 = *min++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r8, r5 @ r += r8 * r14 \n"
	"mla r6, r14,r11,r6 @ g += r11 * r14 \n"
	"mla r7, r14,r12,r7 @ b += r12 * r14 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r5, r5, lsr #8 @ r >>= 8 \n"
	"mov r6, r6, lsr #8 @ g >>= 8 \n"
	"mov r7, r7, lsr #8 @ b >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=r \n"
	"strb r6, [r0], #1 @ *dst++=g \n"
	"strb r7, [r0], #1 @ *dst++=b \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"ldr r5,=0x00800080 @ r5 = rounding \n"
	"ldr r6,=0x00FF00FF @ r6 = 0x00FF00FF \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #2 @ dst += 4*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r12 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0, #-4]! @ *--dst = 33221100 \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	"4: \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r12 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0], #4 @ *dst++ = 33221100 \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r12= row \n"
	"ldr r14,[r13,#4*9] @ r14= n \n"
	"ldr r12,[r13,#4*10] @ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"mul r3, r14, r3 @ r3 = width *= n \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"subs r6, r3, #4 @ r6 = x = width-4 \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = &index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"blt 4f @ while (x >= 0) { \n"
#ifndef ARCH_UNALIGNED_OK
	"tst r3, #3 @ if ((r3 & 3) \n"
	"tsteq r1, #3 @ || (r1 & 3)) \n"
	"bne 4f @ can't do fast code \n"
#endif
	"ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
	"1: \n"
	"ldr r7, =0x00800080 @ r7 = val1 = round \n"
	"stmfd r13!,{r1,r2,r7} @ stash r1,r2,r7 \n"
	" @ r1 = min = src \n"
	" @ r2 = contrib2-4 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"mov r5, r7 @ r5 = val0 = round \n"
	"ble 3f @ while (len2-- > 0) { \n"
	"2: \n"
	"ldr r12,[r1], r3 @ r12 = *min; r1 = min += width\n"
	"ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
	"subs r8, r8, #1 @ len2-- \n"
	"and r11,r9, r12 @ r11= __22__00 \n"
	"and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
	"mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
	"mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
	"bgt 2b @ } \n"
	"and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
	"and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
	"orr r5, r5, r7 @ r5 = 33221100 \n"
	"3: \n"
	"ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
	"subs r6, r6, #4 @ x -= 4 \n"
	"add r1, r1, #4 @ src++ \n"
	"str r5, [r0], #4 @ *dst++ = val \n"
	"bge 1b @ \n"
	"4: @ } (Less than 4 to go) \n"
	"adds r6, r6, #4 @ r6 = x += 4 \n"
	"beq 8f @ if (x == 0) done \n"
	"5: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min; r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r6, r6, #1 @ x-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 5b @ \n"
	"8: \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"mov r11,#255 @ r11= 255 \n"
	"ldr r12,[r13,#4*10] @ r12= row \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r11= 255 \n"
	"@ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"mov r6, r3 @ r6 = x = width \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = &index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"5: \n"
	"ldr r4,[r13,#4*9] @ r4 = nn = n \n"
	"1: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min; r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r4, r4, #1 @ r4 = nn-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 1b @ \n"
	"subs r6, r6, #1 @ x-- \n"
	"strb r11,[r0], #1 @ *dst++ = 255 \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}
#else

static void
scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 1);
	if (weights->flip)
	{
		dst += weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(val>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(val>>8);
		}
	}
}

static void
scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 2);
	if (weights->flip)
	{
		dst += 2*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(c2>>8);
			*--dst = (unsigned char)(c1>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(c1>>8);
			*dst++ = (unsigned char)(c2>>8);
		}
	}
}

static void
scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 3);
	if (weights->flip)
	{
		dst += 3*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			int c3 = 128;
			min = &src[3 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				int c = *contrib++;
				c1 += *min++ * c;
				c2 += *min++ * c;
				c3 += *min++ * c;
			}
			*--dst = (unsigned char)(c3>>8);
			*--dst = (unsigned char)(c2>>8);
			*--dst = (unsigned char)(c1>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			int c3 = 128;
			min = &src[3 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				int c = *contrib++;
				c1 += *min++ * c;
				c2 += *min++ * c;
				c3 += *min++ * c;
			}
			*dst++ = (unsigned char)(c1>>8);
			*dst++ = (unsigned char)(c2>>8);
			*dst++ = (unsigned char)(c3>>8);
		}
	}
}

static void
scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
{
	const int *contrib = &weights->index[weights->index[0]];
	int len, i;
	const unsigned char *min;

	assert(weights->n == 4);
	if (weights->flip)
	{
		dst += 4*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(a>>8);
			*--dst = (unsigned char)(b>>8);
			*--dst = (unsigned char)(g>>8);
			*--dst = (unsigned char)(r>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(r>>8);
			*dst++ = (unsigned char)(g>>8);
			*dst++ = (unsigned char)(b>>8);
			*dst++ = (unsigned char)(a>>8);
		}
	}
}

static void
scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
	const int *contrib = &weights->index[weights->index[row]];
	int len, x;
	int width = w * n;

	contrib++; /* Skip min */
	len = *contrib++;
	for (x=width; x > 0; x--)
	{
		const unsigned char *min = src;
		int val = 128;
		int len2 = len;
		const int *contrib2 = contrib;

		while (len2-- > 0)
		{
			val += *min * *contrib2++;
			min += width;
		}
		*dst++ = (unsigned char)(val>>8);
		src++;
	}
}

static void
scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
{
	const int *contrib = &weights->index[weights->index[row]];
	int len, x;
	int width = w * n;

	contrib++; /* Skip min */
	len = *contrib++;
	for (x=w; x > 0; x--)
	{
		int nn;
		for (nn = n; nn > 0; nn--)
		{
			const unsigned char *min = src;
			int val = 128;
			int len2 = len;
			const int *contrib2 = contrib;

			while (len2-- > 0)
			{
				val += *min * *contrib2++;
				min += width;
			}
			*dst++ = (unsigned char)(val>>8);
			src++;
		}
		*dst++ = 255;
	}
}
#endif

#ifdef SINGLE_PIXEL_SPECIALS
static void
duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
{
	int i;

	for (i = n; i > 0; i--)
		*dst++ = *src++;
	if (forcealpha)
		*dst++ = 255;
	n += forcealpha;
	for (i = w-1; i > 0; i--)
	{
		memcpy(dst, dst-n, n);
		dst += n;
	}
	w *= n;
	dst -= w;
	h--;
	while (h--)
	{
		memcpy(dst+stride, dst, w);
		dst += stride;
	}
}
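
/* For example, a single RGB source pixel with forcealpha set expands to
 * r,g,b,255 repeated across the first destination row, and that row is
 * then copied down the remaining h-1 rows. */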

static void
scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j, n, nf;
	int tmp[FZ_MAX_COLORS];

	n = weights->n;
	nf = n + forcealpha;
	/* Scale a single row */
	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*nf;
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			dst -= 2*nf;
		}
		dst += nf + dstride;
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
		}
		dst += dstride - weights->count * nf;
	}
	/* And then duplicate it h times */
	nf *= weights->count;
	while (--h > 0)
	{
		memcpy(dst, dst-dstride, nf);
		dst += dstride;
	}
}

static void
scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
{
	const int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];
	int nf = n + forcealpha;

	for (j = 0; j < nf; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		src_w = (src_w-1)*sstride;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min = src_w-min*sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
				min -= sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= sstride;
			while (len-- > 0)
			{
				int c = *contrib++;
				for (j = 0; j < n; j++)
					tmp[j] += src[min+j] * c;
				if (forcealpha)
					tmp[j] += 255 * c;
				min += sstride;
			}
			for (j = 0; j < nf; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = (w-1)*nf; j > 0; j--)
			{
				*dst = dst[-nf];
				dst++;
			}
			dst += dstride - w*nf;
		}
	}
}
#endif /* SINGLE_PIXEL_SPECIALS */

static void
get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
{
	const int *contrib = &rows->index[rows->index[0]];
	int len, i, t, b;

	/* Calculate the edge alpha values */
	contrib++; /* Skip min */
	len = *contrib++;
	t = 0;
	while (len--)
		t += *contrib++;
	for (i=rows->count-2; i > 0; i--)
	{
		contrib++; /* Skip min */
		len = *contrib++;
		contrib += len;
	}
	b = 0;
	if (i == 0)
	{
		contrib++;
		len = *contrib++;
		while (len--)
			b += *contrib++;
	}
	if (rows->flip && i == 0)
	{
		*tp = b;
		*bp = t;
	}
	else
	{
		*tp = t;
		*bp = b;
	}
}

static void
adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
{
	int t, l, r, b, tl, tr, bl, br, x, y;
	unsigned char *dp = pix->samples;
	int w = pix->w;
	int n = pix->n;
	int span = w >= 2 ? (w-1)*n : 0;
	int stride = pix->stride;

	get_alpha_edge_values(rows, &t, &b);
	get_alpha_edge_values(cols, &l, &r);

	l = (255 * l + 128)>>8;
	r = (255 * r + 128)>>8;
	tl = (l * t + 128)>>8;
	tr = (r * t + 128)>>8;
	bl = (l * b + 128)>>8;
	br = (r * b + 128)>>8;
	t = (255 * t + 128)>>8;
	b = (255 * b + 128)>>8;
	dp += n-1;
	*dp = tl;
	dp += n;
	for (x = w-2; x > 0; x--)
	{
		*dp = t;
		dp += n;
	}
	if (x == 0)
	{
		*dp = tr;
		dp += n;
	}
	dp += stride - w*n;
	for (y = pix->h-2; y > 0; y--)
	{
		dp[span] = r;
		*dp = l;
		dp += stride;
	}
	if (y == 0)
	{
		*dp = bl;
		dp += n;
		for (x = w-2; x > 0; x--)
		{
			*dp = b;
			dp += n;
		}
		if (x == 0)
		{
			*dp = br;
		}
	}
}

fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
{
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}
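
/* Typical use (illustrative): shrink src to 100x50 with its top left corner
 * at the origin, unclipped:
 *
 *	fz_pixmap *dst = fz_scale_pixmap(ctx, src, 0, 0, 100, 50, NULL);
 *
 * A NULL return indicates a degenerate request (an empty clipped patch or an
 * extreme scale), not an allocation failure, which is thrown as an error. */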

fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	fz_scale_filter *filter = &fz_scale_filter_simple;
	fz_weights *contrib_rows = NULL;
	fz_weights *contrib_cols = NULL;
	fz_pixmap *output = NULL;
	unsigned char *temp = NULL;
	int max_row, temp_span, temp_rows, row;
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
	int flip_x, flip_y, forcealpha;
	fz_irect patch;

	fz_var(contrib_cols);
	fz_var(contrib_rows);

	/* Avoid extreme scales where overflows become problematic. */
	if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
		return NULL;
	if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
		return NULL;

	/* Clamp small ranges of w and h */
	if (w <= -1)
	{
	}
	else if (w < 0)
	{
		w = -1;
	}
	else if (w < 1)
	{
		w = 1;
	}
	if (h <= -1)
	{
	}
	else if (h < 0)
	{
		h = -1;
	}
	else if (h < 1)
	{
		h = 1;
	}

	/* If the src has an alpha, we'll make the dst have an alpha automatically.
	 * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
	forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);

	/* Find the destination bbox, width/height, and sub pixel offset,
	 * allowing for whether we're flipping or not. */
	/* The (x,y) position given describes where the top left corner
	 * of the source image should be mapped to (i.e. where (0,0) in image
	 * space ends up). Also there are differences in the way we scale
	 * horizontally and vertically. When scaling rows horizontally, we
	 * always read forwards through the source, and store either forwards
	 * or in reverse as required. When scaling vertically, we always store
	 * out forwards, but may feed source rows in, in a different order.
	 *
	 * Consider the image rectangle 'r' to which the image is mapped,
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
	 * complete pixels.
	 *
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
	 * the image is x flipped or not. Whatever happens 0 <= x < 1.
	 * y is always R.ymax - r.ymax.
	 */
	/* dst_x_int is calculated to be the left of the scaled image, and
	 * x (the sub pixel offset) is the distance in from either the left
	 * or right pixel expanded edge. */
	flip_x = (w < 0);
	if (flip_x)
	{
		float tmp;
		w = -w;
		dst_x_int = floorf(x-w);
		tmp = ceilf(x);
		dst_w_int = (int)tmp;
		x = tmp - x;
		dst_w_int -= dst_x_int;
	}
	else
	{
		dst_x_int = floorf(x);
		x -= dst_x_int;
		dst_w_int = (int)ceilf(x + w);
	}
	/* dst_y_int is calculated to be the top of the scaled image, and
	 * y (the sub pixel offset) is the distance in from either the top
	 * or bottom pixel expanded edge.
	 */
	flip_y = (h < 0);
	if (flip_y)
	{
		float tmp;
		h = -h;
		dst_y_int = floorf(y-h);
		tmp = ceilf(y);
		dst_h_int = (int)tmp;
		y = tmp - y;
		dst_h_int -= dst_y_int;
	}
	else
	{
		dst_y_int = floorf(y);
		y -= dst_y_int;
		dst_h_int = (int)ceilf(y + h);
	}

	fz_valgrind_pixmap(src);

	/* Step 0: Calculate the patch */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		if (flip_x)
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL;

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x);
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y);

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
	}
	fz_catch(ctx)
	{
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
			fz_valgrind_pixmap(output);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
			fz_valgrind_pixmap(output);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
		fz_valgrind_pixmap(output);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
		void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);

		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup;
		fz_try(ctx)
		{
			temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		switch (src->n)
		{
		default:
			row_scale_in = scale_row_to_temp;
			break;
		case 1: /* Image mask case or Greyscale case */
			row_scale_in = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale_in = scale_row_to_temp2;
			break;
		case 3: /* RGB case */
			row_scale_in = scale_row_to_temp3;
			break;
		case 4: /* RGBA or CMYK case */
			row_scale_in = scale_row_to_temp4;
			break;
		}
		row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				(*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row) : max_row)*src->stride], contrib_cols);
				max_row++;
			}

			(*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
		}
		fz_free(ctx, temp);

		if (forcealpha)
			adjust_alpha_edges(output, contrib_rows, contrib_cols);

		fz_valgrind_pixmap(output);
	}

cleanup:
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);

	return output;
}

void
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
{
	if (!sc)
		return;
	fz_free(ctx, sc->weights);
	fz_free(ctx, sc);
}

fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}