1 | // Copyright 2015 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // Image transform methods for lossless encoder. |
11 | // |
12 | // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
13 | // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
14 | |
15 | #include "./dsp.h" |
16 | |
17 | #if defined(WEBP_USE_MIPS_DSP_R2) |
18 | |
19 | #include "./lossless.h" |
20 | |
21 | static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, |
22 | int num_pixels) { |
23 | uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
24 | uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3); |
25 | uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3); |
26 | __asm__ volatile ( |
27 | ".set push \n\t" |
28 | ".set noreorder \n\t" |
29 | "beq %[argb_data], %[p_loop1_end], 3f \n\t" |
30 | " nop \n\t" |
31 | "0: \n\t" |
32 | "lw %[temp0], 0(%[argb_data]) \n\t" |
33 | "lw %[temp1], 4(%[argb_data]) \n\t" |
34 | "lw %[temp2], 8(%[argb_data]) \n\t" |
35 | "lw %[temp3], 12(%[argb_data]) \n\t" |
36 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
37 | "ext %[temp5], %[temp1], 8, 8 \n\t" |
38 | "ext %[temp6], %[temp2], 8, 8 \n\t" |
39 | "ext %[temp7], %[temp3], 8, 8 \n\t" |
40 | "addiu %[argb_data], %[argb_data], 16 \n\t" |
41 | "replv.ph %[temp4], %[temp4] \n\t" |
42 | "replv.ph %[temp5], %[temp5] \n\t" |
43 | "replv.ph %[temp6], %[temp6] \n\t" |
44 | "replv.ph %[temp7], %[temp7] \n\t" |
45 | "subu.qb %[temp0], %[temp0], %[temp4] \n\t" |
46 | "subu.qb %[temp1], %[temp1], %[temp5] \n\t" |
47 | "subu.qb %[temp2], %[temp2], %[temp6] \n\t" |
48 | "subu.qb %[temp3], %[temp3], %[temp7] \n\t" |
49 | "sw %[temp0], -16(%[argb_data]) \n\t" |
50 | "sw %[temp1], -12(%[argb_data]) \n\t" |
51 | "sw %[temp2], -8(%[argb_data]) \n\t" |
52 | "bne %[argb_data], %[p_loop1_end], 0b \n\t" |
53 | " sw %[temp3], -4(%[argb_data]) \n\t" |
54 | "3: \n\t" |
55 | "beq %[argb_data], %[p_loop2_end], 2f \n\t" |
56 | " nop \n\t" |
57 | "1: \n\t" |
58 | "lw %[temp0], 0(%[argb_data]) \n\t" |
59 | "addiu %[argb_data], %[argb_data], 4 \n\t" |
60 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
61 | "replv.ph %[temp4], %[temp4] \n\t" |
62 | "subu.qb %[temp0], %[temp0], %[temp4] \n\t" |
63 | "bne %[argb_data], %[p_loop2_end], 1b \n\t" |
64 | " sw %[temp0], -4(%[argb_data]) \n\t" |
65 | "2: \n\t" |
66 | ".set pop \n\t" |
67 | : [argb_data]"+&r" (argb_data), [temp0]"=&r" (temp0), |
68 | [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), |
69 | [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), [temp6]"=&r" (temp6), |
70 | [temp7]"=&r" (temp7) |
71 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
72 | : "memory" |
73 | ); |
74 | } |
75 | |
76 | static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, |
77 | int8_t color) { |
78 | return (uint32_t)((int)(color_pred) * color) >> 5; |
79 | } |
80 | |
81 | static void TransformColor(const VP8LMultipliers* const m, uint32_t* data, |
82 | int num_pixels) { |
83 | int temp0, temp1, temp2, temp3, temp4, temp5; |
84 | uint32_t argb, argb1, new_red, new_red1; |
85 | const uint32_t G_to_R = m->green_to_red_; |
86 | const uint32_t G_to_B = m->green_to_blue_; |
87 | const uint32_t R_to_B = m->red_to_blue_; |
88 | uint32_t* const p_loop_end = data + (num_pixels & ~1); |
89 | __asm__ volatile ( |
90 | ".set push \n\t" |
91 | ".set noreorder \n\t" |
92 | "beq %[data], %[p_loop_end], 1f \n\t" |
93 | " nop \n\t" |
94 | "replv.ph %[temp0], %[G_to_R] \n\t" |
95 | "replv.ph %[temp1], %[G_to_B] \n\t" |
96 | "replv.ph %[temp2], %[R_to_B] \n\t" |
97 | "shll.ph %[temp0], %[temp0], 8 \n\t" |
98 | "shll.ph %[temp1], %[temp1], 8 \n\t" |
99 | "shll.ph %[temp2], %[temp2], 8 \n\t" |
100 | "shra.ph %[temp0], %[temp0], 8 \n\t" |
101 | "shra.ph %[temp1], %[temp1], 8 \n\t" |
102 | "shra.ph %[temp2], %[temp2], 8 \n\t" |
103 | "0: \n\t" |
104 | "lw %[argb], 0(%[data]) \n\t" |
105 | "lw %[argb1], 4(%[data]) \n\t" |
106 | "lhu %[new_red], 2(%[data]) \n\t" |
107 | "lhu %[new_red1], 6(%[data]) \n\t" |
108 | "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" |
109 | "precr.qb.ph %[temp4], %[argb], %[argb1] \n\t" |
110 | "preceu.ph.qbra %[temp3], %[temp3] \n\t" |
111 | "preceu.ph.qbla %[temp4], %[temp4] \n\t" |
112 | "shll.ph %[temp3], %[temp3], 8 \n\t" |
113 | "shll.ph %[temp4], %[temp4], 8 \n\t" |
114 | "shra.ph %[temp3], %[temp3], 8 \n\t" |
115 | "shra.ph %[temp4], %[temp4], 8 \n\t" |
116 | "mul.ph %[temp5], %[temp3], %[temp0] \n\t" |
117 | "mul.ph %[temp3], %[temp3], %[temp1] \n\t" |
118 | "mul.ph %[temp4], %[temp4], %[temp2] \n\t" |
119 | "addiu %[data], %[data], 8 \n\t" |
120 | "ins %[new_red1], %[new_red], 16, 16 \n\t" |
121 | "ins %[argb1], %[argb], 16, 16 \n\t" |
122 | "shra.ph %[temp5], %[temp5], 5 \n\t" |
123 | "shra.ph %[temp3], %[temp3], 5 \n\t" |
124 | "shra.ph %[temp4], %[temp4], 5 \n\t" |
125 | "subu.ph %[new_red1], %[new_red1], %[temp5] \n\t" |
126 | "subu.ph %[argb1], %[argb1], %[temp3] \n\t" |
127 | "preceu.ph.qbra %[temp5], %[new_red1] \n\t" |
128 | "subu.ph %[argb1], %[argb1], %[temp4] \n\t" |
129 | "preceu.ph.qbra %[temp3], %[argb1] \n\t" |
130 | "sb %[temp5], -2(%[data]) \n\t" |
131 | "sb %[temp3], -4(%[data]) \n\t" |
132 | "sra %[temp5], %[temp5], 16 \n\t" |
133 | "sra %[temp3], %[temp3], 16 \n\t" |
134 | "sb %[temp5], -6(%[data]) \n\t" |
135 | "bne %[data], %[p_loop_end], 0b \n\t" |
136 | " sb %[temp3], -8(%[data]) \n\t" |
137 | "1: \n\t" |
138 | ".set pop \n\t" |
139 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
140 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
141 | [new_red1]"=&r" (new_red1), [new_red]"=&r" (new_red), |
142 | [argb]"=&r" (argb), [argb1]"=&r" (argb1), [data]"+&r" (data) |
143 | : [G_to_R]"r" (G_to_R), [R_to_B]"r" (R_to_B), |
144 | [G_to_B]"r" (G_to_B), [p_loop_end]"r" (p_loop_end) |
145 | : "memory" , "hi" , "lo" |
146 | ); |
147 | |
148 | if (num_pixels & 1) { |
149 | const uint32_t argb_ = data[0]; |
150 | const uint32_t green = argb_ >> 8; |
151 | const uint32_t red = argb_ >> 16; |
152 | uint32_t new_blue = argb_; |
153 | new_red = red; |
154 | new_red -= ColorTransformDelta(m->green_to_red_, green); |
155 | new_red &= 0xff; |
156 | new_blue -= ColorTransformDelta(m->green_to_blue_, green); |
157 | new_blue -= ColorTransformDelta(m->red_to_blue_, red); |
158 | new_blue &= 0xff; |
159 | data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue); |
160 | } |
161 | } |
162 | |
163 | static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, |
164 | uint8_t red_to_blue, |
165 | uint32_t argb) { |
166 | const uint32_t green = argb >> 8; |
167 | const uint32_t red = argb >> 16; |
168 | uint8_t new_blue = argb; |
169 | new_blue -= ColorTransformDelta(green_to_blue, green); |
170 | new_blue -= ColorTransformDelta(red_to_blue, red); |
171 | return (new_blue & 0xff); |
172 | } |
173 | |
// Adds to 'histo' the histogram of the blue values obtained by applying the
// 'green_to_blue' and 'red_to_blue' multipliers to every pixel of a
// tile_width x tile_height tile. 'argb' points at the first pixel of the tile
// and consecutive rows are 'stride' pixels apart.
// 'histo' is indexed with an 8-bit value and is only incremented, never
// cleared, by this function.
// NOTE(review): the DSP path multiplies by the low 16 bits of each multiplier
// while the scalar tail (TransformColorBlue) truncates them to 8 bits; both
// agree only for multipliers that fit in a signed 8-bit value -- confirm that
// callers guarantee this.
static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
                                       int tile_width, int tile_height,
                                       int green_to_blue, int red_to_blue,
                                       int histo[]) {
  // Replicate the low 16 bits of each multiplier into both halfwords. This is
  // done on unsigned values because left-shifting a negative int is undefined
  // behavior in C.
  const uint32_t rtb =
      ((uint32_t)red_to_blue << 16) | ((uint32_t)red_to_blue & 0xffffu);
  const uint32_t gtb =
      ((uint32_t)green_to_blue << 16) | ((uint32_t)green_to_blue & 0xffffu);
  const uint32_t mask = 0xff00ffu;
  const int num_pairs = tile_width >> 1;  // pixels are processed two at a time
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < num_pairs; ++x) {
      uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6;
      __asm__ volatile (
        "lw %[temp0], 0(%[p_argb]) \n\t"
        "lw %[temp1], 4(%[p_argb]) \n\t"
        // temp2: low byte of every halfword of both pixels (red and blue).
        "precr.qb.ph %[temp2], %[temp0], %[temp1] \n\t"
        // temp1: lower halves (green, blue) of both pixels.
        "ins %[temp1], %[temp0], 16, 16 \n\t"
        // Sign-extended red (temp2) and green (temp3), one per halfword.
        "shra.ph %[temp2], %[temp2], 8 \n\t"
        "shra.ph %[temp3], %[temp1], 8 \n\t"
        "mul.ph %[temp5], %[temp2], %[rtb] \n\t"
        "mul.ph %[temp6], %[temp3], %[gtb] \n\t"
        // temp4: blue of both pixels, one per halfword.
        "and %[temp4], %[temp1], %[mask] \n\t"
        "addiu %[p_argb], %[p_argb], 8 \n\t"
        "shra.ph %[temp5], %[temp5], 5 \n\t"
        "shra.ph %[temp6], %[temp6], 5 \n\t"
        // Byte-wise subtraction; the low byte of each halfword is the result.
        "subu.qb %[temp2], %[temp4], %[temp5] \n\t"
        "subu.qb %[temp2], %[temp2], %[temp6] \n\t"
        : [p_argb]"+&r" (p_argb), [temp0]"=&r" (temp0), [temp1]"=&r" (temp1),
          [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), [temp4]"=&r" (temp4),
          [temp5]"=&r" (temp5), [temp6]"=&r" (temp6)
        : [rtb]"r" (rtb), [gtb]"r" (gtb), [mask]"r" (mask)
        : "memory" , "hi" , "lo"
      );
      // Bits 16..23 hold the first pixel's result, bits 0..7 the second's.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    // Odd tile width: the last pixel of the row goes through the C helper.
    if (tile_width & 1) {
      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
    }
  }
}
216 | |
217 | static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, |
218 | uint32_t argb) { |
219 | const uint32_t green = argb >> 8; |
220 | uint32_t new_red = argb >> 16; |
221 | new_red -= ColorTransformDelta(green_to_red, green); |
222 | return (new_red & 0xff); |
223 | } |
224 | |
// Adds to 'histo' the histogram of the red values obtained by applying the
// 'green_to_red' multiplier to every pixel of a tile_width x tile_height
// tile. 'argb' points at the first pixel of the tile and consecutive rows are
// 'stride' pixels apart.
// 'histo' is indexed with an 8-bit value and is only incremented, never
// cleared, by this function.
// NOTE(review): the DSP path multiplies by the low 16 bits of 'green_to_red'
// while the scalar tail (TransformColorRed) truncates it to 8 bits; both
// agree only for a multiplier that fits in a signed 8-bit value -- confirm
// that callers guarantee this.
static void CollectColorRedTransforms(const uint32_t* argb, int stride,
                                      int tile_width, int tile_height,
                                      int green_to_red, int histo[]) {
  // Replicate the low 16 bits of the multiplier into both halfwords. This is
  // done on unsigned values because left-shifting a negative int is undefined
  // behavior in C.
  const uint32_t gtr =
      ((uint32_t)green_to_red << 16) | ((uint32_t)green_to_red & 0xffffu);
  const int num_pairs = tile_width >> 1;  // pixels are processed two at a time
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < num_pairs; ++x) {
      uint32_t temp0, temp1, temp2, temp3, temp4;
      __asm__ volatile (
        "lw %[temp0], 0(%[p_argb]) \n\t"
        "lw %[temp1], 4(%[p_argb]) \n\t"
        // temp4: upper halves of both pixels (red is the low byte of each).
        "precrq.ph.w %[temp4], %[temp0], %[temp1] \n\t"
        // temp1: lower halves (green, blue) of both pixels.
        "ins %[temp1], %[temp0], 16, 16 \n\t"
        // temp3: sign-extended green of both pixels, one per halfword.
        "shra.ph %[temp3], %[temp1], 8 \n\t"
        "mul.ph %[temp2], %[temp3], %[gtr] \n\t"
        "addiu %[p_argb], %[p_argb], 8 \n\t"
        "shra.ph %[temp2], %[temp2], 5 \n\t"
        // Byte-wise subtraction; the low byte of each halfword is the result.
        "subu.qb %[temp2], %[temp4], %[temp2] \n\t"
        : [p_argb]"+&r" (p_argb), [temp0]"=&r" (temp0), [temp1]"=&r" (temp1),
          [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), [temp4]"=&r" (temp4)
        : [gtr]"r" (gtr)
        : "memory" , "hi" , "lo"
      );
      // Bits 16..23 hold the first pixel's result, bits 0..7 the second's.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    // Odd tile width: the last pixel of the row goes through the C helper.
    if (tile_width & 1) {
      ++histo[TransformColorRed(green_to_red, *p_argb)];
    }
  }
}
258 | |
259 | //------------------------------------------------------------------------------ |
260 | // Entry point |
261 | |
262 | extern void VP8LEncDspInitMIPSdspR2(void); |
263 | |
264 | WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) { |
265 | VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; |
266 | VP8LTransformColor = TransformColor; |
267 | VP8LCollectColorBlueTransforms = CollectColorBlueTransforms; |
268 | VP8LCollectColorRedTransforms = CollectColorRedTransforms; |
269 | } |
270 | |
271 | #else // !WEBP_USE_MIPS_DSP_R2 |
272 | |
// Build without WEBP_USE_MIPS_DSP_R2: none of the routines above are
// compiled. WEBP_DSP_INIT_STUB is defined outside this file; presumably it
// expands to an empty VP8LEncDspInitMIPSdspR2() so that the symbol still
// exists -- confirm against the macro's definition.
WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2)
274 | |
275 | #endif // WEBP_USE_MIPS_DSP_R2 |
276 | |