lossless_mips_dsp_r2.c source code [Godot/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c]

// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15	#include "src/dsp/dsp.h"
16
17	#if defined(WEBP_USE_MIPS_DSP_R2)
18
19	#include "src/dsp/lossless.h"
20	#include "src/dsp/lossless_common.h"
21
// MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) defines
//
//   static void FUNC_NAME(const TYPE* src, const uint32_t* const color_map,
//                         TYPE* dst, int y_start, int y_end, int width);
//
// which sends every pixel of rows [y_start, y_end) through 'color_map'.
// 'src' and 'dst' are advanced continuously: each row consumes and produces
// exactly 'width' elements, with no extra stride between rows.
//
// Per row, (width >> 2) groups of four pixels are handled in assembly. The
// assembler-level '.ifc' directive compares the stringified TYPE and keeps
// only the matching load/store variant:
//   - uint8_t : the source byte is the palette index; bits 8..15 of the
//               looked-up 32-bit entry are stored as the output byte.
//   - uint32_t: bits 8..15 of the source word are the palette index; the whole
//               looked-up 32-bit entry is stored.
// The remaining (width & 3) pixels of the row go through the scalar
// GET_INDEX / GET_VALUE helpers. Those helpers are defined outside this file
// and are expected to perform the same extraction as the assembly path.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
static void FUNC_NAME(const TYPE* src, \
                      const uint32_t* const color_map, \
                      TYPE* dst, int y_start, int y_end, \
                      int width) { \
  int y; \
  for (y = y_start; y < y_end; ++y) { \
    int x; \
    /* Four pixels per iteration. */ \
    for (x = 0; x < (width >> 2); ++x) { \
      int tmp1, tmp2, tmp3, tmp4; \
      __asm__ volatile ( \
      /* Load four palette indices. */ \
      ".ifc " #TYPE ", uint8_t \n\t" \
        "lbu %[tmp1], 0(%[src]) \n\t" \
        "lbu %[tmp2], 1(%[src]) \n\t" \
        "lbu %[tmp3], 2(%[src]) \n\t" \
        "lbu %[tmp4], 3(%[src]) \n\t" \
        "addiu %[src], %[src], 4 \n\t" \
      ".endif \n\t" \
      ".ifc " #TYPE ", uint32_t \n\t" \
        "lw %[tmp1], 0(%[src]) \n\t" \
        "lw %[tmp2], 4(%[src]) \n\t" \
        "lw %[tmp3], 8(%[src]) \n\t" \
        "lw %[tmp4], 12(%[src]) \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "addiu %[src], %[src], 16 \n\t" \
      ".endif \n\t" \
        /* index * 4 = byte offset into color_map, then indexed load. */ \
        "sll %[tmp1], %[tmp1], 2 \n\t" \
        "sll %[tmp2], %[tmp2], 2 \n\t" \
        "sll %[tmp3], %[tmp3], 2 \n\t" \
        "sll %[tmp4], %[tmp4], 2 \n\t" \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
      /* Store four results. */ \
      ".ifc " #TYPE ", uint8_t \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "sb %[tmp1], 0(%[dst]) \n\t" \
        "sb %[tmp2], 1(%[dst]) \n\t" \
        "sb %[tmp3], 2(%[dst]) \n\t" \
        "sb %[tmp4], 3(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 4 \n\t" \
      ".endif \n\t" \
      ".ifc " #TYPE ", uint32_t \n\t" \
        "sw %[tmp1], 0(%[dst]) \n\t" \
        "sw %[tmp2], 4(%[dst]) \n\t" \
        "sw %[tmp3], 8(%[dst]) \n\t" \
        "sw %[tmp4], 12(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 16 \n\t" \
      ".endif \n\t" \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \
        : [color_map]"r"(color_map) \
        : "memory" \
      ); \
    } \
    /* Scalar tail: read through 'src', write through 'dst'. */ \
    for (x = 0; x < (width & 3); ++x) { \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
    } \
  } \
}

MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93
94	static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
95	uint32_t c2) {
96	int temp0, temp1, temp2, temp3, temp4, temp5;
97	__asm__ volatile (
98	"preceu.ph.qbr %[temp1], %[c0] \n\t"
99	"preceu.ph.qbl %[temp2], %[c0] \n\t"
100	"preceu.ph.qbr %[temp3], %[c1] \n\t"
101	"preceu.ph.qbl %[temp4], %[c1] \n\t"
102	"preceu.ph.qbr %[temp5], %[c2] \n\t"
103	"preceu.ph.qbl %[temp0], %[c2] \n\t"
104	"subq.ph %[temp3], %[temp3], %[temp5] \n\t"
105	"subq.ph %[temp4], %[temp4], %[temp0] \n\t"
106	"addq.ph %[temp1], %[temp1], %[temp3] \n\t"
107	"addq.ph %[temp2], %[temp2], %[temp4] \n\t"
108	"shll_s.ph %[temp1], %[temp1], 7 \n\t"
109	"shll_s.ph %[temp2], %[temp2], 7 \n\t"
110	"precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
111	: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
112	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
113	: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
114	: "memory"
115	);
116	return temp2;
117	}
118
119	static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
120	uint32_t c2) {
121	int temp0, temp1, temp2, temp3, temp4, temp5;
122	__asm__ volatile (
123	"adduh.qb %[temp5], %[c0], %[c1] \n\t"
124	"preceu.ph.qbr %[temp3], %[c2] \n\t"
125	"preceu.ph.qbr %[temp1], %[temp5] \n\t"
126	"preceu.ph.qbl %[temp2], %[temp5] \n\t"
127	"preceu.ph.qbl %[temp4], %[c2] \n\t"
128	"subq.ph %[temp3], %[temp1], %[temp3] \n\t"
129	"subq.ph %[temp4], %[temp2], %[temp4] \n\t"
130	"shrl.ph %[temp5], %[temp3], 15 \n\t"
131	"shrl.ph %[temp0], %[temp4], 15 \n\t"
132	"addq.ph %[temp3], %[temp3], %[temp5] \n\t"
133	"addq.ph %[temp4], %[temp0], %[temp4] \n\t"
134	"shra.ph %[temp3], %[temp3], 1 \n\t"
135	"shra.ph %[temp4], %[temp4], 1 \n\t"
136	"addq.ph %[temp1], %[temp1], %[temp3] \n\t"
137	"addq.ph %[temp2], %[temp2], %[temp4] \n\t"
138	"shll_s.ph %[temp1], %[temp1], 7 \n\t"
139	"shll_s.ph %[temp2], %[temp2], 7 \n\t"
140	"precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
141	: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
142	[temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
143	: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
144	: "memory"
145	);
146	return temp1;
147	}
148
149	static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
150	int temp0, temp1, temp2, temp3, temp4, temp5;
151	__asm__ volatile (
152	"cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
153	"pick.qb %[temp1], %[b], %[c] \n\t"
154	"pick.qb %[temp2], %[c], %[b] \n\t"
155	"cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
156	"pick.qb %[temp4], %[a], %[c] \n\t"
157	"pick.qb %[temp5], %[c], %[a] \n\t"
158	"subu.qb %[temp3], %[temp1], %[temp2] \n\t"
159	"subu.qb %[temp0], %[temp4], %[temp5] \n\t"
160	"raddu.w.qb %[temp3], %[temp3] \n\t"
161	"raddu.w.qb %[temp0], %[temp0] \n\t"
162	"subu %[temp3], %[temp3], %[temp0] \n\t"
163	"slti %[temp0], %[temp3], 0x1 \n\t"
164	"movz %[a], %[b], %[temp0] \n\t"
165	: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
166	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
167	[a]"+&r"(a)
168	: [b]"r"(b), [c]"r"(c)
169	);
170	return a;
171	}
172
173	static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
174	__asm__ volatile (
175	"adduh.qb %[a0], %[a0], %[a1] \n\t"
176	: [a0]"+r"(a0)
177	: [a1]"r"(a1)
178	);
179	return a0;
180	}
181
182	static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
183	return Average2(Average2(a0, a2), a1);
184	}
185
186	static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
187	uint32_t a2, uint32_t a3) {
188	return Average2(Average2(a0, a1), Average2(a2, a3));
189	}
190
// Predictor #5: Average3 of *left, top[0] and top[1].
// NOTE(review): by the parameter names, 'left' presumably points at the pixel
// left of the current one and 'top' at the pixel directly above it; the
// caller is not visible here.
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average3(*left, top[0], top[1]);
}
195
// Predictor #6: per-byte average of *left and top[-1].
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[-1]);
}
200
// Predictor #7: per-byte average of *left and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[0]);
}
205
// Predictor #8: per-byte average of top[-1] and top[0]. 'left' is unused and
// only present to match the common predictor signature.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}
211
// Predictor #9: per-byte average of top[0] and top[1]. 'left' is unused and
// only present to match the common predictor signature.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}
217
// Predictor #10: Average4 of *left, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Average4(*left, top[-1], top[0], top[1]);
}
222
// Predictor #11: Select() between top[0] and *left, with top[-1] as the
// reference pixel.
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Select(top[0], *left, top[-1]);
}
227
// Predictor #12: per-byte clamp of (*left + top[0] - top[-1]).
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractFull(*left, top[0], top[-1]);
}
232
// Predictor #13: ClampedAddSubtractHalf of *left, top[0] and top[-1], i.e. the
// average of *left and top[0] pushed away from top[-1] by half their
// difference, clamped per byte.
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
}
237
238	// Add green to blue and red channels (i.e. perform the inverse transform of
239	// 'subtract green').
240	static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
241	uint32_t* dst) {
242	uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
243	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
244	const uint32_t* const p_loop2_end = src + num_pixels;
245	__asm__ volatile (
246	".set push \n\t"
247	".set noreorder \n\t"
248	"beq %[src], %[p_loop1_end], 3f \n\t"
249	" nop \n\t"
250	"0: \n\t"
251	"lw %[temp0], 0(%[src]) \n\t"
252	"lw %[temp1], 4(%[src]) \n\t"
253	"lw %[temp2], 8(%[src]) \n\t"
254	"lw %[temp3], 12(%[src]) \n\t"
255	"ext %[temp4], %[temp0], 8, 8 \n\t"
256	"ext %[temp5], %[temp1], 8, 8 \n\t"
257	"ext %[temp6], %[temp2], 8, 8 \n\t"
258	"ext %[temp7], %[temp3], 8, 8 \n\t"
259	"addiu %[src], %[src], 16 \n\t"
260	"addiu %[dst], %[dst], 16 \n\t"
261	"replv.ph %[temp4], %[temp4] \n\t"
262	"replv.ph %[temp5], %[temp5] \n\t"
263	"replv.ph %[temp6], %[temp6] \n\t"
264	"replv.ph %[temp7], %[temp7] \n\t"
265	"addu.qb %[temp0], %[temp0], %[temp4] \n\t"
266	"addu.qb %[temp1], %[temp1], %[temp5] \n\t"
267	"addu.qb %[temp2], %[temp2], %[temp6] \n\t"
268	"addu.qb %[temp3], %[temp3], %[temp7] \n\t"
269	"sw %[temp0], -16(%[dst]) \n\t"
270	"sw %[temp1], -12(%[dst]) \n\t"
271	"sw %[temp2], -8(%[dst]) \n\t"
272	"bne %[src], %[p_loop1_end], 0b \n\t"
273	" sw %[temp3], -4(%[dst]) \n\t"
274	"3: \n\t"
275	"beq %[src], %[p_loop2_end], 2f \n\t"
276	" nop \n\t"
277	"1: \n\t"
278	"lw %[temp0], 0(%[src]) \n\t"
279	"addiu %[src], %[src], 4 \n\t"
280	"addiu %[dst], %[dst], 4 \n\t"
281	"ext %[temp4], %[temp0], 8, 8 \n\t"
282	"replv.ph %[temp4], %[temp4] \n\t"
283	"addu.qb %[temp0], %[temp0], %[temp4] \n\t"
284	"bne %[src], %[p_loop2_end], 1b \n\t"
285	" sw %[temp0], -4(%[dst]) \n\t"
286	"2: \n\t"
287	".set pop \n\t"
288	: [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
289	[temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
290	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
291	[temp7]"=&r"(temp7)
292	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
293	: "memory"
294	);
295	}
296
297	static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
298	const uint32_t* src, int num_pixels,
299	uint32_t* dst) {
300	int temp0, temp1, temp2, temp3, temp4, temp5;
301	uint32_t argb, argb1, new_red;
302	const uint32_t G_to_R = m->green_to_red_;
303	const uint32_t G_to_B = m->green_to_blue_;
304	const uint32_t R_to_B = m->red_to_blue_;
305	const uint32_t* const p_loop_end = src + (num_pixels & ~`1`);
306	__asm__ volatile (
307	".set push \n\t"
308	".set noreorder \n\t"
309	"beq %[src], %[p_loop_end], 1f \n\t"
310	" nop \n\t"
311	"replv.ph %[temp0], %[G_to_R] \n\t"
312	"replv.ph %[temp1], %[G_to_B] \n\t"
313	"replv.ph %[temp2], %[R_to_B] \n\t"
314	"shll.ph %[temp0], %[temp0], 8 \n\t"
315	"shll.ph %[temp1], %[temp1], 8 \n\t"
316	"shll.ph %[temp2], %[temp2], 8 \n\t"
317	"shra.ph %[temp0], %[temp0], 8 \n\t"
318	"shra.ph %[temp1], %[temp1], 8 \n\t"
319	"shra.ph %[temp2], %[temp2], 8 \n\t"
320	"0: \n\t"
321	"lw %[argb], 0(%[src]) \n\t"
322	"lw %[argb1], 4(%[src]) \n\t"
323	"sw %[argb], 0(%[dst]) \n\t"
324	"sw %[argb1], 4(%[dst]) \n\t"
325	"addiu %[src], %[src], 8 \n\t"
326	"addiu %[dst], %[dst], 8 \n\t"
327	"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
328	"preceu.ph.qbra %[temp3], %[temp3] \n\t"
329	"shll.ph %[temp3], %[temp3], 8 \n\t"
330	"shra.ph %[temp3], %[temp3], 8 \n\t"
331	"mul.ph %[temp5], %[temp3], %[temp0] \n\t"
332	"mul.ph %[temp3], %[temp3], %[temp1] \n\t"
333	"precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
334	"ins %[argb1], %[argb], 16, 16 \n\t"
335	"shra.ph %[temp5], %[temp5], 5 \n\t"
336	"shra.ph %[temp3], %[temp3], 5 \n\t"
337	"addu.ph %[new_red], %[new_red], %[temp5] \n\t"
338	"addu.ph %[argb1], %[argb1], %[temp3] \n\t"
339	"preceu.ph.qbra %[temp5], %[new_red] \n\t"
340	"shll.ph %[temp4], %[temp5], 8 \n\t"
341	"shra.ph %[temp4], %[temp4], 8 \n\t"
342	"mul.ph %[temp4], %[temp4], %[temp2] \n\t"
343	"sb %[temp5], -2(%[dst]) \n\t"
344	"sra %[temp5], %[temp5], 16 \n\t"
345	"shra.ph %[temp4], %[temp4], 5 \n\t"
346	"addu.ph %[argb1], %[argb1], %[temp4] \n\t"
347	"preceu.ph.qbra %[temp3], %[argb1] \n\t"
348	"sb %[temp5], -6(%[dst]) \n\t"
349	"sb %[temp3], -4(%[dst]) \n\t"
350	"sra %[temp3], %[temp3], 16 \n\t"
351	"bne %[src], %[p_loop_end], 0b \n\t"
352	" sb %[temp3], -8(%[dst]) \n\t"
353	"1: \n\t"
354	".set pop \n\t"
355	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
356	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
357	[new_red]"=&r"(new_red), [argb]"=&r"(argb),
358	[argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
359	: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
360	[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
361	: "memory", "hi", "lo"
362	);
363
364	// Fall-back to C-version for left-overs.
365	if (num_pixels & `1`) VP8LTransformColorInverse_C(m, src, `1`, dst);
366	}
367
368	static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
369	int num_pixels, uint8_t* dst) {
370	int temp0, temp1, temp2, temp3;
371	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
372	const uint32_t* const p_loop2_end = src + num_pixels;
373	__asm__ volatile (
374	".set push \n\t"
375	".set noreorder \n\t"
376	"beq %[src], %[p_loop1_end], 3f \n\t"
377	" nop \n\t"
378	"0: \n\t"
379	"lw %[temp3], 12(%[src]) \n\t"
380	"lw %[temp2], 8(%[src]) \n\t"
381	"lw %[temp1], 4(%[src]) \n\t"
382	"lw %[temp0], 0(%[src]) \n\t"
383	"ins %[temp3], %[temp2], 24, 8 \n\t"
384	"sll %[temp2], %[temp2], 8 \n\t"
385	"rotr %[temp3], %[temp3], 16 \n\t"
386	"ins %[temp2], %[temp1], 0, 16 \n\t"
387	"sll %[temp1], %[temp1], 8 \n\t"
388	"wsbh %[temp3], %[temp3] \n\t"
389	"balign %[temp0], %[temp1], 1 \n\t"
390	"wsbh %[temp2], %[temp2] \n\t"
391	"wsbh %[temp0], %[temp0] \n\t"
392	"usw %[temp3], 8(%[dst]) \n\t"
393	"rotr %[temp0], %[temp0], 16 \n\t"
394	"usw %[temp2], 4(%[dst]) \n\t"
395	"addiu %[src], %[src], 16 \n\t"
396	"usw %[temp0], 0(%[dst]) \n\t"
397	"bne %[src], %[p_loop1_end], 0b \n\t"
398	" addiu %[dst], %[dst], 12 \n\t"
399	"3: \n\t"
400	"beq %[src], %[p_loop2_end], 2f \n\t"
401	" nop \n\t"
402	"1: \n\t"
403	"lw %[temp0], 0(%[src]) \n\t"
404	"addiu %[src], %[src], 4 \n\t"
405	"wsbh %[temp1], %[temp0] \n\t"
406	"addiu %[dst], %[dst], 3 \n\t"
407	"ush %[temp1], -2(%[dst]) \n\t"
408	"sra %[temp0], %[temp0], 16 \n\t"
409	"bne %[src], %[p_loop2_end], 1b \n\t"
410	" sb %[temp0], -3(%[dst]) \n\t"
411	"2: \n\t"
412	".set pop \n\t"
413	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
414	[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
415	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
416	: "memory"
417	);
418	}
419
420	static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
421	int num_pixels, uint8_t* dst) {
422	int temp0, temp1, temp2, temp3;
423	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
424	const uint32_t* const p_loop2_end = src + num_pixels;
425	__asm__ volatile (
426	".set push \n\t"
427	".set noreorder \n\t"
428	"beq %[src], %[p_loop1_end], 3f \n\t"
429	" nop \n\t"
430	"0: \n\t"
431	"lw %[temp0], 0(%[src]) \n\t"
432	"lw %[temp1], 4(%[src]) \n\t"
433	"lw %[temp2], 8(%[src]) \n\t"
434	"lw %[temp3], 12(%[src]) \n\t"
435	"wsbh %[temp0], %[temp0] \n\t"
436	"wsbh %[temp1], %[temp1] \n\t"
437	"wsbh %[temp2], %[temp2] \n\t"
438	"wsbh %[temp3], %[temp3] \n\t"
439	"addiu %[src], %[src], 16 \n\t"
440	"balign %[temp0], %[temp0], 1 \n\t"
441	"balign %[temp1], %[temp1], 1 \n\t"
442	"balign %[temp2], %[temp2], 1 \n\t"
443	"balign %[temp3], %[temp3], 1 \n\t"
444	"usw %[temp0], 0(%[dst]) \n\t"
445	"usw %[temp1], 4(%[dst]) \n\t"
446	"usw %[temp2], 8(%[dst]) \n\t"
447	"usw %[temp3], 12(%[dst]) \n\t"
448	"bne %[src], %[p_loop1_end], 0b \n\t"
449	" addiu %[dst], %[dst], 16 \n\t"
450	"3: \n\t"
451	"beq %[src], %[p_loop2_end], 2f \n\t"
452	" nop \n\t"
453	"1: \n\t"
454	"lw %[temp0], 0(%[src]) \n\t"
455	"wsbh %[temp0], %[temp0] \n\t"
456	"addiu %[src], %[src], 4 \n\t"
457	"balign %[temp0], %[temp0], 1 \n\t"
458	"usw %[temp0], 0(%[dst]) \n\t"
459	"bne %[src], %[p_loop2_end], 1b \n\t"
460	" addiu %[dst], %[dst], 4 \n\t"
461	"2: \n\t"
462	".set pop \n\t"
463	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
464	[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
465	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
466	: "memory"
467	);
468	}
469
470	static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
471	int num_pixels, uint8_t* dst) {
472	int temp0, temp1, temp2, temp3, temp4, temp5;
473	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
474	const uint32_t* const p_loop2_end = src + num_pixels;
475	__asm__ volatile (
476	".set push \n\t"
477	".set noreorder \n\t"
478	"beq %[src], %[p_loop1_end], 3f \n\t"
479	" nop \n\t"
480	"0: \n\t"
481	"lw %[temp0], 0(%[src]) \n\t"
482	"lw %[temp1], 4(%[src]) \n\t"
483	"lw %[temp2], 8(%[src]) \n\t"
484	"lw %[temp3], 12(%[src]) \n\t"
485	"ext %[temp4], %[temp0], 28, 4 \n\t"
486	"ext %[temp5], %[temp0], 12, 4 \n\t"
487	"ins %[temp0], %[temp4], 0, 4 \n\t"
488	"ext %[temp4], %[temp1], 28, 4 \n\t"
489	"ins %[temp0], %[temp5], 16, 4 \n\t"
490	"ext %[temp5], %[temp1], 12, 4 \n\t"
491	"ins %[temp1], %[temp4], 0, 4 \n\t"
492	"ext %[temp4], %[temp2], 28, 4 \n\t"
493	"ins %[temp1], %[temp5], 16, 4 \n\t"
494	"ext %[temp5], %[temp2], 12, 4 \n\t"
495	"ins %[temp2], %[temp4], 0, 4 \n\t"
496	"ext %[temp4], %[temp3], 28, 4 \n\t"
497	"ins %[temp2], %[temp5], 16, 4 \n\t"
498	"ext %[temp5], %[temp3], 12, 4 \n\t"
499	"ins %[temp3], %[temp4], 0, 4 \n\t"
500	"precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
501	"ins %[temp3], %[temp5], 16, 4 \n\t"
502	"addiu %[src], %[src], 16 \n\t"
503	"precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
504	#if (WEBP_SWAP_16BIT_CSP == 1)
505	"usw %[temp1], 0(%[dst]) \n\t"
506	"usw %[temp3], 4(%[dst]) \n\t"
507	#else
508	"wsbh %[temp1], %[temp1] \n\t"
509	"wsbh %[temp3], %[temp3] \n\t"
510	"usw %[temp1], 0(%[dst]) \n\t"
511	"usw %[temp3], 4(%[dst]) \n\t"
512	#endif
513	"bne %[src], %[p_loop1_end], 0b \n\t"
514	" addiu %[dst], %[dst], 8 \n\t"
515	"3: \n\t"
516	"beq %[src], %[p_loop2_end], 2f \n\t"
517	" nop \n\t"
518	"1: \n\t"
519	"lw %[temp0], 0(%[src]) \n\t"
520	"ext %[temp4], %[temp0], 28, 4 \n\t"
521	"ext %[temp5], %[temp0], 12, 4 \n\t"
522	"ins %[temp0], %[temp4], 0, 4 \n\t"
523	"ins %[temp0], %[temp5], 16, 4 \n\t"
524	"addiu %[src], %[src], 4 \n\t"
525	"precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
526	#if (WEBP_SWAP_16BIT_CSP == 1)
527	"ush %[temp0], 0(%[dst]) \n\t"
528	#else
529	"wsbh %[temp0], %[temp0] \n\t"
530	"ush %[temp0], 0(%[dst]) \n\t"
531	#endif
532	"bne %[src], %[p_loop2_end], 1b \n\t"
533	" addiu %[dst], %[dst], 2 \n\t"
534	"2: \n\t"
535	".set pop \n\t"
536	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
537	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
538	[dst]"+&r"(dst), [src]"+&r"(src)
539	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
540	: "memory"
541	);
542	}
543
544	static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
545	int num_pixels, uint8_t* dst) {
546	int temp0, temp1, temp2, temp3, temp4, temp5;
547	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
548	const uint32_t* const p_loop2_end = src + num_pixels;
549	__asm__ volatile (
550	".set push \n\t"
551	".set noreorder \n\t"
552	"beq %[src], %[p_loop1_end], 3f \n\t"
553	" nop \n\t"
554	"0: \n\t"
555	"lw %[temp0], 0(%[src]) \n\t"
556	"lw %[temp1], 4(%[src]) \n\t"
557	"lw %[temp2], 8(%[src]) \n\t"
558	"lw %[temp3], 12(%[src]) \n\t"
559	"ext %[temp4], %[temp0], 8, 16 \n\t"
560	"ext %[temp5], %[temp0], 5, 11 \n\t"
561	"ext %[temp0], %[temp0], 3, 5 \n\t"
562	"ins %[temp4], %[temp5], 0, 11 \n\t"
563	"ext %[temp5], %[temp1], 5, 11 \n\t"
564	"ins %[temp4], %[temp0], 0, 5 \n\t"
565	"ext %[temp0], %[temp1], 8, 16 \n\t"
566	"ext %[temp1], %[temp1], 3, 5 \n\t"
567	"ins %[temp0], %[temp5], 0, 11 \n\t"
568	"ext %[temp5], %[temp2], 5, 11 \n\t"
569	"ins %[temp0], %[temp1], 0, 5 \n\t"
570	"ext %[temp1], %[temp2], 8, 16 \n\t"
571	"ext %[temp2], %[temp2], 3, 5 \n\t"
572	"ins %[temp1], %[temp5], 0, 11 \n\t"
573	"ext %[temp5], %[temp3], 5, 11 \n\t"
574	"ins %[temp1], %[temp2], 0, 5 \n\t"
575	"ext %[temp2], %[temp3], 8, 16 \n\t"
576	"ext %[temp3], %[temp3], 3, 5 \n\t"
577	"ins %[temp2], %[temp5], 0, 11 \n\t"
578	"append %[temp0], %[temp4], 16 \n\t"
579	"ins %[temp2], %[temp3], 0, 5 \n\t"
580	"addiu %[src], %[src], 16 \n\t"
581	"append %[temp2], %[temp1], 16 \n\t"
582	#if (WEBP_SWAP_16BIT_CSP == 1)
583	"usw %[temp0], 0(%[dst]) \n\t"
584	"usw %[temp2], 4(%[dst]) \n\t"
585	#else
586	"wsbh %[temp0], %[temp0] \n\t"
587	"wsbh %[temp2], %[temp2] \n\t"
588	"usw %[temp0], 0(%[dst]) \n\t"
589	"usw %[temp2], 4(%[dst]) \n\t"
590	#endif
591	"bne %[src], %[p_loop1_end], 0b \n\t"
592	" addiu %[dst], %[dst], 8 \n\t"
593	"3: \n\t"
594	"beq %[src], %[p_loop2_end], 2f \n\t"
595	" nop \n\t"
596	"1: \n\t"
597	"lw %[temp0], 0(%[src]) \n\t"
598	"ext %[temp4], %[temp0], 8, 16 \n\t"
599	"ext %[temp5], %[temp0], 5, 11 \n\t"
600	"ext %[temp0], %[temp0], 3, 5 \n\t"
601	"ins %[temp4], %[temp5], 0, 11 \n\t"
602	"addiu %[src], %[src], 4 \n\t"
603	"ins %[temp4], %[temp0], 0, 5 \n\t"
604	#if (WEBP_SWAP_16BIT_CSP == 1)
605	"ush %[temp4], 0(%[dst]) \n\t"
606	#else
607	"wsbh %[temp4], %[temp4] \n\t"
608	"ush %[temp4], 0(%[dst]) \n\t"
609	#endif
610	"bne %[src], %[p_loop2_end], 1b \n\t"
611	" addiu %[dst], %[dst], 2 \n\t"
612	"2: \n\t"
613	".set pop \n\t"
614	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
615	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
616	[dst]"+&r"(dst), [src]"+&r"(src)
617	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
618	: "memory"
619	);
620	}
621
622	static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
623	int num_pixels, uint8_t* dst) {
624	int temp0, temp1, temp2, temp3;
625	const uint32_t* const p_loop1_end = src + (num_pixels & ~`3`);
626	const uint32_t* const p_loop2_end = src + num_pixels;
627	__asm__ volatile (
628	".set push \n\t"
629	".set noreorder \n\t"
630	"beq %[src], %[p_loop1_end], 3f \n\t"
631	" nop \n\t"
632	"0: \n\t"
633	"lw %[temp0], 0(%[src]) \n\t"
634	"lw %[temp1], 4(%[src]) \n\t"
635	"lw %[temp2], 8(%[src]) \n\t"
636	"lw %[temp3], 12(%[src]) \n\t"
637	"ins %[temp0], %[temp1], 24, 8 \n\t"
638	"sra %[temp1], %[temp1], 8 \n\t"
639	"ins %[temp1], %[temp2], 16, 16 \n\t"
640	"sll %[temp2], %[temp2], 8 \n\t"
641	"balign %[temp3], %[temp2], 1 \n\t"
642	"addiu %[src], %[src], 16 \n\t"
643	"usw %[temp0], 0(%[dst]) \n\t"
644	"usw %[temp1], 4(%[dst]) \n\t"
645	"usw %[temp3], 8(%[dst]) \n\t"
646	"bne %[src], %[p_loop1_end], 0b \n\t"
647	" addiu %[dst], %[dst], 12 \n\t"
648	"3: \n\t"
649	"beq %[src], %[p_loop2_end], 2f \n\t"
650	" nop \n\t"
651	"1: \n\t"
652	"lw %[temp0], 0(%[src]) \n\t"
653	"addiu %[src], %[src], 4 \n\t"
654	"addiu %[dst], %[dst], 3 \n\t"
655	"ush %[temp0], -3(%[dst]) \n\t"
656	"sra %[temp0], %[temp0], 16 \n\t"
657	"bne %[src], %[p_loop2_end], 1b \n\t"
658	" sb %[temp0], -1(%[dst]) \n\t"
659	"2: \n\t"
660	".set pop \n\t"
661	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
662	[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
663	: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
664	: "memory"
665	);
666	}
667
668	//------------------------------------------------------------------------------
669	// Entry point
670
671	extern void VP8LDspInitMIPSdspR2(void);
672
673	WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
674	VP8LMapColor32b = MapARGB_MIPSdspR2;
675	VP8LMapColor8b = MapAlpha_MIPSdspR2;
676
677	VP8LPredictors[`5`] = Predictor5_MIPSdspR2;
678	VP8LPredictors[`6`] = Predictor6_MIPSdspR2;
679	VP8LPredictors[`7`] = Predictor7_MIPSdspR2;
680	VP8LPredictors[`8`] = Predictor8_MIPSdspR2;
681	VP8LPredictors[`9`] = Predictor9_MIPSdspR2;
682	VP8LPredictors[`10`] = Predictor10_MIPSdspR2;
683	VP8LPredictors[`11`] = Predictor11_MIPSdspR2;
684	VP8LPredictors[`12`] = Predictor12_MIPSdspR2;
685	VP8LPredictors[`13`] = Predictor13_MIPSdspR2;
686
687	VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
688	VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
689
690	VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
691	VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
692	VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
693	VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
694	VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
695	}
696
697	#else // !WEBP_USE_MIPS_DSP_R2
698
699	WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
700
701	#endif // WEBP_USE_MIPS_DSP_R2
702

Browse the source code of Godot/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c