/*
 * VIS op helpers
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"

/* This macro uses non-native bit order, i.e. bit 0 is the most significant bit */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This macro uses the bit order of the manuals, i.e. bit 0 is 2^0 (the LSB) */
#define GET_FIELD_SP(X, FROM, TO)                               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))

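/*
 * For example, GET_FIELD_SP(x, 11, 12) extracts bits 12:11 of x as a
 * two-bit value, while GET_FIELD(x, 0, 3) extracts the four most
 * significant bits.
 */

/*
 * ARRAY8: convert the fixed-point x/y/z coordinates packed in
 * pixel_addr into a blocked byte address; cubesize selects how many
 * bits of the upper x/y coordinates take part in the interleave.
 */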
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}

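/*
 * Host-endianness-independent accessors for the sub-elements of the
 * VIS64/VIS32 unions below: element 0 is always the least significant
 * byte/halfword/word of the value.
 */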
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;

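/*
 * FPMERGE: interleave the four low-order bytes of src1 with the four
 * low-order bytes of src2.  Counting from the least significant end,
 * even result bytes come from src2 and odd result bytes from src1.
 */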
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    /* Reverse calculation order to handle overlap */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */

    return d.ll;
}

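/*
 * FMUL8x16: multiply each unsigned 8-bit element of src1 by the
 * corresponding signed 16-bit element of src2 and keep the upper
 * 16 bits of each 24-bit product, rounded to nearest (ties up).
 */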
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

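/*
 * FMUL8x16AL: as FMUL8x16, but every byte of src1 is multiplied by
 * the same 16-bit element of src2 (element 1, i.e. bits 31:16).
 */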
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

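/*
 * FMUL8x16AU: as FMUL8x16, but every byte of src1 is multiplied by
 * the same 16-bit element of src2 (element 0, i.e. bits 15:0).
 */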
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);      \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

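/*
 * FMUL8SUx16: multiply the signed upper 8 bits of each 16-bit element
 * of src1 by the corresponding signed 16-bit element of src2; keep
 * the upper 16 bits of each rounded 24-bit product.
 */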
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

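/*
 * FMUL8ULx16: multiply the unsigned lower 8 bits of each 16-bit
 * element of src1 by the corresponding signed 16-bit element of src2;
 * keep the upper 16 bits of each rounded 24-bit product.
 */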
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

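/*
 * FMULD8SUx16: as FMUL8SUx16, but only elements 0 and 1 take part and
 * each rounded product is stored as a full 32-bit result.
 */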
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

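/*
 * FMULD8ULx16: as FMUL8ULx16, but only elements 0 and 1 take part and
 * each rounded product is stored as a full 32-bit result.
 */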
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

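/*
 * FEXPAND: convert each byte of the 32-bit source into a 16-bit
 * fixed-point value by shifting it left by 4 bits.
 */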
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = (uint32_t)src1;
    d.ll = src2;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}

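/*
 * Partitioned add/subtract.  The ##16 and ##32 variants operate on
 * four 16-bit or two 32-bit lanes of a 64-bit operand; the ##16s and
 * ##32s variants operate on a 32-bit operand.
 */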
#define VIS_HELPER(name, F)                                  \
    uint64_t name##16(uint64_t src1, uint64_t src2)          \
    {                                                        \
        VIS64 s, d;                                          \
                                                             \
        s.ll = src1;                                         \
        d.ll = src2;                                         \
                                                             \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));        \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));        \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));        \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));        \
                                                             \
        return d.ll;                                         \
    }                                                        \
                                                             \
    uint32_t name##16s(uint32_t src1, uint32_t src2)         \
    {                                                        \
        VIS32 s, d;                                          \
                                                             \
        s.l = src1;                                          \
        d.l = src2;                                          \
                                                             \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));        \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));        \
                                                             \
        return d.l;                                          \
    }                                                        \
                                                             \
    uint64_t name##32(uint64_t src1, uint64_t src2)          \
    {                                                        \
        VIS64 s, d;                                          \
                                                             \
        s.ll = src1;                                         \
        d.ll = src2;                                         \
                                                             \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));        \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));        \
                                                             \
        return d.ll;                                         \
    }                                                        \
                                                             \
    uint32_t name##32s(uint32_t src1, uint32_t src2)         \
    {                                                        \
        VIS32 s, d;                                          \
                                                             \
        s.l = src1;                                          \
        d.l = src2;                                          \
                                                             \
        d.l = F(d.l, s.l);                                   \
                                                             \
        return d.l;                                          \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

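/*
 * Partitioned compares: lane i of src1 is compared with lane i of
 * src2 and bit i of the result is set if the comparison holds; all
 * other result bits are cleared.
 */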
#define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

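/*
 * PDIST: accumulate the sum of absolute differences of the eight
 * byte pairs of src1 and src2 into the running sum.
 */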
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;
    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

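/*
 * FPACK16: left-shift each signed 16-bit element of rs2 by GSR.scale,
 * discard the 7 fraction bits, clamp the result to [0, 255] and pack
 * the four bytes into a 32-bit word.
 */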
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}

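/*
 * FPACK32: shift rs1 left by one byte and insert, into the low byte
 * of each 32-bit half, a byte derived from the corresponding 32-bit
 * element of rs2 (scaled by GSR.scale, truncated and clamped to
 * [0, 255]).
 */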
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

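/*
 * FPACKFIX: scale each signed 32-bit element of rs2 by GSR.scale,
 * discard the 16 fraction bits, clamp to the signed 16-bit range and
 * pack the two halfwords into a 32-bit result.
 */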
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}

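/*
 * BSHUFFLE: GSR.mask (gsr bits 63:32) holds eight 4-bit indices that
 * select bytes from the 16-byte concatenation of src1:src2; index 0
 * is the most significant byte of src1.
 */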
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes. */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i * 4)) & 0xf;
        /* Mask nibble i selects result byte i, counting from the MSB. */
        r.VIS_B64(7 - i) = s.b[e ^ host];
    }

    return r.ll;
}