1/*
2 * QEMU TCG support -- s390x vector string instruction support
3 *
4 * Copyright (C) 2019 Red Hat Inc
5 *
6 * Authors:
7 * David Hildenbrand <david@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12#include "qemu/osdep.h"
13#include "qemu-common.h"
14#include "cpu.h"
15#include "internal.h"
16#include "vec.h"
17#include "tcg/tcg.h"
18#include "tcg/tcg-gvec-desc.h"
19#include "exec/helper-proto.h"
20
/*
 * Element-wise test for zero.
 *
 * @a:    packed elements to inspect
 * @mask: for every element, all bits except the element's MSB
 *
 * The result has the MSB of an element set exactly when that element of
 * @a is zero; every other bit of the result is clear.
 */
static inline uint64_t zero_search(uint64_t a, uint64_t mask)
{
    /*
     * Adding the mask to the masked value carries into an element's MSB
     * whenever one of the lower bits is set; the sum cannot spill into the
     * neighbouring element because both addends have the MSB clear.
     */
    const uint64_t lsbs_carry = (a & mask) + mask;
    /* MSB set <=> element is nonzero; non-MSB positions are forced to one */
    const uint64_t nonzero_or_lsbs = lsbs_carry | a | mask;

    return ~nonzero_or_lsbs;
}
29
/*
 * Element-wise test for nonzero.
 *
 * @a:    packed elements to inspect
 * @mask: for every element, all bits except the element's MSB
 *
 * The result has the MSB of an element set exactly when that element of
 * @a is not zero; every other bit of the result is clear.
 */
static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
{
    /* a carry reaches the element's MSB iff one of the lower bits is set */
    const uint64_t lsbs_carry = (a & mask) + mask;
    /* also account for elements that only have their MSB set */
    const uint64_t any_bit = lsbs_carry | a;

    /* keep nothing but the per-element MSBs */
    return any_bit & ~mask;
}
38
/*
 * Converts a pair of per-element MSB match masks into a byte offset.
 *
 * @c0: match bits for the first doubleword
 * @c1: match bits for the second doubleword
 *
 * Returns the byte offset of the leftmost set bit, looking at @c0 first
 * and at @c1 only when @c0 is zero. When neither has a bit set the result
 * is 16 (this assumes clz64(0) evaluates to 64).
 */
static inline int match_index(uint64_t c0, uint64_t c1)
{
    int bit_offset;

    if (c0) {
        bit_offset = clz64(c0);
    } else {
        /* skip the 64 bits of the first doubleword */
        bit_offset = clz64(c1) + 64;
    }
    /* 8 bits per byte */
    return bit_offset >> 3;
}
46
47/*
48 * Returns the number of bits composing one element.
49 */
50static uint8_t get_element_bits(uint8_t es)
51{
52 return (1 << es) * BITS_PER_BYTE;
53}
54
/*
 * Returns a right-aligned mask that covers all bits of exactly one element
 * of size @es.
 */
static uint64_t get_single_element_mask(uint8_t es)
{
    /* number of high-order bits that do not belong to the element */
    const int unused_bits = 64 - get_element_bits(es);

    return ~0ull >> unused_bits;
}
62
/*
 * Returns a right-aligned mask that covers one element of size @es except
 * for the element's MSB.
 */
static uint64_t get_single_element_lsbs_mask(uint8_t es)
{
    /* drop one bit more than the unused high-order part: the element's MSB */
    const int dropped_bits = 65 - get_element_bits(es);

    return ~0ull >> dropped_bits;
}
70
/*
 * Returns a 64-bit mask in which the single-element "all bits but the MSB"
 * pattern for size @es is duplicated via dup_const().
 */
static uint64_t get_element_lsbs_mask(uint8_t es)
{
    const uint64_t one_element = get_single_element_lsbs_mask(es);

    return dup_const(es, one_element);
}
78
/*
 * Searches the elements of @v2 for elements that are equal to any element
 * of @v3.
 *
 * @v1: destination vector
 * @v2: vector whose elements are searched
 * @v3: vector providing the elements to look for
 * @in: invert the per-element match result
 * @rt: if set, write a per-element mask (all ones for a match, zero
 *      otherwise) to @v1; if clear, write MIN(first match, first zero) as
 *      a byte offset to the first doubleword of @v1 and zero to the second
 * @zs: additionally search @v2 for the first zero element
 * @es: element size
 *
 * Returns 3 if there is neither a match nor a zero, 1 if there is a match
 * but no zero, 2 if a match precedes the zero, and 0 otherwise.
 */
static int vfae(void *v1, const void *v2, const void *v3, bool in,
                bool rt, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    const int elem_bits = get_element_bits(es);
    /* fetch all inputs up front, before anything is written to v1 */
    const uint64_t src0 = s390_vec_read_element64(v2, 0);
    const uint64_t src1 = s390_vec_read_element64(v2, 1);
    const uint64_t set0 = s390_vec_read_element64(v3, 0);
    const uint64_t set1 = s390_vec_read_element64(v3, 1);
    uint64_t hit0 = 0, hit1 = 0;
    uint64_t zero_idx = 16;
    uint64_t equal_idx;
    int rot;

    /*
     * Rotating both doublewords of v3 by every multiple of the element
     * width lines up each of its elements with each element position, so
     * every element of v2 gets compared with every element of v3.
     */
    for (rot = 0; rot < 64; rot += elem_bits) {
        const uint64_t rot0 = rol64(set0, rot);
        const uint64_t rot1 = rol64(set1, rot);

        /* an XOR of zero means the two elements are equal */
        hit0 |= zero_search(src0 ^ rot0, lsbs) | zero_search(src0 ^ rot1, lsbs);
        hit1 |= zero_search(src1 ^ rot0, lsbs) | zero_search(src1 ^ rot1, lsbs);
    }

    /* invert the result if requested - only the MSBs carry information */
    if (in) {
        hit0 = ~(hit0 | lsbs);
        hit1 = ~(hit1 | lsbs);
    }
    equal_idx = match_index(hit0, hit1);

    if (zs) {
        zero_idx = match_index(zero_search(src0, lsbs),
                               zero_search(src1, lsbs));
    }

    if (rt) {
        /*
         * Move each match bit to the LSB of its element and multiply by an
         * all-ones element to fill the complete element.
         */
        const uint64_t elem_ones = get_single_element_mask(es);

        hit0 = (hit0 >> (elem_bits - 1)) * elem_ones;
        hit1 = (hit1 >> (elem_bits - 1)) * elem_ones;
        s390_vec_write_element64(v1, 0, hit0);
        s390_vec_write_element64(v1, 1, hit1);
    } else {
        s390_vec_write_element64(v1, 0, MIN(equal_idx, zero_idx));
        s390_vec_write_element64(v1, 1, 0);
    }

    if (zero_idx == 16) {
        /* no zero: either no match at all, or matching elements only */
        return equal_idx == 16 ? 3 : 1;
    }
    /* a zero was found: did a matching element come first? */
    return equal_idx < zero_idx ? 2 : 0;
}
136
137#define DEF_VFAE_HELPER(BITS) \
138void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \
139 uint32_t desc) \
140{ \
141 const bool in = extract32(simd_data(desc), 3, 1); \
142 const bool rt = extract32(simd_data(desc), 2, 1); \
143 const bool zs = extract32(simd_data(desc), 1, 1); \
144 \
145 vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
146}
147DEF_VFAE_HELPER(8)
148DEF_VFAE_HELPER(16)
149DEF_VFAE_HELPER(32)
150
151#define DEF_VFAE_CC_HELPER(BITS) \
152void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \
153 CPUS390XState *env, uint32_t desc) \
154{ \
155 const bool in = extract32(simd_data(desc), 3, 1); \
156 const bool rt = extract32(simd_data(desc), 2, 1); \
157 const bool zs = extract32(simd_data(desc), 1, 1); \
158 \
159 env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \
160}
161DEF_VFAE_CC_HELPER(8)
162DEF_VFAE_CC_HELPER(16)
163DEF_VFAE_CC_HELPER(32)
164
/*
 * Finds the first element position at which @v2 and @v3 hold equal values.
 *
 * @v1: destination vector; MIN(first equal, first zero) is written as a
 *      byte offset to its first doubleword, zero to its second
 * @v2: first source vector (also the one searched for zero)
 * @v3: second source vector
 * @zs: additionally search @v2 for the first zero element
 * @es: element size
 *
 * Returns 3 if there is neither a match nor a zero, 1 if there is a match
 * but no zero, 2 if a match precedes the zero, and 0 otherwise.
 */
static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    /* fetch all inputs up front, before anything is written to v1 */
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* an XOR of zero means the two elements are equal */
    const uint64_t equal_idx = match_index(zero_search(lhs0 ^ rhs0, lsbs),
                                           zero_search(lhs1 ^ rhs1, lsbs));
    uint64_t zero_idx = 16;

    if (zs) {
        zero_idx = match_index(zero_search(lhs0, lsbs),
                               zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(equal_idx, zero_idx));
    s390_vec_write_element64(v1, 1, 0);

    if (zero_idx == 16) {
        /* no zero: either no match at all, or matching elements only */
        return equal_idx == 16 ? 3 : 1;
    }
    /* a zero was found: did a matching element come first? */
    return equal_idx < zero_idx ? 2 : 0;
}
197
198#define DEF_VFEE_HELPER(BITS) \
199void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \
200 uint32_t desc) \
201{ \
202 const bool zs = extract32(simd_data(desc), 1, 1); \
203 \
204 vfee(v1, v2, v3, zs, MO_##BITS); \
205}
206DEF_VFEE_HELPER(8)
207DEF_VFEE_HELPER(16)
208DEF_VFEE_HELPER(32)
209
210#define DEF_VFEE_CC_HELPER(BITS) \
211void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \
212 CPUS390XState *env, uint32_t desc) \
213{ \
214 const bool zs = extract32(simd_data(desc), 1, 1); \
215 \
216 env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \
217}
218DEF_VFEE_CC_HELPER(8)
219DEF_VFEE_CC_HELPER(16)
220DEF_VFEE_CC_HELPER(32)
221
/*
 * Finds the first element position at which @v2 and @v3 hold different
 * values.
 *
 * @v1: destination vector; MIN(first unequal, first zero) is written as a
 *      byte offset to its first doubleword, zero to its second
 * @v2: first source vector (also the one searched for zero)
 * @v3: second source vector
 * @zs: additionally search @v2 for the first zero element
 * @es: element size
 *
 * Returns 3 if there is neither an unequal element nor a zero, 0 if the
 * zero precedes the first unequal element, and otherwise 1 if the first
 * unequal element of @v2 is smaller than the one of @v3, or 2 if it is not.
 */
static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    /* fetch all inputs up front, before anything is written to v1 */
    const uint64_t lhs0 = s390_vec_read_element64(v2, 0);
    const uint64_t lhs1 = s390_vec_read_element64(v2, 1);
    const uint64_t rhs0 = s390_vec_read_element64(v3, 0);
    const uint64_t rhs1 = s390_vec_read_element64(v3, 1);
    /* a nonzero XOR means the two elements differ */
    const uint64_t diff_idx = match_index(nonzero_search(lhs0 ^ rhs0, lsbs),
                                          nonzero_search(lhs1 ^ rhs1, lsbs));
    uint64_t zero_idx = 16;
    bool v2_smaller = false;

    if (diff_idx < 16) {
        /* turn the byte offset into an element number and compare there */
        const uint8_t enr = diff_idx / (1 << es);
        const uint32_t lhs = s390_vec_read_element(v2, enr, es);
        const uint32_t rhs = s390_vec_read_element(v3, enr, es);

        v2_smaller = lhs < rhs;
    }

    if (zs) {
        zero_idx = match_index(zero_search(lhs0, lsbs),
                               zero_search(lhs1, lsbs));
    }

    s390_vec_write_element64(v1, 0, MIN(diff_idx, zero_idx));
    s390_vec_write_element64(v1, 1, 0);

    if (zero_idx == 16 && diff_idx == 16) {
        return 3;
    }
    if (zero_idx < diff_idx) {
        return 0;
    }
    return v2_smaller ? 1 : 2;
}
262
263#define DEF_VFENE_HELPER(BITS) \
264void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \
265 uint32_t desc) \
266{ \
267 const bool zs = extract32(simd_data(desc), 1, 1); \
268 \
269 vfene(v1, v2, v3, zs, MO_##BITS); \
270}
271DEF_VFENE_HELPER(8)
272DEF_VFENE_HELPER(16)
273DEF_VFENE_HELPER(32)
274
275#define DEF_VFENE_CC_HELPER(BITS) \
276void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \
277 CPUS390XState *env, uint32_t desc) \
278{ \
279 const bool zs = extract32(simd_data(desc), 1, 1); \
280 \
281 env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \
282}
283DEF_VFENE_CC_HELPER(8)
284DEF_VFENE_CC_HELPER(16)
285DEF_VFENE_CC_HELPER(32)
286
/*
 * Copies @v2 to @v1, clearing everything from the first zero element on.
 *
 * @v1: destination vector
 * @v2: source vector
 * @es: element size
 *
 * Returns 0 if a zero element was found, 3 otherwise (in which case @v1
 * receives an unmodified copy of @v2).
 */
static int vistr(void *v1, const void *v2, uint8_t es)
{
    const uint64_t lsbs = get_element_lsbs_mask(es);
    uint64_t d0 = s390_vec_read_element64(v2, 0);
    uint64_t d1 = s390_vec_read_element64(v2, 1);
    uint64_t zeros = zero_search(d0, lsbs);
    int cc = 0;

    if (zeros) {
        /*
         * The leftmost set bit is the MSB of the first zero element: keep
         * only the bits to its left and drop the second doubleword.
         */
        d0 &= ~(-1ull >> clz64(zeros));
        d1 = 0;
    } else {
        zeros = zero_search(d1, lsbs);
        if (zeros) {
            d1 &= ~(-1ull >> clz64(zeros));
        } else {
            /* no zero element in either doubleword */
            cc = 3;
        }
    }

    s390_vec_write_element64(v1, 0, d0);
    s390_vec_write_element64(v1, 1, d1);
    return cc;
}
312
313#define DEF_VISTR_HELPER(BITS) \
314void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \
315{ \
316 vistr(v1, v2, MO_##BITS); \
317}
318DEF_VISTR_HELPER(8)
319DEF_VISTR_HELPER(16)
320DEF_VISTR_HELPER(32)
321
322#define DEF_VISTR_CC_HELPER(BITS) \
323void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
324 uint32_t desc) \
325{ \
326 env->cc_op = vistr(v1, v2, MO_##BITS); \
327}
328DEF_VISTR_CC_HELPER(8)
329DEF_VISTR_CC_HELPER(16)
330DEF_VISTR_CC_HELPER(32)
331
/*
 * Evaluates a single comparison of @data against @l as selected by the
 * control byte @c.
 *
 * Bit 7 of @c is the result when @data equals @l, bit 6 the result when
 * @data is lower than @l, and bit 5 the result when @data is higher.
 */
static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
{
    if (data == l) {
        return extract32(c, 7, 1);
    }
    return extract32(c, data < l ? 6 : 5, 1);
}
345
346static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
347 bool in, bool rt, bool zs, uint8_t es)
348{
349 const uint64_t mask = get_element_lsbs_mask(es);
350 uint64_t a0 = s390_vec_read_element64(v2, 0);
351 uint64_t a1 = s390_vec_read_element64(v2, 1);
352 int first_zero = 16, first_match = 16;
353 S390Vector rt_result = {};
354 uint64_t z0, z1;
355 int i, j;
356
357 if (zs) {
358 z0 = zero_search(a0, mask);
359 z1 = zero_search(a1, mask);
360 first_zero = match_index(z0, z1);
361 }
362
363 for (i = 0; i < 16 / (1 << es); i++) {
364 const uint32_t data = s390_vec_read_element(v2, i, es);
365 const int cur_byte = i * (1 << es);
366 bool any_match = false;
367
368 /* if we don't need a bit vector, we can stop early */
369 if (cur_byte == first_zero && !rt) {
370 break;
371 }
372
373 for (j = 0; j < 16 / (1 << es); j += 2) {
374 const uint32_t l1 = s390_vec_read_element(v3, j, es);
375 const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
376 /* we are only interested in the highest byte of each element */
377 const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
378 const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
379
380 if (element_compare(data, l1, c1) &&
381 element_compare(data, l2, c2)) {
382 any_match = true;
383 break;
384 }
385 }
386 /* invert the result if requested */
387 any_match = in ^ any_match;
388
389 if (any_match) {
390 /* indicate bit vector if requested */
391 if (rt) {
392 const uint64_t val = -1ull;
393
394 first_match = MIN(cur_byte, first_match);
395 s390_vec_write_element(&rt_result, i, es, val);
396 } else {
397 /* stop on the first match */
398 first_match = cur_byte;
399 break;
400 }
401 }
402 }
403
404 if (rt) {
405 *(S390Vector *)v1 = rt_result;
406 } else {
407 s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
408 s390_vec_write_element64(v1, 1, 0);
409 }
410
411 if (first_zero == 16 && first_match == 16) {
412 return 3; /* no match */
413 } else if (first_zero == 16) {
414 return 1; /* matching elements, no match for zero */
415 } else if (first_match < first_zero) {
416 return 2; /* matching elements before match for zero */
417 }
418 return 0; /* match for zero */
419}
420
421#define DEF_VSTRC_HELPER(BITS) \
422void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \
423 const void *v4, uint32_t desc) \
424{ \
425 const bool in = extract32(simd_data(desc), 3, 1); \
426 const bool zs = extract32(simd_data(desc), 1, 1); \
427 \
428 vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
429}
430DEF_VSTRC_HELPER(8)
431DEF_VSTRC_HELPER(16)
432DEF_VSTRC_HELPER(32)
433
434#define DEF_VSTRC_RT_HELPER(BITS) \
435void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \
436 const void *v4, uint32_t desc) \
437{ \
438 const bool in = extract32(simd_data(desc), 3, 1); \
439 const bool zs = extract32(simd_data(desc), 1, 1); \
440 \
441 vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
442}
443DEF_VSTRC_RT_HELPER(8)
444DEF_VSTRC_RT_HELPER(16)
445DEF_VSTRC_RT_HELPER(32)
446
447#define DEF_VSTRC_CC_HELPER(BITS) \
448void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \
449 const void *v4, CPUS390XState *env, \
450 uint32_t desc) \
451{ \
452 const bool in = extract32(simd_data(desc), 3, 1); \
453 const bool zs = extract32(simd_data(desc), 1, 1); \
454 \
455 env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \
456}
457DEF_VSTRC_CC_HELPER(8)
458DEF_VSTRC_CC_HELPER(16)
459DEF_VSTRC_CC_HELPER(32)
460
461#define DEF_VSTRC_CC_RT_HELPER(BITS) \
462void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \
463 const void *v4, CPUS390XState *env, \
464 uint32_t desc) \
465{ \
466 const bool in = extract32(simd_data(desc), 3, 1); \
467 const bool zs = extract32(simd_data(desc), 1, 1); \
468 \
469 env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \
470}
471DEF_VSTRC_CC_RT_HELPER(8)
472DEF_VSTRC_CC_RT_HELPER(16)
473DEF_VSTRC_CC_RT_HELPER(32)
474