1/*
2 * QEMU model of the Milkymist programmable FPU.
3 *
4 * Copyright (c) 2010 Michael Walle <michael@walle.cc>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 *
19 *
20 * Specification available at:
21 * http://milkymist.walle.cc/socdoc/pfpu.pdf
22 *
23 */
24
25#include "qemu/osdep.h"
26#include "hw/irq.h"
27#include "hw/sysbus.h"
28#include "migration/vmstate.h"
29#include "trace.h"
30#include "qemu/log.h"
31#include "qemu/module.h"
32#include "qemu/error-report.h"
33#include <math.h>
34
/*
 * Execution tracing switch: define TRACE_EXEC (for instance by
 * uncommenting the line below) to have every D_EXEC() statement compiled in.
 */
/* #define TRACE_EXEC */

#ifndef TRACE_EXEC
/* Tracing disabled: the wrapped statement is dropped at compile time. */
# define D_EXEC(x)
#else
/* Tracing enabled: the wrapped statement is emitted verbatim. */
# define D_EXEC(x) x
#endif
42
/*
 * Control/status register indices. The MMIO handlers shift the byte
 * offset right by two before comparing against these values, so each
 * index corresponds to one 32-bit word.
 */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12    /* number of registers; sizes the regs[] array */
};
58
/* Bit definitions for the R_CTL register. */
enum {
    /* Bit 0: writing it as 1 starts microcode execution. */
    CTL_START_BUSY = 0x1,
};
62
/* Microcode opcodes (value of the 4-bit opcode field of an instruction). */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15,
};
81
/* General purpose registers with a dedicated role in this model. */
enum {
    GPR_X = 0,      /* loaded with the current mesh column before a run */
    GPR_Y,          /* loaded with the current mesh row before a run */
    GPR_FLAGS,      /* selector consulted by the IF opcode */
};
87
/*
 * Per-opcode result latency, counted in instructions. MAX_LATENCY sizes
 * the output queue and equals the largest latency listed (FMUL).
 */
enum {
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,
    LATENCY_VECTOUT = 0,
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    MAX_LATENCY     = LATENCY_FMUL
};
106
/*
 * MMIO layout in 32-bit word indices (byte offset >> 2): a window of 128
 * general purpose registers and a window of 512 microcode words.
 */
#define GPR_BEGIN        0x100
#define GPR_END          (GPR_BEGIN + 0x7f)
#define MICROCODE_BEGIN  0x200
#define MICROCODE_END    (MICROCODE_BEGIN + 0x1ff)

/* Total microcode storage: four windows of 512 words each. */
#define MICROCODE_WORDS  (4 * 512)
112
/*
 * Reinterpret the object representation of 'val' as a value of 'type'
 * (e.g. uint32_t register bits <-> float).
 *
 * The conversion goes through a union instead of dereferencing a cast
 * pointer, so it does not depend on the compiler tolerating strict-aliasing
 * violations. 'type' and the type of 'val' are expected to have the same
 * size, as they do for every use in this file (32-bit integers and float).
 */
#define REINTERPRET_CAST(type, val) \
    (((union { __typeof__(val) from; type to; }){ .from = (val) }).to)
114
#ifdef TRACE_EXEC
/* Opcode mnemonics for the execution trace, indexed by opcode value. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
121
/* QOM type name under which the device is registered. */
#define TYPE_MILKYMIST_PFPU "milkymist-pfpu"

/* Checked cast from a QOM object pointer to the PFPU device state. */
#define MILKYMIST_PFPU(obj)                                         \
    OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
125
126struct MilkymistPFPUState {
127 SysBusDevice parent_obj;
128
129 MemoryRegion regs_region;
130 Chardev *chr;
131 qemu_irq irq;
132
133 uint32_t regs[R_MAX];
134 uint32_t gp_regs[128];
135 uint32_t microcode[MICROCODE_WORDS];
136
137 int output_queue_pos;
138 uint32_t output_queue[MAX_LATENCY];
139};
140typedef struct MilkymistPFPUState MilkymistPFPUState;
141
/*
 * Compute the address of the output record for mesh position (x, y).
 *
 * Records are 8 bytes each and rows are 128 records apart; the arithmetic
 * wraps modulo 2^32 like any uint32_t computation.
 */
static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
    const uint32_t record_index = y * 128 + x;

    return base + record_index * 8;
}
147
148static inline void
149output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
150{
151 s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
152}
153
154static inline uint32_t
155output_queue_remove(MilkymistPFPUState *s)
156{
157 return s->output_queue[s->output_queue_pos];
158}
159
160static inline void
161output_queue_advance(MilkymistPFPUState *s)
162{
163 s->output_queue[s->output_queue_pos] = 0;
164 s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
165}
166
167static int pfpu_decode_insn(MilkymistPFPUState *s)
168{
169 uint32_t pc = s->regs[R_PC];
170 uint32_t insn = s->microcode[pc];
171 uint32_t reg_a = (insn >> 18) & 0x7f;
172 uint32_t reg_b = (insn >> 11) & 0x7f;
173 uint32_t op = (insn >> 7) & 0xf;
174 uint32_t reg_d = insn & 0x7f;
175 uint32_t r = 0;
176 int latency = 0;
177
178 switch (op) {
179 case OP_NOP:
180 break;
181 case OP_FADD:
182 {
183 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
184 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
185 float t = a + b;
186 r = REINTERPRET_CAST(uint32_t, t);
187 latency = LATENCY_FADD;
188 D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
189 } break;
190 case OP_FSUB:
191 {
192 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
193 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
194 float t = a - b;
195 r = REINTERPRET_CAST(uint32_t, t);
196 latency = LATENCY_FSUB;
197 D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
198 } break;
199 case OP_FMUL:
200 {
201 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
202 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
203 float t = a * b;
204 r = REINTERPRET_CAST(uint32_t, t);
205 latency = LATENCY_FMUL;
206 D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
207 } break;
208 case OP_FABS:
209 {
210 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
211 float t = fabsf(a);
212 r = REINTERPRET_CAST(uint32_t, t);
213 latency = LATENCY_FABS;
214 D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
215 } break;
216 case OP_F2I:
217 {
218 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
219 int32_t t = a;
220 r = REINTERPRET_CAST(uint32_t, t);
221 latency = LATENCY_F2I;
222 D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
223 } break;
224 case OP_I2F:
225 {
226 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
227 float t = a;
228 r = REINTERPRET_CAST(uint32_t, t);
229 latency = LATENCY_I2F;
230 D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
231 } break;
232 case OP_VECTOUT:
233 {
234 uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
235 uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
236 hwaddr dma_ptr =
237 get_dma_address(s->regs[R_MESHBASE],
238 s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
239 cpu_physical_memory_write(dma_ptr, &a, 4);
240 cpu_physical_memory_write(dma_ptr + 4, &b, 4);
241 s->regs[R_LASTDMA] = dma_ptr + 4;
242 D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
243 trace_milkymist_pfpu_vectout(a, b, dma_ptr);
244 } break;
245 case OP_SIN:
246 {
247 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
248 float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
249 r = REINTERPRET_CAST(uint32_t, t);
250 latency = LATENCY_SIN;
251 D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
252 } break;
253 case OP_COS:
254 {
255 int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
256 float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
257 r = REINTERPRET_CAST(uint32_t, t);
258 latency = LATENCY_COS;
259 D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
260 } break;
261 case OP_ABOVE:
262 {
263 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
264 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
265 float t = (a > b) ? 1.0f : 0.0f;
266 r = REINTERPRET_CAST(uint32_t, t);
267 latency = LATENCY_ABOVE;
268 D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
269 } break;
270 case OP_EQUAL:
271 {
272 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
273 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
274 float t = (a == b) ? 1.0f : 0.0f;
275 r = REINTERPRET_CAST(uint32_t, t);
276 latency = LATENCY_EQUAL;
277 D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
278 } break;
279 case OP_COPY:
280 {
281 r = s->gp_regs[reg_a];
282 latency = LATENCY_COPY;
283 D_EXEC(qemu_log("COPY"));
284 } break;
285 case OP_IF:
286 {
287 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
288 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
289 uint32_t f = s->gp_regs[GPR_FLAGS];
290 float t = (f != 0) ? a : b;
291 r = REINTERPRET_CAST(uint32_t, t);
292 latency = LATENCY_IF;
293 D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
294 } break;
295 case OP_TSIGN:
296 {
297 float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
298 float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
299 float t = (b < 0) ? -a : a;
300 r = REINTERPRET_CAST(uint32_t, t);
301 latency = LATENCY_TSIGN;
302 D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
303 } break;
304 case OP_QUAKE:
305 {
306 uint32_t a = s->gp_regs[reg_a];
307 r = 0x5f3759df - (a >> 1);
308 latency = LATENCY_QUAKE;
309 D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
310 } break;
311
312 default:
313 error_report("milkymist_pfpu: unknown opcode %d", op);
314 break;
315 }
316
317 if (!reg_d) {
318 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
319 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
320 s->regs[R_PC] + latency));
321 } else {
322 D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
323 s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
324 s->regs[R_PC] + latency, reg_d));
325 }
326
327 if (op == OP_VECTOUT) {
328 return 0;
329 }
330
331 /* store output for this cycle */
332 if (reg_d) {
333 uint32_t val = output_queue_remove(s);
334 D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
335 s->gp_regs[reg_d] = val;
336 }
337
338 output_queue_advance(s);
339
340 /* store op output */
341 if (op != OP_NOP) {
342 output_queue_insert(s, r, latency-1);
343 }
344
345 /* advance PC */
346 s->regs[R_PC]++;
347
348 return 1;
349};
350
351static void pfpu_start(MilkymistPFPUState *s)
352{
353 int x, y;
354 int i;
355
356 for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
357 for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
358 D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
359
360 /* set current position */
361 s->gp_regs[GPR_X] = x;
362 s->gp_regs[GPR_Y] = y;
363
364 /* run microcode on this position */
365 i = 0;
366 while (pfpu_decode_insn(s)) {
367 /* decode at most MICROCODE_WORDS instructions */
368 if (++i >= MICROCODE_WORDS) {
369 error_report("milkymist_pfpu: too many instructions "
370 "executed in microcode. No VECTOUT?");
371 break;
372 }
373 }
374
375 /* reset pc for next run */
376 s->regs[R_PC] = 0;
377 }
378 }
379
380 s->regs[R_VERTICES] = x * y;
381
382 trace_milkymist_pfpu_pulse_irq();
383 qemu_irq_pulse(s->irq);
384}
385
386static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
387{
388 return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
389}
390
391static uint64_t pfpu_read(void *opaque, hwaddr addr,
392 unsigned size)
393{
394 MilkymistPFPUState *s = opaque;
395 uint32_t r = 0;
396
397 addr >>= 2;
398 switch (addr) {
399 case R_CTL:
400 case R_MESHBASE:
401 case R_HMESHLAST:
402 case R_VMESHLAST:
403 case R_CODEPAGE:
404 case R_VERTICES:
405 case R_COLLISIONS:
406 case R_STRAYWRITES:
407 case R_LASTDMA:
408 case R_PC:
409 case R_DREGBASE:
410 case R_CODEBASE:
411 r = s->regs[addr];
412 break;
413 case GPR_BEGIN ... GPR_END:
414 r = s->gp_regs[addr - GPR_BEGIN];
415 break;
416 case MICROCODE_BEGIN ... MICROCODE_END:
417 r = s->microcode[get_microcode_address(s, addr)];
418 break;
419
420 default:
421 error_report("milkymist_pfpu: read access to unknown register 0x"
422 TARGET_FMT_plx, addr << 2);
423 break;
424 }
425
426 trace_milkymist_pfpu_memory_read(addr << 2, r);
427
428 return r;
429}
430
431static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
432 unsigned size)
433{
434 MilkymistPFPUState *s = opaque;
435
436 trace_milkymist_pfpu_memory_write(addr, value);
437
438 addr >>= 2;
439 switch (addr) {
440 case R_CTL:
441 if (value & CTL_START_BUSY) {
442 pfpu_start(s);
443 }
444 break;
445 case R_MESHBASE:
446 case R_HMESHLAST:
447 case R_VMESHLAST:
448 case R_CODEPAGE:
449 case R_VERTICES:
450 case R_COLLISIONS:
451 case R_STRAYWRITES:
452 case R_LASTDMA:
453 case R_PC:
454 case R_DREGBASE:
455 case R_CODEBASE:
456 s->regs[addr] = value;
457 break;
458 case GPR_BEGIN ... GPR_END:
459 s->gp_regs[addr - GPR_BEGIN] = value;
460 break;
461 case MICROCODE_BEGIN ... MICROCODE_END:
462 s->microcode[get_microcode_address(s, addr)] = value;
463 break;
464
465 default:
466 error_report("milkymist_pfpu: write access to unknown register 0x"
467 TARGET_FMT_plx, addr << 2);
468 break;
469 }
470}
471
472static const MemoryRegionOps pfpu_mmio_ops = {
473 .read = pfpu_read,
474 .write = pfpu_write,
475 .valid = {
476 .min_access_size = 4,
477 .max_access_size = 4,
478 },
479 .endianness = DEVICE_NATIVE_ENDIAN,
480};
481
482static void milkymist_pfpu_reset(DeviceState *d)
483{
484 MilkymistPFPUState *s = MILKYMIST_PFPU(d);
485 int i;
486
487 for (i = 0; i < R_MAX; i++) {
488 s->regs[i] = 0;
489 }
490 for (i = 0; i < 128; i++) {
491 s->gp_regs[i] = 0;
492 }
493 for (i = 0; i < MICROCODE_WORDS; i++) {
494 s->microcode[i] = 0;
495 }
496 s->output_queue_pos = 0;
497 for (i = 0; i < MAX_LATENCY; i++) {
498 s->output_queue[i] = 0;
499 }
500}
501
502static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
503{
504 MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
505 SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
506
507 sysbus_init_irq(sbd, &s->irq);
508
509 memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
510 "milkymist-pfpu", MICROCODE_END * 4);
511 sysbus_init_mmio(sbd, &s->regs_region);
512}
513
514static const VMStateDescription vmstate_milkymist_pfpu = {
515 .name = "milkymist-pfpu",
516 .version_id = 1,
517 .minimum_version_id = 1,
518 .fields = (VMStateField[]) {
519 VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
520 VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
521 VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
522 VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
523 VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
524 VMSTATE_END_OF_LIST()
525 }
526};
527
528static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
529{
530 DeviceClass *dc = DEVICE_CLASS(klass);
531
532 dc->realize = milkymist_pfpu_realize;
533 dc->reset = milkymist_pfpu_reset;
534 dc->vmsd = &vmstate_milkymist_pfpu;
535}
536
537static const TypeInfo milkymist_pfpu_info = {
538 .name = TYPE_MILKYMIST_PFPU,
539 .parent = TYPE_SYS_BUS_DEVICE,
540 .instance_size = sizeof(MilkymistPFPUState),
541 .class_init = milkymist_pfpu_class_init,
542};
543
544static void milkymist_pfpu_register_types(void)
545{
546 type_register_static(&milkymist_pfpu_info);
547}
548
549type_init(milkymist_pfpu_register_types)
550