1 | /* |
2 | * QEMU model of the Milkymist programmable FPU. |
3 | * |
4 | * Copyright (c) 2010 Michael Walle <michael@walle.cc> |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
18 | * |
19 | * |
20 | * Specification available at: |
21 | * http://milkymist.walle.cc/socdoc/pfpu.pdf |
22 | * |
23 | */ |
24 | |
25 | #include "qemu/osdep.h" |
26 | #include "hw/irq.h" |
27 | #include "hw/sysbus.h" |
28 | #include "migration/vmstate.h" |
29 | #include "trace.h" |
30 | #include "qemu/log.h" |
31 | #include "qemu/module.h" |
32 | #include "qemu/error-report.h" |
33 | #include <math.h> |
34 | |
/*
 * Per-instruction execution tracing.
 *
 * Enable the define below to compile the statements wrapped in D_EXEC()
 * into the model; while TRACE_EXEC is undefined they expand to nothing.
 */
/* #define TRACE_EXEC */

#ifndef TRACE_EXEC
# define D_EXEC(x)
#else
# define D_EXEC(x) x
#endif
42 | |
/*
 * Word indices of the control registers. The MMIO handlers shift the byte
 * offset right by two before comparing against these values, and the same
 * values index MilkymistPFPUState.regs[].
 */
enum {
    R_CTL         = 0,  /* writing CTL_START_BUSY runs the microcode */
    R_MESHBASE    = 1,  /* base address used for VECTOUT DMA writes */
    R_HMESHLAST   = 2,  /* last X coordinate processed by a run */
    R_VMESHLAST   = 3,  /* last Y coordinate processed by a run */
    R_CODEPAGE    = 4,  /* microcode page visible through the MMIO window */
    R_VERTICES    = 5,  /* updated at the end of every run */
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,  /* address of the most recent VECTOUT write */
    R_PC          = 9,  /* microcode program counter */
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12  /* number of control registers */
};
58 | |
/* Bits of the R_CTL register. */
enum {
    CTL_START_BUSY = 0x1,   /* bit 0: a write with this bit set starts a run */
};
62 | |
/*
 * Opcode values; the decoder extracts them from bits [10:7] of an
 * instruction word.
 */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15,
};
81 | |
/* General purpose registers that have a dedicated role in this model. */
enum {
    GPR_X = 0,      /* X coordinate of the vertex being processed */
    GPR_Y,          /* Y coordinate of the vertex being processed */
    GPR_FLAGS,      /* condition tested by OP_IF */
};
87 | |
/*
 * Result latency of every opcode. The decoder queues a result
 * (latency - 1) slots ahead in the output queue, whose depth is
 * MAX_LATENCY.
 */
enum {
    LATENCY_VECTOUT = 0,

    LATENCY_FABS    = 2,
    LATENCY_F2I     = 2,
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,

    LATENCY_I2F     = 3,

    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,

    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,

    LATENCY_FMUL    = 7,

    /* the largest latency above; also the size of the output queue */
    MAX_LATENCY     = LATENCY_FMUL
};
106 | |
/*
 * Word-indexed MMIO windows (byte offset >> 2): 128 general purpose
 * registers followed, after a gap, by one 512-word page of microcode.
 */
#define GPR_BEGIN        0x100
#define GPR_END          (GPR_BEGIN + 0x7f)
#define MICROCODE_BEGIN  0x200
#define MICROCODE_END    (MICROCODE_BEGIN + 0x1ff)

/* total number of instruction words held by the model */
#define MICROCODE_WORDS  2048
112 | |
/*
 * Return the bits of the lvalue @val reinterpreted as a value of @type.
 *
 * The object representation is copied with memcpy() instead of being read
 * through a cast pointer, so the conversion does not depend on the compiler
 * tolerating strict-aliasing violations. Both types must have the same
 * size, which is checked at compile time. The result is an rvalue.
 *
 * Implemented as a GNU statement expression so that it can still be used
 * inside initializers and assignments.
 */
#define REINTERPRET_CAST(type, val)                                         \
    ({                                                                      \
        type reinterpret_cast_tmp_;                                         \
        _Static_assert(sizeof(reinterpret_cast_tmp_) == sizeof(val),        \
                       "REINTERPRET_CAST needs equally sized types");       \
        memcpy(&reinterpret_cast_tmp_, &(val),                              \
               sizeof(reinterpret_cast_tmp_));                              \
        reinterpret_cast_tmp_;                                              \
    })
114 | |
#ifdef TRACE_EXEC
/* Mnemonics printed by the execution trace, indexed by opcode. */
static const char *opcode_to_str[] = {
    [OP_NOP]     = "NOP",
    [OP_FADD]    = "FADD",
    [OP_FSUB]    = "FSUB",
    [OP_FMUL]    = "FMUL",
    [OP_FABS]    = "FABS",
    [OP_F2I]     = "F2I",
    [OP_I2F]     = "I2F",
    [OP_VECTOUT] = "VECTOUT",
    [OP_SIN]     = "SIN",
    [OP_COS]     = "COS",
    [OP_ABOVE]   = "ABOVE",
    [OP_EQUAL]   = "EQUAL",
    [OP_COPY]    = "COPY",
    [OP_IF]      = "IF",
    [OP_TSIGN]   = "TSIGN",
    [OP_QUAKE]   = "QUAKE",
};
#endif
121 | |
122 | #define TYPE_MILKYMIST_PFPU "milkymist-pfpu" |
123 | #define MILKYMIST_PFPU(obj) \ |
124 | OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU) |
125 | |
126 | struct MilkymistPFPUState { |
127 | SysBusDevice parent_obj; |
128 | |
129 | MemoryRegion regs_region; |
130 | Chardev *chr; |
131 | qemu_irq irq; |
132 | |
133 | uint32_t regs[R_MAX]; |
134 | uint32_t gp_regs[128]; |
135 | uint32_t microcode[MICROCODE_WORDS]; |
136 | |
137 | int output_queue_pos; |
138 | uint32_t output_queue[MAX_LATENCY]; |
139 | }; |
140 | typedef struct MilkymistPFPUState MilkymistPFPUState; |
141 | |
/*
 * Compute the address of the 8-byte mesh entry for vertex (x, y): entries
 * are laid out row by row starting at @base, with 128 entries per row.
 * The arithmetic is done in 32 bits and wraps around.
 */
static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
    const uint32_t entry = 128 * y + x;

    return base + 8 * entry;
}
147 | |
148 | static inline void |
149 | output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos) |
150 | { |
151 | s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val; |
152 | } |
153 | |
154 | static inline uint32_t |
155 | output_queue_remove(MilkymistPFPUState *s) |
156 | { |
157 | return s->output_queue[s->output_queue_pos]; |
158 | } |
159 | |
160 | static inline void |
161 | output_queue_advance(MilkymistPFPUState *s) |
162 | { |
163 | s->output_queue[s->output_queue_pos] = 0; |
164 | s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY; |
165 | } |
166 | |
167 | static int pfpu_decode_insn(MilkymistPFPUState *s) |
168 | { |
169 | uint32_t pc = s->regs[R_PC]; |
170 | uint32_t insn = s->microcode[pc]; |
171 | uint32_t reg_a = (insn >> 18) & 0x7f; |
172 | uint32_t reg_b = (insn >> 11) & 0x7f; |
173 | uint32_t op = (insn >> 7) & 0xf; |
174 | uint32_t reg_d = insn & 0x7f; |
175 | uint32_t r = 0; |
176 | int latency = 0; |
177 | |
178 | switch (op) { |
179 | case OP_NOP: |
180 | break; |
181 | case OP_FADD: |
182 | { |
183 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
184 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
185 | float t = a + b; |
186 | r = REINTERPRET_CAST(uint32_t, t); |
187 | latency = LATENCY_FADD; |
188 | D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
189 | } break; |
190 | case OP_FSUB: |
191 | { |
192 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
193 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
194 | float t = a - b; |
195 | r = REINTERPRET_CAST(uint32_t, t); |
196 | latency = LATENCY_FSUB; |
197 | D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
198 | } break; |
199 | case OP_FMUL: |
200 | { |
201 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
202 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
203 | float t = a * b; |
204 | r = REINTERPRET_CAST(uint32_t, t); |
205 | latency = LATENCY_FMUL; |
206 | D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
207 | } break; |
208 | case OP_FABS: |
209 | { |
210 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
211 | float t = fabsf(a); |
212 | r = REINTERPRET_CAST(uint32_t, t); |
213 | latency = LATENCY_FABS; |
214 | D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n" , a, t, r)); |
215 | } break; |
216 | case OP_F2I: |
217 | { |
218 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
219 | int32_t t = a; |
220 | r = REINTERPRET_CAST(uint32_t, t); |
221 | latency = LATENCY_F2I; |
222 | D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n" , a, t, r)); |
223 | } break; |
224 | case OP_I2F: |
225 | { |
226 | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
227 | float t = a; |
228 | r = REINTERPRET_CAST(uint32_t, t); |
229 | latency = LATENCY_I2F; |
230 | D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n" , a, t, r)); |
231 | } break; |
232 | case OP_VECTOUT: |
233 | { |
234 | uint32_t a = cpu_to_be32(s->gp_regs[reg_a]); |
235 | uint32_t b = cpu_to_be32(s->gp_regs[reg_b]); |
236 | hwaddr dma_ptr = |
237 | get_dma_address(s->regs[R_MESHBASE], |
238 | s->gp_regs[GPR_X], s->gp_regs[GPR_Y]); |
239 | cpu_physical_memory_write(dma_ptr, &a, 4); |
240 | cpu_physical_memory_write(dma_ptr + 4, &b, 4); |
241 | s->regs[R_LASTDMA] = dma_ptr + 4; |
242 | D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n" , a, b, dma_ptr)); |
243 | trace_milkymist_pfpu_vectout(a, b, dma_ptr); |
244 | } break; |
245 | case OP_SIN: |
246 | { |
247 | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
248 | float t = sinf(a * (1.0f / (M_PI * 4096.0f))); |
249 | r = REINTERPRET_CAST(uint32_t, t); |
250 | latency = LATENCY_SIN; |
251 | D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n" , a, t, r)); |
252 | } break; |
253 | case OP_COS: |
254 | { |
255 | int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]); |
256 | float t = cosf(a * (1.0f / (M_PI * 4096.0f))); |
257 | r = REINTERPRET_CAST(uint32_t, t); |
258 | latency = LATENCY_COS; |
259 | D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n" , a, t, r)); |
260 | } break; |
261 | case OP_ABOVE: |
262 | { |
263 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
264 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
265 | float t = (a > b) ? 1.0f : 0.0f; |
266 | r = REINTERPRET_CAST(uint32_t, t); |
267 | latency = LATENCY_ABOVE; |
268 | D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
269 | } break; |
270 | case OP_EQUAL: |
271 | { |
272 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
273 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
274 | float t = (a == b) ? 1.0f : 0.0f; |
275 | r = REINTERPRET_CAST(uint32_t, t); |
276 | latency = LATENCY_EQUAL; |
277 | D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
278 | } break; |
279 | case OP_COPY: |
280 | { |
281 | r = s->gp_regs[reg_a]; |
282 | latency = LATENCY_COPY; |
283 | D_EXEC(qemu_log("COPY" )); |
284 | } break; |
285 | case OP_IF: |
286 | { |
287 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
288 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
289 | uint32_t f = s->gp_regs[GPR_FLAGS]; |
290 | float t = (f != 0) ? a : b; |
291 | r = REINTERPRET_CAST(uint32_t, t); |
292 | latency = LATENCY_IF; |
293 | D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n" , f, a, b, t, r)); |
294 | } break; |
295 | case OP_TSIGN: |
296 | { |
297 | float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]); |
298 | float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]); |
299 | float t = (b < 0) ? -a : a; |
300 | r = REINTERPRET_CAST(uint32_t, t); |
301 | latency = LATENCY_TSIGN; |
302 | D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n" , a, b, t, r)); |
303 | } break; |
304 | case OP_QUAKE: |
305 | { |
306 | uint32_t a = s->gp_regs[reg_a]; |
307 | r = 0x5f3759df - (a >> 1); |
308 | latency = LATENCY_QUAKE; |
309 | D_EXEC(qemu_log("QUAKE a=%d r=%08x\n" , a, r)); |
310 | } break; |
311 | |
312 | default: |
313 | error_report("milkymist_pfpu: unknown opcode %d" , op); |
314 | break; |
315 | } |
316 | |
317 | if (!reg_d) { |
318 | D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n" , |
319 | s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, |
320 | s->regs[R_PC] + latency)); |
321 | } else { |
322 | D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n" , |
323 | s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency, |
324 | s->regs[R_PC] + latency, reg_d)); |
325 | } |
326 | |
327 | if (op == OP_VECTOUT) { |
328 | return 0; |
329 | } |
330 | |
331 | /* store output for this cycle */ |
332 | if (reg_d) { |
333 | uint32_t val = output_queue_remove(s); |
334 | D_EXEC(qemu_log("R%03d <- 0x%08x\n" , reg_d, val)); |
335 | s->gp_regs[reg_d] = val; |
336 | } |
337 | |
338 | output_queue_advance(s); |
339 | |
340 | /* store op output */ |
341 | if (op != OP_NOP) { |
342 | output_queue_insert(s, r, latency-1); |
343 | } |
344 | |
345 | /* advance PC */ |
346 | s->regs[R_PC]++; |
347 | |
348 | return 1; |
349 | }; |
350 | |
351 | static void pfpu_start(MilkymistPFPUState *s) |
352 | { |
353 | int x, y; |
354 | int i; |
355 | |
356 | for (y = 0; y <= s->regs[R_VMESHLAST]; y++) { |
357 | for (x = 0; x <= s->regs[R_HMESHLAST]; x++) { |
358 | D_EXEC(qemu_log("\nprocessing x=%d y=%d\n" , x, y)); |
359 | |
360 | /* set current position */ |
361 | s->gp_regs[GPR_X] = x; |
362 | s->gp_regs[GPR_Y] = y; |
363 | |
364 | /* run microcode on this position */ |
365 | i = 0; |
366 | while (pfpu_decode_insn(s)) { |
367 | /* decode at most MICROCODE_WORDS instructions */ |
368 | if (++i >= MICROCODE_WORDS) { |
369 | error_report("milkymist_pfpu: too many instructions " |
370 | "executed in microcode. No VECTOUT?" ); |
371 | break; |
372 | } |
373 | } |
374 | |
375 | /* reset pc for next run */ |
376 | s->regs[R_PC] = 0; |
377 | } |
378 | } |
379 | |
380 | s->regs[R_VERTICES] = x * y; |
381 | |
382 | trace_milkymist_pfpu_pulse_irq(); |
383 | qemu_irq_pulse(s->irq); |
384 | } |
385 | |
386 | static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr) |
387 | { |
388 | return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN; |
389 | } |
390 | |
391 | static uint64_t pfpu_read(void *opaque, hwaddr addr, |
392 | unsigned size) |
393 | { |
394 | MilkymistPFPUState *s = opaque; |
395 | uint32_t r = 0; |
396 | |
397 | addr >>= 2; |
398 | switch (addr) { |
399 | case R_CTL: |
400 | case R_MESHBASE: |
401 | case R_HMESHLAST: |
402 | case R_VMESHLAST: |
403 | case R_CODEPAGE: |
404 | case R_VERTICES: |
405 | case R_COLLISIONS: |
406 | case R_STRAYWRITES: |
407 | case R_LASTDMA: |
408 | case R_PC: |
409 | case R_DREGBASE: |
410 | case R_CODEBASE: |
411 | r = s->regs[addr]; |
412 | break; |
413 | case GPR_BEGIN ... GPR_END: |
414 | r = s->gp_regs[addr - GPR_BEGIN]; |
415 | break; |
416 | case MICROCODE_BEGIN ... MICROCODE_END: |
417 | r = s->microcode[get_microcode_address(s, addr)]; |
418 | break; |
419 | |
420 | default: |
421 | error_report("milkymist_pfpu: read access to unknown register 0x" |
422 | TARGET_FMT_plx, addr << 2); |
423 | break; |
424 | } |
425 | |
426 | trace_milkymist_pfpu_memory_read(addr << 2, r); |
427 | |
428 | return r; |
429 | } |
430 | |
431 | static void pfpu_write(void *opaque, hwaddr addr, uint64_t value, |
432 | unsigned size) |
433 | { |
434 | MilkymistPFPUState *s = opaque; |
435 | |
436 | trace_milkymist_pfpu_memory_write(addr, value); |
437 | |
438 | addr >>= 2; |
439 | switch (addr) { |
440 | case R_CTL: |
441 | if (value & CTL_START_BUSY) { |
442 | pfpu_start(s); |
443 | } |
444 | break; |
445 | case R_MESHBASE: |
446 | case R_HMESHLAST: |
447 | case R_VMESHLAST: |
448 | case R_CODEPAGE: |
449 | case R_VERTICES: |
450 | case R_COLLISIONS: |
451 | case R_STRAYWRITES: |
452 | case R_LASTDMA: |
453 | case R_PC: |
454 | case R_DREGBASE: |
455 | case R_CODEBASE: |
456 | s->regs[addr] = value; |
457 | break; |
458 | case GPR_BEGIN ... GPR_END: |
459 | s->gp_regs[addr - GPR_BEGIN] = value; |
460 | break; |
461 | case MICROCODE_BEGIN ... MICROCODE_END: |
462 | s->microcode[get_microcode_address(s, addr)] = value; |
463 | break; |
464 | |
465 | default: |
466 | error_report("milkymist_pfpu: write access to unknown register 0x" |
467 | TARGET_FMT_plx, addr << 2); |
468 | break; |
469 | } |
470 | } |
471 | |
472 | static const MemoryRegionOps pfpu_mmio_ops = { |
473 | .read = pfpu_read, |
474 | .write = pfpu_write, |
475 | .valid = { |
476 | .min_access_size = 4, |
477 | .max_access_size = 4, |
478 | }, |
479 | .endianness = DEVICE_NATIVE_ENDIAN, |
480 | }; |
481 | |
482 | static void milkymist_pfpu_reset(DeviceState *d) |
483 | { |
484 | MilkymistPFPUState *s = MILKYMIST_PFPU(d); |
485 | int i; |
486 | |
487 | for (i = 0; i < R_MAX; i++) { |
488 | s->regs[i] = 0; |
489 | } |
490 | for (i = 0; i < 128; i++) { |
491 | s->gp_regs[i] = 0; |
492 | } |
493 | for (i = 0; i < MICROCODE_WORDS; i++) { |
494 | s->microcode[i] = 0; |
495 | } |
496 | s->output_queue_pos = 0; |
497 | for (i = 0; i < MAX_LATENCY; i++) { |
498 | s->output_queue[i] = 0; |
499 | } |
500 | } |
501 | |
502 | static void milkymist_pfpu_realize(DeviceState *dev, Error **errp) |
503 | { |
504 | MilkymistPFPUState *s = MILKYMIST_PFPU(dev); |
505 | SysBusDevice *sbd = SYS_BUS_DEVICE(dev); |
506 | |
507 | sysbus_init_irq(sbd, &s->irq); |
508 | |
509 | memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s, |
510 | "milkymist-pfpu" , MICROCODE_END * 4); |
511 | sysbus_init_mmio(sbd, &s->regs_region); |
512 | } |
513 | |
514 | static const VMStateDescription vmstate_milkymist_pfpu = { |
515 | .name = "milkymist-pfpu" , |
516 | .version_id = 1, |
517 | .minimum_version_id = 1, |
518 | .fields = (VMStateField[]) { |
519 | VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX), |
520 | VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128), |
521 | VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS), |
522 | VMSTATE_INT32(output_queue_pos, MilkymistPFPUState), |
523 | VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY), |
524 | VMSTATE_END_OF_LIST() |
525 | } |
526 | }; |
527 | |
528 | static void milkymist_pfpu_class_init(ObjectClass *klass, void *data) |
529 | { |
530 | DeviceClass *dc = DEVICE_CLASS(klass); |
531 | |
532 | dc->realize = milkymist_pfpu_realize; |
533 | dc->reset = milkymist_pfpu_reset; |
534 | dc->vmsd = &vmstate_milkymist_pfpu; |
535 | } |
536 | |
537 | static const TypeInfo milkymist_pfpu_info = { |
538 | .name = TYPE_MILKYMIST_PFPU, |
539 | .parent = TYPE_SYS_BUS_DEVICE, |
540 | .instance_size = sizeof(MilkymistPFPUState), |
541 | .class_init = milkymist_pfpu_class_init, |
542 | }; |
543 | |
544 | static void milkymist_pfpu_register_types(void) |
545 | { |
546 | type_register_static(&milkymist_pfpu_info); |
547 | } |
548 | |
549 | type_init(milkymist_pfpu_register_types) |
550 | |