1//
2// m3_compile.c
3//
4// Created by Steven Massey on 4/17/19.
5// Copyright © 2019 Steven Massey. All rights reserved.
6//
7
// Allow using opcodes for the compilation process
9#define M3_COMPILE_OPCODES
10
11#include "m3_env.h"
12#include "m3_compile.h"
13#include "m3_exec.h"
14#include "m3_exception.h"
15#include "m3_info.h"
16
17//----- EMIT --------------------------------------------------------------------------------------------------------------
18
19static inline
20pc_t GetPC (IM3Compilation o)
21{
22 return GetPagePC (o->page);
23}
24
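// Ensure the current code page has room for i_numLines more code words (plus space for a bridge).
// If it doesn't, acquire a page with enough capacity and emit an op_Branch from the old page to the
// new one so that execution flows across the page boundary.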
25static M3_NOINLINE
26M3Result EnsureCodePageNumLines (IM3Compilation o, u32 i_numLines)
27{
28 M3Result result = m3Err_none;
29
30 i_numLines += 2; // room for Bridge
31
32 if (NumFreeLines (o->page) < i_numLines)
33 {
34 IM3CodePage page = AcquireCodePageWithCapacity (o->runtime, i_numLines);
35
36 if (page)
37 {
38 m3log (emit, "bridging new code page from: %d %p (free slots: %d) to: %d", o->page->info.sequence, GetPC (o), NumFreeLines (o->page), page->info.sequence);
39 d_m3Assert (NumFreeLines (o->page) >= 2);
40
41 EmitWord (o->page, op_Branch);
42 EmitWord (o->page, GetPagePC (page));
43
44 ReleaseCodePage (o->runtime, o->page);
45
46 o->page = page;
47 }
48 else result = m3Err_mallocFailedCodePage;
49 }
50
51 return result;
52}
53
54static M3_NOINLINE
55M3Result EmitOp (IM3Compilation o, IM3Operation i_operation)
56{
57 M3Result result = m3Err_none; d_m3Assert (i_operation or IsStackPolymorphic (o));
58
    // it's OK for the page to be null when compile-walking the bytecode without emitting
60 if (o->page)
61 {
62# if d_m3EnableOpTracing
63 if (i_operation != op_DumpStack)
64 o->numEmits++;
65# endif
66
67 // have execution jump to a new page if slots are critically low
68 result = EnsureCodePageNumLines (o, d_m3CodePageFreeLinesThreshold);
69
70 if (not result)
71 { if (d_m3LogEmit) log_emit (o, i_operation);
72# if d_m3RecordBacktraces
73 EmitMappingEntry (o->page, o->lastOpcodeStart - o->module->wasmStart);
74# endif // d_m3RecordBacktraces
75 EmitWord (o->page, i_operation);
76 }
77 }
78
79 return result;
80}
81
82// Push an immediate constant into the M3 codestream
83static M3_NOINLINE
84void EmitConstant32 (IM3Compilation o, const u32 i_immediate)
85{
86 if (o->page)
87 EmitWord32 (o->page, i_immediate);
88}
89
90static M3_NOINLINE
91void EmitSlotOffset (IM3Compilation o, const i32 i_offset)
92{
93 if (o->page)
94 EmitWord32 (o->page, i_offset);
95}
96
97static M3_NOINLINE
98pc_t EmitPointer (IM3Compilation o, const void * const i_pointer)
99{
100 pc_t ptr = GetPagePC (o->page);
101
102 if (o->page)
103 EmitWord (o->page, i_pointer);
104
105 return ptr;
106}
107
108static M3_NOINLINE
109void * ReservePointer (IM3Compilation o)
110{
111 pc_t ptr = GetPagePC (o->page);
112 EmitPointer (o, NULL);
113 return (void *) ptr;
114}
115
116
117//-------------------------------------------------------------------------------------------------------------------------
118
119#define d_indent " | %s"
120
// just want fewer letters and numbers to stare at further down in the compiler table
122#define i_32 c_m3Type_i32
123#define i_64 c_m3Type_i64
124#define f_32 c_m3Type_f32
125#define f_64 c_m3Type_f64
126#define none c_m3Type_none
127#define any (u8)-1
128
129#if d_m3HasFloat
130# define FPOP(x) x
131#else
132# define FPOP(x) NULL
133#endif
134
135static const IM3Operation c_preserveSetSlot [] = { NULL, op_PreserveSetSlot_i32, op_PreserveSetSlot_i64,
136 FPOP(op_PreserveSetSlot_f32), FPOP(op_PreserveSetSlot_f64) };
137static const IM3Operation c_setSetOps [] = { NULL, op_SetSlot_i32, op_SetSlot_i64,
138 FPOP(op_SetSlot_f32), FPOP(op_SetSlot_f64) };
139static const IM3Operation c_setGlobalOps [] = { NULL, op_SetGlobal_i32, op_SetGlobal_i64,
140 FPOP(op_SetGlobal_f32), FPOP(op_SetGlobal_f64) };
141static const IM3Operation c_setRegisterOps [] = { NULL, op_SetRegister_i32, op_SetRegister_i64,
142 FPOP(op_SetRegister_f32), FPOP(op_SetRegister_f64) };
143
144static const IM3Operation c_intSelectOps [2] [4] = { { op_Select_i32_rss, op_Select_i32_srs, op_Select_i32_ssr, op_Select_i32_sss },
145 { op_Select_i64_rss, op_Select_i64_srs, op_Select_i64_ssr, op_Select_i64_sss } };
146#if d_m3HasFloat
147static const IM3Operation c_fpSelectOps [2] [2] [3] = { { { op_Select_f32_sss, op_Select_f32_srs, op_Select_f32_ssr }, // selector in slot
148 { op_Select_f32_rss, op_Select_f32_rrs, op_Select_f32_rsr } }, // selector in reg
149 { { op_Select_f64_sss, op_Select_f64_srs, op_Select_f64_ssr }, // selector in slot
150 { op_Select_f64_rss, op_Select_f64_rrs, op_Select_f64_rsr } } }; // selector in reg
151#endif
152
153// all args & returns are 64-bit aligned, so use 2 slots for a d_m3Use32BitSlots=1 build
154static const u16 c_ioSlotCount = sizeof (u64) / sizeof (m3slot_t);
155
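// Acquire a fresh code page for compilation. With d_m3EnableCodePageRefCounting, the page is also
// recorded in the compiling function's codePageRefs so its lifetime can be tracked.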
156static
157M3Result AcquireCompilationCodePage (IM3Compilation o, IM3CodePage * o_codePage)
158{
159 M3Result result = m3Err_none;
160
161 IM3CodePage page = AcquireCodePage (o->runtime);
162
163 if (page)
164 {
165# if (d_m3EnableCodePageRefCounting)
166 {
167 if (o->function)
168 {
169 IM3Function func = o->function;
170 page->info.usageCount++;
171
172 u32 index = func->numCodePageRefs++;
173_ (m3ReallocArray (& func->codePageRefs, IM3CodePage, func->numCodePageRefs, index));
174 func->codePageRefs [index] = page;
175 }
176 }
177# endif
178 }
179 else _throw (m3Err_mallocFailedCodePage);
180
181 _catch:
182
183 * o_codePage = page;
184
185 return result;
186}
187
188static inline
189void ReleaseCompilationCodePage (IM3Compilation o)
190{
191 ReleaseCodePage (o->runtime, o->page);
192}
193
194static inline
195u16 GetTypeNumSlots (u8 i_type)
196{
197# if d_m3Use32BitSlots
198 return Is64BitType (i_type) ? 2 : 1;
199# else
200 return 1;
201# endif
202}
203
204static inline
205void AlignSlotToType (u16 * io_slot, u8 i_type)
206{
207 // align 64-bit words to even slots (if d_m3Use32BitSlots)
208 u16 numSlots = GetTypeNumSlots (i_type);
209
210 u16 mask = numSlots - 1;
211 * io_slot = (* io_slot + mask) & ~mask;
212}
213
214static inline
215i16 GetStackTopIndex (IM3Compilation o)
216{ d_m3Assert (o->stackIndex > o->stackFirstDynamicIndex or IsStackPolymorphic (o));
217 return o->stackIndex - 1;
218}
219
220
221// Items in the static portion of the stack (args/locals) are hidden from GetStackTypeFromTop ()
222// In other words, only "real" Wasm stack items can be inspected. This is important when
223// returning values, etc. and you need an accurate wasm-view of the stack.
224static
225u8 GetStackTypeFromTop (IM3Compilation o, u16 i_offset)
226{
227 u8 type = c_m3Type_none;
228
229 ++i_offset;
230 if (o->stackIndex >= i_offset)
231 {
232 u16 index = o->stackIndex - i_offset;
233
234 if (index >= o->stackFirstDynamicIndex)
235 type = o->typeStack [index];
236 }
237
238 return type;
239}
240
241static inline
242u8 GetStackTopType (IM3Compilation o)
243{
244 return GetStackTypeFromTop (o, 0);
245}
246
247static inline
248u8 GetStackTypeFromBottom (IM3Compilation o, u16 i_offset)
249{
250 u8 type = c_m3Type_none;
251
252 if (i_offset < o->stackIndex)
253 type = o->typeStack [i_offset];
254
255 return type;
256}
257
258
259static inline bool IsConstantSlot (IM3Compilation o, u16 i_slot) { return (i_slot >= o->slotFirstConstIndex and i_slot < o->slotMaxConstIndex); }
260static inline bool IsSlotAllocated (IM3Compilation o, u16 i_slot) { return o->m3Slots [i_slot]; }
261
262static inline
263bool IsStackIndexInRegister (IM3Compilation o, i32 i_stackIndex)
264{ d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));
265 if (i_stackIndex >= 0 and i_stackIndex < o->stackIndex)
266 return (o->wasmStack [i_stackIndex] >= d_m3Reg0SlotAlias);
267 else
268 return false;
269}
270
271static inline u16 GetNumBlockValuesOnStack (IM3Compilation o) { return o->stackIndex - o->block.blockStackIndex; }
272
273static inline bool IsStackTopInRegister (IM3Compilation o) { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o)); }
274static inline bool IsStackTopMinus1InRegister (IM3Compilation o) { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 1); }
275static inline bool IsStackTopMinus2InRegister (IM3Compilation o) { return IsStackIndexInRegister (o, (i32) GetStackTopIndex (o) - 2); }
276
277static inline bool IsStackTopInSlot (IM3Compilation o) { return not IsStackTopInRegister (o); }
278
279static inline bool IsValidSlot (u16 i_slot) { return (i_slot < d_m3MaxFunctionSlots); }
280
281static inline
282u16 GetStackTopSlotNumber (IM3Compilation o)
283{
284 i16 i = GetStackTopIndex (o);
285
286 u16 slot = c_slotUnused;
287
288 if (i >= 0)
289 slot = o->wasmStack [i];
290
291 return slot;
292}
293
294
295// from bottom
296static inline
297u16 GetSlotForStackIndex (IM3Compilation o, u16 i_stackIndex)
298{ d_m3Assert (i_stackIndex < o->stackIndex or IsStackPolymorphic (o));
299 u16 slot = c_slotUnused;
300
301 if (i_stackIndex < o->stackIndex)
302 slot = o->wasmStack [i_stackIndex];
303
304 return slot;
305}
306
307static inline
308u16 GetExtraSlotForStackIndex (IM3Compilation o, u16 i_stackIndex)
309{
310 u16 baseSlot = GetSlotForStackIndex (o, i_stackIndex);
311
312 if (baseSlot != c_slotUnused)
313 {
314 u16 extraSlot = GetTypeNumSlots (GetStackTypeFromBottom (o, i_stackIndex)) - 1;
315 baseSlot += extraSlot;
316 }
317
318 return baseSlot;
319}
320
321
322static inline
323void TouchSlot (IM3Compilation o, u16 i_slot)
324{
325 if (o->function)
326 {
327 // op_Entry uses this value to track and detect stack overflow
328 o->maxStackSlots = M3_MAX (o->maxStackSlots, i_slot + 1);
329 }
330}
331
332static inline
333void MarkSlotAllocated (IM3Compilation o, u16 i_slot)
334{ d_m3Assert (o->m3Slots [i_slot] == 0); // shouldn't be already allocated
335 o->m3Slots [i_slot] = 1;
336
337 o->slotMaxAllocatedIndexPlusOne = M3_MAX (o->slotMaxAllocatedIndexPlusOne, i_slot + 1);
338
339 TouchSlot (o, i_slot);
340}
341
342static inline
343void MarkSlotsAllocated (IM3Compilation o, u16 i_slot, u16 i_numSlots)
344{
345 while (i_numSlots--)
346 MarkSlotAllocated (o, i_slot++);
347}
348
349static inline
350void MarkSlotsAllocatedByType (IM3Compilation o, u16 i_slot, u8 i_type)
351{
352 u16 numSlots = GetTypeNumSlots (i_type);
353 MarkSlotsAllocated (o, i_slot, numSlots);
354}
355
356
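// Linearly scan [i_startSlot, i_endSlot) for 1 or 2 consecutive free slots (depending on the type's
// width) and mark them allocated; 64-bit values stay even-aligned. Fails with
// m3Err_functionStackOverflow when no free run is found.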
357static
358M3Result AllocateSlotsWithinRange (IM3Compilation o, u16 * o_slot, u8 i_type, u16 i_startSlot, u16 i_endSlot)
359{
360 M3Result result = m3Err_functionStackOverflow;
361
362 u16 numSlots = GetTypeNumSlots (i_type);
363 u16 searchOffset = numSlots - 1;
364
365 AlignSlotToType (& i_startSlot, i_type);
366
367 // search for 1 or 2 consecutive slots in the execution stack
368 u16 i = i_startSlot;
369 while (i + searchOffset < i_endSlot)
370 {
371 if (o->m3Slots [i] == 0 and o->m3Slots [i + searchOffset] == 0)
372 {
373 MarkSlotsAllocated (o, i, numSlots);
374
375 * o_slot = i;
376 result = m3Err_none;
377 break;
378 }
379
380 // keep 2-slot allocations even-aligned
381 i += numSlots;
382 }
383
384 return result;
385}
386
387static inline
388M3Result AllocateSlots (IM3Compilation o, u16 * o_slot, u8 i_type)
389{
390 return AllocateSlotsWithinRange (o, o_slot, i_type, o->slotFirstDynamicIndex, d_m3MaxFunctionSlots);
391}
392
393static inline
394M3Result AllocateConstantSlots (IM3Compilation o, u16 * o_slot, u8 i_type)
395{
396 u16 maxTableIndex = o->slotFirstConstIndex + d_m3MaxConstantTableSize;
397 return AllocateSlotsWithinRange (o, o_slot, i_type, o->slotFirstConstIndex, M3_MIN(o->slotFirstDynamicIndex, maxTableIndex));
398}
399
400
401// TOQUE: this usage count system could be eliminated. real world code doesn't frequently trigger it. just copy to multiple
402// unique slots.
403static inline
404M3Result IncrementSlotUsageCount (IM3Compilation o, u16 i_slot)
405{ d_m3Assert (i_slot < d_m3MaxFunctionSlots);
406 M3Result result = m3Err_none; d_m3Assert (o->m3Slots [i_slot] > 0);
407
408 // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
409 // would scan 'wasmStack' to determine the actual usage count
410 if (o->m3Slots [i_slot] < 0xFF)
411 {
412 o->m3Slots [i_slot]++;
413 }
414 else result = "slot usage count overflow";
415
416 return result;
417}
418
419static inline
420void DeallocateSlot (IM3Compilation o, i16 i_slot, u8 i_type)
421{ d_m3Assert (i_slot >= o->slotFirstDynamicIndex);
422 d_m3Assert (i_slot < o->slotMaxAllocatedIndexPlusOne);
423 for (u16 i = 0; i < GetTypeNumSlots (i_type); ++i, ++i_slot)
424 { d_m3Assert (o->m3Slots [i_slot]);
425 -- o->m3Slots [i_slot];
426 }
427}
428
429
430static inline
431bool IsRegisterTypeAllocated (IM3Compilation o, u8 i_type)
432{
433 return IsRegisterAllocated (o, IsFpType (i_type));
434}
435
436static inline
437void AllocateRegister (IM3Compilation o, u32 i_register, u16 i_stackIndex)
438{ d_m3Assert (not IsRegisterAllocated (o, i_register));
439 o->regStackIndexPlusOne [i_register] = i_stackIndex + 1;
440}
441
442static inline
443void DeallocateRegister (IM3Compilation o, u32 i_register)
444{ d_m3Assert (IsRegisterAllocated (o, i_register));
445 o->regStackIndexPlusOne [i_register] = c_m3RegisterUnallocated;
446}
447
448static inline
449u16 GetRegisterStackIndex (IM3Compilation o, u32 i_register)
450{ d_m3Assert (IsRegisterAllocated (o, i_register));
451 return o->regStackIndexPlusOne [i_register] - 1;
452}
453
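// Shrink slotMaxAllocatedIndexPlusOne past any trailing unallocated slots and return it; callers use
// this to find the first slot available for a callee's stack frame.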
454u16 GetMaxUsedSlotPlusOne (IM3Compilation o)
455{
456 while (o->slotMaxAllocatedIndexPlusOne > o->slotFirstDynamicIndex)
457 {
458 if (IsSlotAllocated (o, o->slotMaxAllocatedIndexPlusOne - 1))
459 break;
460
461 o->slotMaxAllocatedIndexPlusOne--;
462 }
463
464# ifdef DEBUG
465 u16 maxSlot = o->slotMaxAllocatedIndexPlusOne;
466 while (maxSlot < d_m3MaxFunctionSlots)
467 {
468 d_m3Assert (o->m3Slots [maxSlot] == 0);
469 maxSlot++;
470 }
471# endif
472
473 return o->slotMaxAllocatedIndexPlusOne;
474}
475
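// If the register selected by i_registerType currently holds a stack value, spill that value into a
// newly allocated slot (emitting the matching SetSlot op) so the register becomes free for reuse.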
476static
477M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
478{
479 M3Result result = m3Err_none;
480
481 u32 regSelect = IsFpType (i_registerType);
482
483 if (IsRegisterAllocated (o, regSelect))
484 {
485 u16 stackIndex = GetRegisterStackIndex (o, regSelect);
486 DeallocateRegister (o, regSelect);
487
488 u8 type = GetStackTypeFromBottom (o, stackIndex);
489
        // and point it to an exec slot
491 u16 slot = c_slotUnused;
492_ (AllocateSlots (o, & slot, type));
493 o->wasmStack [stackIndex] = slot;
494
495_ (EmitOp (o, c_setSetOps [type]));
496 EmitSlotOffset (o, slot);
497 }
498
499 _catch: return result;
500}
501
502
// all values must be in slots before entering loop, if, and else blocks;
// otherwise they'd end up preserve-copied inside the block, probably to different locations (if/else)
505static inline
506M3Result PreserveRegisters (IM3Compilation o)
507{
508 M3Result result;
509
510_ (PreserveRegisterIfOccupied (o, c_m3Type_f64));
511_ (PreserveRegisterIfOccupied (o, c_m3Type_i64));
512
513 _catch: return result;
514}
515
516static
517M3Result PreserveNonTopRegisters (IM3Compilation o)
518{
519 M3Result result = m3Err_none;
520
521 i16 stackTop = GetStackTopIndex (o);
522
523 if (stackTop >= 0)
524 {
525 if (IsRegisterAllocated (o, 0)) // r0
526 {
527 if (GetRegisterStackIndex (o, 0) != stackTop)
528_ (PreserveRegisterIfOccupied (o, c_m3Type_i64));
529 }
530
531 if (IsRegisterAllocated (o, 1)) // fp0
532 {
533 if (GetRegisterStackIndex (o, 1) != stackTop)
534_ (PreserveRegisterIfOccupied (o, c_m3Type_f64));
535 }
536 }
537
538 _catch: return result;
539}
540
541
542//----------------------------------------------------------------------------------------------------------------------
543
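// Push an entry onto the compile-time shadow stack, recording its type and the slot (or register
// alias) holding it; pushing a register alias also marks that register as allocated.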
544static
545M3Result Push (IM3Compilation o, u8 i_type, u16 i_slot)
546{
547 M3Result result = m3Err_none;
548
549#if !d_m3HasFloat
550 if (i_type == c_m3Type_f32 || i_type == c_m3Type_f64) {
551 return m3Err_unknownOpcode;
552 }
553#endif
554
555 u16 stackIndex = o->stackIndex++; // printf ("push: %d\n", (i32) i);
556
557 if (stackIndex < d_m3MaxFunctionStackHeight)
558 {
559 o->wasmStack [stackIndex] = i_slot;
560 o->typeStack [stackIndex] = i_type;
561
562 if (IsRegisterSlotAlias (i_slot))
563 {
564 u32 regSelect = IsFpRegisterSlotAlias (i_slot);
565 AllocateRegister (o, regSelect, stackIndex);
566 }
567
568 if (d_m3LogWasmStack) dump_type_stack (o);
569 }
570 else result = m3Err_functionStackOverflow;
571
572 return result;
573}
574
575static inline
576M3Result PushRegister (IM3Compilation o, u8 i_type)
577{
578 M3Result result = m3Err_none; d_m3Assert ((u16) d_m3Reg0SlotAlias > (u16) d_m3MaxFunctionSlots);
579 u16 slot = IsFpType (i_type) ? d_m3Fp0SlotAlias : d_m3Reg0SlotAlias; d_m3Assert (i_type or IsStackPolymorphic (o));
580
581_ (Push (o, i_type, slot));
582
583 _catch: return result;
584}
585
586static
587M3Result Pop (IM3Compilation o)
588{
589 M3Result result = m3Err_none;
590
591 if (o->stackIndex > o->block.blockStackIndex)
592 {
593 o->stackIndex--; // printf ("pop: %d\n", (i32) o->stackIndex);
594
595 u16 slot = o->wasmStack [o->stackIndex];
596 u8 type = o->typeStack [o->stackIndex];
597
598 if (IsRegisterSlotAlias (slot))
599 {
600 u32 regSelect = IsFpRegisterSlotAlias (slot);
601 DeallocateRegister (o, regSelect);
602 }
603 else if (slot >= o->slotFirstDynamicIndex)
604 {
605 DeallocateSlot (o, slot, type);
606 }
607 }
608 else if (not IsStackPolymorphic (o))
609 result = m3Err_functionStackUnderrun;
610
611 return result;
612}
613
614static
615M3Result PopType (IM3Compilation o, u8 i_type)
616{
617 M3Result result = m3Err_none;
618
619 u8 topType = GetStackTopType (o);
620
621 if (i_type == topType or o->block.isPolymorphic)
622 {
623_ (Pop (o));
624 }
625 else _throw (m3Err_typeMismatch);
626
627 _catch:
628 return result;
629}
630
631static
632M3Result _PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_type, bool i_doEmit)
633{
634 M3Result result = m3Err_none;
635
636 u16 slot = c_slotUnused;
637
638_ (AllocateSlots (o, & slot, i_type));
639_ (Push (o, i_type, slot));
640
641 if (i_doEmit)
642 EmitSlotOffset (o, slot);
643
644// printf ("push: %d\n", (u32) slot);
645
646 _catch: return result;
647}
648
649static inline
650M3Result PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_type)
651{
652 return _PushAllocatedSlotAndEmit (o, i_type, true);
653}
654
655static inline
656M3Result PushAllocatedSlot (IM3Compilation o, u8 i_type)
657{
658 return _PushAllocatedSlotAndEmit (o, i_type, false);
659}
660
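// Push a constant value. An identical constant already in the constant table is reused when possible;
// otherwise a new table slot is allocated, and if the table is full the constant is emitted inline
// with op_Const32/op_Const64.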
661static
662M3Result PushConst (IM3Compilation o, u64 i_word, u8 i_type)
663{
664 M3Result result = m3Err_none;
665
666 // Early-exit if we're not emitting
667 if (!o->page) return result;
668
669 bool matchFound = false;
670 bool is64BitType = Is64BitType (i_type);
671
672 u16 numRequiredSlots = GetTypeNumSlots (i_type);
673 u16 numUsedConstSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;
674
675 // search for duplicate matching constant slot to reuse
676 if (numRequiredSlots == 2 and numUsedConstSlots >= 2)
677 {
678 u16 firstConstSlot = o->slotFirstConstIndex;
679 AlignSlotToType (& firstConstSlot, c_m3Type_i64);
680
681 for (u16 slot = firstConstSlot; slot < o->slotMaxConstIndex - 1; slot += 2)
682 {
683 if (IsSlotAllocated (o, slot) and IsSlotAllocated (o, slot + 1))
684 {
685 u64 constant = * (u64 *) & o->constants [slot - o->slotFirstConstIndex];
686
687 if (constant == i_word)
688 {
689 matchFound = true;
690_ (Push (o, i_type, slot));
691 break;
692 }
693 }
694 }
695 }
696 else if (numRequiredSlots == 1)
697 {
698 for (u16 i = 0; i < numUsedConstSlots; ++i)
699 {
700 u16 slot = o->slotFirstConstIndex + i;
701
702 if (IsSlotAllocated (o, slot))
703 {
704 u64 constant;
705 if (is64BitType) {
706 constant = * (u64 *) & o->constants [i];
707 } else {
708 constant = * (u32 *) & o->constants [i];
709 }
710 if (constant == i_word)
711 {
712 matchFound = true;
713_ (Push (o, i_type, slot));
714 break;
715 }
716 }
717 }
718 }
719
720 if (not matchFound)
721 {
722 u16 slot = c_slotUnused;
723 result = AllocateConstantSlots (o, & slot, i_type);
724
725 if (result) // no more constant table space; use inline constants
726 {
727 result = m3Err_none;
728
729 if (is64BitType) {
730_ (EmitOp (o, op_Const64));
731 EmitWord64 (o->page, i_word);
732 } else {
733_ (EmitOp (o, op_Const32));
734 EmitWord32 (o->page, (u32) i_word);
735 }
736
737_ (PushAllocatedSlotAndEmit (o, i_type));
738 }
739 else
740 {
741 u16 constTableIndex = slot - o->slotFirstConstIndex;
742
743 d_m3Assert(constTableIndex < d_m3MaxConstantTableSize);
744
745 if (is64BitType)
746 {
747 u64 * constant = (u64 *) & o->constants [constTableIndex];
748 * constant = i_word;
749 }
750 else
751 {
752 u32 * constant = (u32 *) & o->constants [constTableIndex];
753 * constant = (u32) i_word;
754 }
755
756_ (Push (o, i_type, slot));
757
758 o->slotMaxConstIndex = M3_MAX (slot + numRequiredSlots, o->slotMaxConstIndex);
759 }
760 }
761
762 _catch: return result;
763}
764
765static inline
766M3Result EmitSlotNumOfStackTopAndPop (IM3Compilation o)
767{
768 // no emit if value is in register
769 if (IsStackTopInSlot (o))
770 EmitSlotOffset (o, GetStackTopSlotNumber (o));
771
772 return Pop (o);
773}
774
775
776// Or, maybe: EmitTrappingOp
777M3Result AddTrapRecord (IM3Compilation o)
778{
779 M3Result result = m3Err_none;
780
781 if (o->function)
782 {
783 }
784
785 return result;
786}
787
788static
789M3Result UnwindBlockStack (IM3Compilation o)
790{
791 M3Result result = m3Err_none;
792
793 u32 popCount = 0;
794 while (o->stackIndex > o->block.blockStackIndex)
795 {
796_ (Pop (o));
797 ++popCount;
798 }
799
800 if (popCount)
801 {
802 m3log (compile, "unwound stack top: %d", popCount);
803 }
804
805 _catch: return result;
806}
807
808static inline
809M3Result SetStackPolymorphic (IM3Compilation o)
810{
811 o->block.isPolymorphic = true; m3log (compile, "stack set polymorphic");
812 return UnwindBlockStack (o);
813}
814
815static
816void PatchBranches (IM3Compilation o)
817{
818 pc_t pc = GetPC (o);
819
820 pc_t patches = o->block.patches;
821 o->block.patches = NULL;
822
823 while (patches)
824 { m3log (compile, "patching location: %p to pc: %p", patches, pc);
825 pc_t next = * (pc_t *) patches;
826 * (pc_t *) patches = pc;
827 patches = next;
828 }
829}
830
831//-------------------------------------------------------------------------------------------------------------------------
832
833static
834M3Result CopyStackIndexToSlot (IM3Compilation o, u16 i_destSlot, u16 i_stackIndex) // NoPushPop
835{
836 M3Result result = m3Err_none;
837
838 IM3Operation op;
839
840 u8 type = GetStackTypeFromBottom (o, i_stackIndex);
841 bool inRegister = IsStackIndexInRegister (o, i_stackIndex);
842
843 if (inRegister)
844 {
845 op = c_setSetOps [type];
846 }
847 else op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;
848
849_ (EmitOp (o, op));
850 EmitSlotOffset (o, i_destSlot);
851
852 if (not inRegister)
853 {
854 u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
855 EmitSlotOffset (o, srcSlot);
856 }
857
858 _catch: return result;
859}
860
861static
862M3Result CopyStackTopToSlot (IM3Compilation o, u16 i_destSlot) // NoPushPop
863{
864 M3Result result;
865
866 i16 stackTop = GetStackTopIndex (o);
867_ (CopyStackIndexToSlot (o, i_destSlot, (u16) stackTop));
868
869 _catch: return result;
870}
871
872
// a copy-on-write strategy is used with locals. when a get_local occurs, the value isn't copied anywhere; the stack
// entry just holds an index pointer to that local's memory slot.
// then, when a previously referenced local is set, the current value needs to be preserved for those references
876
// TODO: consider getting rid of these specialized operations: PreserveSetSlot & PreserveCopySlot.
// They likely just take up code space (which seems to reduce performance) without providing much benefit.
879static
880M3Result PreservedCopyTopSlot (IM3Compilation o, u16 i_destSlot, u16 i_preserveSlot)
881{
882 M3Result result = m3Err_none; d_m3Assert (i_destSlot != i_preserveSlot);
883
884 IM3Operation op;
885
886 u8 type = GetStackTopType (o);
887
888 if (IsStackTopInRegister (o))
889 {
890 op = c_preserveSetSlot [type];
891 }
892 else op = Is64BitType (type) ? op_PreserveCopySlot_64 : op_PreserveCopySlot_32;
893
894_ (EmitOp (o, op));
895 EmitSlotOffset (o, i_destSlot);
896
897 if (IsStackTopInSlot (o))
898 EmitSlotOffset (o, GetStackTopSlotNumber (o));
899
900 EmitSlotOffset (o, i_preserveSlot);
901
902 _catch: return result;
903}
904
905static
906M3Result CopyStackTopToRegister (IM3Compilation o, bool i_updateStack)
907{
908 M3Result result = m3Err_none;
909
910 if (IsStackTopInSlot (o))
911 {
912 u8 type = GetStackTopType (o);
913
914_ (PreserveRegisterIfOccupied (o, type));
915
916 IM3Operation op = c_setRegisterOps [type];
917
918_ (EmitOp (o, op));
919 EmitSlotOffset (o, GetStackTopSlotNumber (o));
920
921 if (i_updateStack)
922 {
923_ (PopType (o, type));
924_ (PushRegister (o, type));
925 }
926 }
927
928 _catch: return result;
929}
930
931
// if the local is unreferenced within the current block, o_preservedSlotNumber will equal i_localSlot on return
933static
934M3Result FindReferencedLocalWithinCurrentBlock (IM3Compilation o, u16 * o_preservedSlotNumber, u32 i_localSlot)
935{
936 M3Result result = m3Err_none;
937
938 IM3CompilationScope scope = & o->block;
939 i16 startIndex = scope->blockStackIndex;
940
941 while (scope->opcode == c_waOp_block)
942 {
943 scope = scope->outer;
944 if (not scope)
945 break;
946
947 startIndex = scope->blockStackIndex;
948 }
949
950 * o_preservedSlotNumber = (u16) i_localSlot;
951
952 for (u32 i = startIndex; i < o->stackIndex; ++i)
953 {
954 if (o->wasmStack [i] == i_localSlot)
955 {
956 if (* o_preservedSlotNumber == i_localSlot)
957 {
958 u8 type = GetStackTypeFromBottom (o, i); d_m3Assert (type != c_m3Type_none)
959
960_ (AllocateSlots (o, o_preservedSlotNumber, type));
961 }
962 else
963_ (IncrementSlotUsageCount (o, * o_preservedSlotNumber));
964
965 o->wasmStack [i] = * o_preservedSlotNumber;
966 }
967 }
968
969 _catch: return result;
970}
971
972static
973M3Result GetBlockScope (IM3Compilation o, IM3CompilationScope * o_scope, u32 i_depth)
974{
975 M3Result result = m3Err_none;
976
977 IM3CompilationScope scope = & o->block;
978
979 while (i_depth--)
980 {
981 scope = scope->outer;
982 _throwif ("invalid block depth", not scope);
983 }
984
985 * o_scope = scope;
986
987 _catch:
988 return result;
989}
990
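// Recursively copy the block values in [i_stackIndex, i_endStackIndex) into the slots recorded from
// stack index i_targetSlotStackIndex onward. Values whose slots would be overwritten are first moved
// to i_tempSlot scratch space; the compile-time wasmStack entries are restored on the way back out.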
991static
992M3Result CopyStackSlotsR (IM3Compilation o, u16 i_targetSlotStackIndex, u16 i_stackIndex, u16 i_endStackIndex, u16 i_tempSlot)
993{
994 M3Result result = m3Err_none;
995
996 if (i_stackIndex < i_endStackIndex)
997 {
998 u16 srcSlot = GetSlotForStackIndex (o, i_stackIndex);
999
1000 u8 type = GetStackTypeFromBottom (o, i_stackIndex);
1001 u16 numSlots = GetTypeNumSlots (type);
1002 u16 extraSlot = numSlots - 1;
1003
1004 u16 targetSlot = GetSlotForStackIndex (o, i_targetSlotStackIndex);
1005
1006 u16 preserveIndex = i_stackIndex;
1007 u16 collisionSlot = srcSlot;
1008
1009 if (targetSlot != srcSlot)
1010 {
1011 // search for collisions
1012 u16 checkIndex = i_stackIndex + 1;
1013 while (checkIndex < i_endStackIndex)
1014 {
1015 u16 otherSlot1 = GetSlotForStackIndex (o, checkIndex);
1016 u16 otherSlot2 = GetExtraSlotForStackIndex (o, checkIndex);
1017
1018 if (targetSlot == otherSlot1 or
1019 targetSlot == otherSlot2 or
1020 targetSlot + extraSlot == otherSlot1)
1021 {
1022 _throwif (m3Err_functionStackOverflow, i_tempSlot >= d_m3MaxFunctionSlots);
1023
1024_ (CopyStackIndexToSlot (o, i_tempSlot, checkIndex));
1025 o->wasmStack [checkIndex] = i_tempSlot;
1026 i_tempSlot += GetTypeNumSlots (c_m3Type_i64);
1027 TouchSlot (o, i_tempSlot - 1);
1028
1029 // restore this on the way back down
1030 preserveIndex = checkIndex;
1031 collisionSlot = otherSlot1;
1032
1033 break;
1034 }
1035
1036 ++checkIndex;
1037 }
1038
1039_ (CopyStackIndexToSlot (o, targetSlot, i_stackIndex)); m3log (compile, " copying slot: %d to slot: %d", srcSlot, targetSlot);
1040 o->wasmStack [i_stackIndex] = targetSlot;
1041
1042 }
1043
1044_ (CopyStackSlotsR (o, i_targetSlotStackIndex + 1, i_stackIndex + 1, i_endStackIndex, i_tempSlot));
1045
1046 // restore the stack state
1047 o->wasmStack [i_stackIndex] = srcSlot;
1048 o->wasmStack [preserveIndex] = collisionSlot;
1049 }
1050
1051 _catch:
1052 return result;
1053}
1054
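// Move the values a block leaves on the wasm stack into the slots its target expects: the loop's
// params for a backward branch, the block's results otherwise. When the target isn't a loop, a
// floating-point value on top is handed over in the fp register rather than a slot.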
1055static
1056M3Result ResolveBlockResults (IM3Compilation o, IM3CompilationScope i_targetBlock, bool i_isBranch)
1057{
1058 M3Result result = m3Err_none; if (d_m3LogWasmStack) dump_type_stack (o);
1059
1060 bool isLoop = (i_targetBlock->opcode == c_waOp_loop and i_isBranch);
1061
1062 u16 numParams = GetFuncTypeNumParams (i_targetBlock->type);
1063 u16 numResults = GetFuncTypeNumResults (i_targetBlock->type);
1064
1065 u16 slotRecords = i_targetBlock->exitStackIndex;
1066
1067 u16 numValues;
1068
1069 if (not isLoop)
1070 {
1071 numValues = numResults;
1072 slotRecords += numParams;
1073 }
1074 else numValues = numParams;
1075
1076 u16 blockHeight = GetNumBlockValuesOnStack (o);
1077
1078 _throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numValues) : (blockHeight != numValues));
1079
1080 if (numValues)
1081 {
1082 u16 endIndex = GetStackTopIndex (o) + 1;
1083
1084 if (not isLoop and IsFpType (GetStackTopType (o)))
1085 {
1086_ (CopyStackTopToRegister (o, false));
1087 --endIndex;
1088 }
1089
        // TODO: tempSlot affects maxStackSlots, so it can grow unnecessarily each time.
        u16 tempSlot = o->maxStackSlots;    // GetMaxUsedSlotPlusOne (o) doesn't work because it can collide with slotRecords
1092 AlignSlotToType (& tempSlot, c_m3Type_i64);
1093
1094_ (CopyStackSlotsR (o, slotRecords, endIndex - numValues, endIndex, tempSlot));
1095
1096 if (d_m3LogWasmStack) dump_type_stack (o);
1097 }
1098
1099 _catch: return result;
1100}
1101
1102
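// Copy the current block's values into the function's 64-bit aligned return slots at the base of the
// frame, checking each one against the function signature. For a fall-through exit (not a branch)
// the values are also popped from the compile-time stack.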
1103static
1104M3Result ReturnValues (IM3Compilation o, IM3CompilationScope i_functionBlock, bool i_isBranch)
1105{
1106 M3Result result = m3Err_none; if (d_m3LogWasmStack) dump_type_stack (o);
1107
    u16 numReturns = GetFuncTypeNumResults (i_functionBlock->type);    // could just use o->function too...
1109 u16 blockHeight = GetNumBlockValuesOnStack (o);
1110
1111 if (not IsStackPolymorphic (o))
1112 _throwif (m3Err_typeCountMismatch, i_isBranch ? (blockHeight < numReturns) : (blockHeight != numReturns));
1113
1114 if (numReturns)
1115 {
        // return slots, like args, are 64-bit aligned
1117 u16 returnSlot = numReturns * c_ioSlotCount;
1118 u16 stackTop = GetStackTopIndex (o);
1119
1120 for (u16 i = 0; i < numReturns; ++i)
1121 {
1122 u8 returnType = GetFuncTypeResultType (i_functionBlock->type, numReturns - 1 - i);
1123
1124 u8 stackType = GetStackTypeFromTop (o, i); // using FromTop so that only dynamic items are checked
1125
1126 if (IsStackPolymorphic (o) and stackType == c_m3Type_none)
1127 stackType = returnType;
1128
1129 _throwif (m3Err_typeMismatch, returnType != stackType);
1130
1131 if (not IsStackPolymorphic (o))
1132 {
1133 returnSlot -= c_ioSlotCount;
1134_ (CopyStackIndexToSlot (o, returnSlot, stackTop--));
1135 }
1136 }
1137
1138 if (not i_isBranch)
1139 {
1140 while (numReturns--)
1141_ (Pop (o));
1142 }
1143 }
1144
1145 _catch: return result;
1146}
1147
1148
1149//-------------------------------------------------------------------------------------------------------------------------
1150
1151static
1152M3Result Compile_Const_i32 (IM3Compilation o, m3opcode_t i_opcode)
1153{
1154 M3Result result;
1155
1156 i32 value;
1157_ (ReadLEB_i32 (& value, & o->wasm, o->wasmEnd));
1158_ (PushConst (o, value, c_m3Type_i32)); m3log (compile, d_indent " (const i32 = %" PRIi32 ")", get_indention_string (o), value);
1159 _catch: return result;
1160}
1161
1162static
1163M3Result Compile_Const_i64 (IM3Compilation o, m3opcode_t i_opcode)
1164{
1165 M3Result result;
1166
1167 i64 value;
1168_ (ReadLEB_i64 (& value, & o->wasm, o->wasmEnd));
1169_ (PushConst (o, value, c_m3Type_i64)); m3log (compile, d_indent " (const i64 = %" PRIi64 ")", get_indention_string (o), value);
1170 _catch: return result;
1171}
1172
1173
1174#if d_m3ImplementFloat
1175static
1176M3Result Compile_Const_f32 (IM3Compilation o, m3opcode_t i_opcode)
1177{
1178 M3Result result;
1179
1180 union { u32 u; f32 f; } value = { 0 };
1181
1182_ (Read_f32 (& value.f, & o->wasm, o->wasmEnd)); m3log (compile, d_indent " (const f32 = %" PRIf32 ")", get_indention_string (o), value.f);
1183_ (PushConst (o, value.u, c_m3Type_f32));
1184
1185 _catch: return result;
1186}
1187
1188static
1189M3Result Compile_Const_f64 (IM3Compilation o, m3opcode_t i_opcode)
1190{
1191 M3Result result;
1192
1193 union { u64 u; f64 f; } value = { 0 };
1194
1195_ (Read_f64 (& value.f, & o->wasm, o->wasmEnd)); m3log (compile, d_indent " (const f64 = %" PRIf64 ")", get_indention_string (o), value.f);
1196_ (PushConst (o, value.u, c_m3Type_f64));
1197
1198 _catch: return result;
1199}
1200#endif
1201
1202#if d_m3CascadedOpcodes
1203
1204static
1205M3Result Compile_ExtendedOpcode (IM3Compilation o, m3opcode_t i_opcode)
1206{
1207_try {
1208 u8 opcode;
1209_ (Read_u8 (& opcode, & o->wasm, o->wasmEnd)); m3log (compile, d_indent " (FC: %" PRIi32 ")", get_indention_string (o), opcode);
1210
1211 i_opcode = (i_opcode << 8) | opcode;
1212
1213 //printf("Extended opcode: 0x%x\n", i_opcode);
1214
1215 IM3OpInfo opInfo = GetOpInfo (i_opcode);
1216 _throwif (m3Err_unknownOpcode, not opInfo);
1217
1218 M3Compiler compiler = opInfo->compiler;
1219 _throwif (m3Err_noCompiler, not compiler);
1220
1221_ ((* compiler) (o, i_opcode));
1222
1223 o->previousOpcode = i_opcode;
1224
1225 } _catch: return result;
1226}
1227#endif
1228
1229static
1230M3Result Compile_Return (IM3Compilation o, m3opcode_t i_opcode)
1231{
1232 M3Result result = m3Err_none;
1233
1234 if (not IsStackPolymorphic (o))
1235 {
1236 IM3CompilationScope functionScope;
1237_ (GetBlockScope (o, & functionScope, o->block.depth));
1238
1239_ (ReturnValues (o, functionScope, true));
1240
1241_ (EmitOp (o, op_Return));
1242
1243_ (SetStackPolymorphic (o));
1244 }
1245
1246 _catch: return result;
1247}
1248
1249static
1250M3Result ValidateBlockEnd (IM3Compilation o)
1251{
1252 M3Result result = m3Err_none;
1253/*
1254 u16 numResults = GetFuncTypeNumResults (o->block.type);
1255 u16 blockHeight = GetNumBlockValuesOnStack (o);
1256
1257 if (IsStackPolymorphic (o))
1258 {
1259 }
1260 else
1261 {
1262 }
1263
1264 _catch: */ return result;
1265}
1266
1267static
1268M3Result Compile_End (IM3Compilation o, m3opcode_t i_opcode)
1269{
1270 M3Result result = m3Err_none; //dump_type_stack (o);
1271
1272 // function end:
1273 if (o->block.depth == 0)
1274 {
1275 ValidateBlockEnd (o);
1276
1277// if (not IsStackPolymorphic (o))
1278 {
1279 if (o->function)
1280 {
1281_ (ReturnValues (o, & o->block, false));
1282 }
1283
1284_ (EmitOp (o, op_Return));
1285 }
1286 }
1287
1288 _catch: return result;
1289}
1290
1291
1292static
1293M3Result Compile_SetLocal (IM3Compilation o, m3opcode_t i_opcode)
1294{
1295 M3Result result;
1296
1297 u32 localIndex;
1298_ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd)); // printf ("--- set local: %d \n", localSlot);
1299
1300 if (localIndex < GetFunctionNumArgsAndLocals (o->function))
1301 {
1302 u16 localSlot = GetSlotForStackIndex (o, localIndex);
1303
1304 u16 preserveSlot;
1305_ (FindReferencedLocalWithinCurrentBlock (o, & preserveSlot, localSlot)); // preserve will be different than local, if referenced
1306
1307 if (preserveSlot == localSlot)
1308_ (CopyStackTopToSlot (o, localSlot))
1309 else
1310_ (PreservedCopyTopSlot (o, localSlot, preserveSlot))
1311
1312 if (i_opcode != c_waOp_teeLocal)
1313_ (Pop (o));
1314 }
1315 else _throw ("local index out of bounds");
1316
1317 _catch: return result;
1318}
1319
1320static
1321M3Result Compile_GetLocal (IM3Compilation o, m3opcode_t i_opcode)
1322{
1323_try {
1324
1325 u32 localIndex;
1326_ (ReadLEB_u32 (& localIndex, & o->wasm, o->wasmEnd));
1327
1328 if (localIndex >= GetFunctionNumArgsAndLocals (o->function))
1329 _throw ("local index out of bounds");
1330
1331 u8 type = GetStackTypeFromBottom (o, localIndex);
1332 u16 slot = GetSlotForStackIndex (o, localIndex);
1333
1334_ (Push (o, type, slot));
1335
1336 } _catch: return result;
1337}
1338
1339static
1340M3Result Compile_GetGlobal (IM3Compilation o, M3Global * i_global)
1341{
1342 M3Result result;
1343
1344 IM3Operation op = Is64BitType (i_global->type) ? op_GetGlobal_s64 : op_GetGlobal_s32;
1345_ (EmitOp (o, op));
1346 EmitPointer (o, & i_global->intValue);
1347_ (PushAllocatedSlotAndEmit (o, i_global->type));
1348
1349 _catch: return result;
1350}
1351
1352static
1353M3Result Compile_SetGlobal (IM3Compilation o, M3Global * i_global)
1354{
1355 M3Result result = m3Err_none;
1356
1357 if (i_global->isMutable)
1358 {
1359 IM3Operation op;
1360 u8 type = GetStackTopType (o);
1361
1362 if (IsStackTopInRegister (o))
1363 {
1364 op = c_setGlobalOps [type];
1365 }
1366 else op = Is64BitType (type) ? op_SetGlobal_s64 : op_SetGlobal_s32;
1367
1368_ (EmitOp (o, op));
1369 EmitPointer (o, & i_global->intValue);
1370
1371 if (IsStackTopInSlot (o))
1372 EmitSlotOffset (o, GetStackTopSlotNumber (o));
1373
1374_ (Pop (o));
1375 }
1376 else _throw (m3Err_settingImmutableGlobal);
1377
1378 _catch: return result;
1379}
1380
1381static
1382M3Result Compile_GetSetGlobal (IM3Compilation o, m3opcode_t i_opcode)
1383{
1384 M3Result result = m3Err_none;
1385
1386 u32 globalIndex;
1387_ (ReadLEB_u32 (& globalIndex, & o->wasm, o->wasmEnd));
1388
1389 if (globalIndex < o->module->numGlobals)
1390 {
1391 if (o->module->globals)
1392 {
1393 M3Global * global = & o->module->globals [globalIndex];
1394
1395_ ((i_opcode == c_waOp_getGlobal) ? Compile_GetGlobal (o, global) : Compile_SetGlobal (o, global));
1396 }
1397 else _throw (ErrorCompile (m3Err_globalMemoryNotAllocated, o, "module '%s' is missing global memory", o->module->name));
1398 }
1399 else _throw (m3Err_globaIndexOutOfBounds);
1400
1401 _catch: return result;
1402}
1403
1404static
1405void EmitPatchingBranchPointer (IM3Compilation o, IM3CompilationScope i_scope)
1406{
1407 pc_t patch = EmitPointer (o, i_scope->patches); m3log (compile, "branch patch required at: %p", patch);
1408 i_scope->patches = patch;
1409}
1410
1411static
1412M3Result EmitPatchingBranch (IM3Compilation o, IM3CompilationScope i_scope)
1413{
1414 M3Result result = m3Err_none;
1415
1416_ (EmitOp (o, op_Branch));
1417 EmitPatchingBranchPointer (o, i_scope);
1418
1419 _catch: return result;
1420}
1421
1422static
1423M3Result Compile_Branch (IM3Compilation o, m3opcode_t i_opcode)
1424{
1425 M3Result result;
1426
1427 u32 depth;
1428_ (ReadLEB_u32 (& depth, & o->wasm, o->wasmEnd));
1429
1430 IM3CompilationScope scope;
1431_ (GetBlockScope (o, & scope, depth));
1432
1433 // branch target is a loop (continue)
1434 if (scope->opcode == c_waOp_loop)
1435 {
1436 if (i_opcode == c_waOp_branchIf)
1437 {
1438 if (GetFuncTypeNumParams (scope->type))
1439 {
1440 IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
1441
1442_ (EmitOp (o, op));
1443_ (EmitSlotNumOfStackTopAndPop (o));
1444
1445 pc_t * jumpTo = (pc_t *) ReservePointer (o);
1446
1447_ (ResolveBlockResults (o, scope, /* isBranch: */ true));
1448
1449_ (EmitOp (o, op_ContinueLoop));
1450 EmitPointer (o, scope->pc);
1451
1452 * jumpTo = GetPC (o);
1453 }
1454 else
1455 {
1456 // move the condition to a register
1457_ (CopyStackTopToRegister (o, false));
1458_ (PopType (o, c_m3Type_i32));
1459
1460_ (EmitOp (o, op_ContinueLoopIf));
1461 EmitPointer (o, scope->pc);
1462 }
1463
1464// dump_type_stack(o);
1465 }
1466 else // is c_waOp_branch
1467 {
1468 _ (EmitOp (o, op_ContinueLoop));
1469 EmitPointer (o, scope->pc);
1470 o->block.isPolymorphic = true;
1471 }
1472 }
1473 else // forward branch
1474 {
1475 pc_t * jumpTo = NULL;
1476
1477 bool isReturn = (scope->depth == 0);
1478 bool targetHasResults = GetFuncTypeNumResults (scope->type);
1479
1480 if (i_opcode == c_waOp_branchIf)
1481 {
1482 if (targetHasResults or isReturn)
1483 {
1484 IM3Operation op = IsStackTopInRegister (o) ? op_BranchIfPrologue_r : op_BranchIfPrologue_s;
1485
1486 _ (EmitOp (o, op));
1487 _ (EmitSlotNumOfStackTopAndPop (o)); // condition
1488
                // this is the continuation point if the branch isn't taken
1490 jumpTo = (pc_t *) ReservePointer (o);
1491 }
1492 else
1493 {
1494 IM3Operation op = IsStackTopInRegister (o) ? op_BranchIf_r : op_BranchIf_s;
1495
1496 _ (EmitOp (o, op));
1497 _ (EmitSlotNumOfStackTopAndPop (o)); // condition
1498
1499 EmitPatchingBranchPointer (o, scope);
1500 goto _catch;
1501 }
1502 }
1503
1504 if (not IsStackPolymorphic (o))
1505 {
1506 if (isReturn)
1507 {
1508_ (ReturnValues (o, scope, true));
1509_ (EmitOp (o, op_Return));
1510 }
1511 else
1512 {
1513_ (ResolveBlockResults (o, scope, true));
1514_ (EmitPatchingBranch (o, scope));
1515 }
1516 }
1517
1518 if (jumpTo)
1519 {
1520 * jumpTo = GetPC (o);
1521 }
1522
1523 if (i_opcode == c_waOp_branch)
1524_ (SetStackPolymorphic (o));
1525 }
1526
1527 _catch: return result;
1528}
1529
1530static
1531M3Result Compile_BranchTable (IM3Compilation o, m3opcode_t i_opcode)
1532{
1533_try {
1534 u32 targetCount;
1535_ (ReadLEB_u32 (& targetCount, & o->wasm, o->wasmEnd));
1536
1537_ (PreserveRegisterIfOccupied (o, c_m3Type_i64)); // move branch operand to a slot
1538 u16 slot = GetStackTopSlotNumber (o);
1539_ (Pop (o));
1540
1541 // OPTZ: according to spec: "forward branches that target a control instruction with a non-empty
1542 // result type consume matching operands first and push them back on the operand stack after unwinding"
1543 // So, this move-to-reg is only necessary if the target scopes have a type.
1544
    u32 numCodeLines = targetCount + 4;     // 4 => IM3Operation + slot + target_count + default_target
1546_ (EnsureCodePageNumLines (o, numCodeLines));
1547
1548_ (EmitOp (o, op_BranchTable));
1549 EmitSlotOffset (o, slot);
1550 EmitConstant32 (o, targetCount);
1551
1552 IM3CodePage continueOpPage = NULL;
1553
1554 ++targetCount; // include default
1555 for (u32 i = 0; i < targetCount; ++i)
1556 {
1557 u32 target;
1558_ (ReadLEB_u32 (& target, & o->wasm, o->wasmEnd));
1559
1560 IM3CompilationScope scope;
1561_ (GetBlockScope (o, & scope, target));
1562
1563 // TODO: don't need codepage rigmarole for
1564 // no-param forward-branch targets
1565
1566_ (AcquireCompilationCodePage (o, & continueOpPage));
1567
1568 pc_t startPC = GetPagePC (continueOpPage);
1569 IM3CodePage savedPage = o->page;
1570 o->page = continueOpPage;
1571
1572 if (scope->opcode == c_waOp_loop)
1573 {
1574_ (ResolveBlockResults (o, scope, true));
1575
1576_ (EmitOp (o, op_ContinueLoop));
1577 EmitPointer (o, scope->pc);
1578 }
1579 else
1580 {
1581 // TODO: this could be fused with equivalent targets
1582 if (not IsStackPolymorphic (o))
1583 {
1584 if (scope->depth == 0)
1585 {
1586_ (ReturnValues (o, scope, true));
1587_ (EmitOp (o, op_Return));
1588 }
1589 else
1590 {
1591_ (ResolveBlockResults (o, scope, true));
1592
1593_ (EmitPatchingBranch (o, scope));
1594 }
1595 }
1596 }
1597
1598 ReleaseCompilationCodePage (o); // FIX: continueOpPage can get lost if thrown
1599 o->page = savedPage;
1600
1601 EmitPointer (o, startPC);
1602 }
1603
1604_ (SetStackPolymorphic (o));
1605
1606 }
1607
1608 _catch: return result;
1609}
1610
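// Lay out the callee's frame just above the highest slot in use: arguments are copied into 64-bit
// aligned i/o slots, and the callee's results are pushed onto the compile-time stack in that same
// region so they are already in place after the call returns.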
1611static
1612M3Result CompileCallArgsAndReturn (IM3Compilation o, u16 * o_stackOffset, IM3FuncType i_type, bool i_isIndirect)
1613{
1614_try {
1615
1616 u16 topSlot = GetMaxUsedSlotPlusOne (o);
1617
1618 // force use of at least one stack slot; this is to help ensure
1619 // the m3 stack overflows (and traps) before the native stack can overflow.
1620 // e.g. see Wasm spec test 'runaway' in call.wast
1621 topSlot = M3_MAX (1, topSlot);
1622
1623 // stack frame is 64-bit aligned
1624 AlignSlotToType (& topSlot, c_m3Type_i64);
1625
1626 * o_stackOffset = topSlot;
1627
1628 // wait to pop this here so that topSlot search is correct
1629 if (i_isIndirect)
1630_ (Pop (o));
1631
1632 u16 numArgs = GetFuncTypeNumParams (i_type);
1633 u16 numRets = GetFuncTypeNumResults (i_type);
1634
1635 u16 argTop = topSlot + (numArgs + numRets) * c_ioSlotCount;
1636
1637 while (numArgs--)
1638 {
1639_ (CopyStackTopToSlot (o, argTop -= c_ioSlotCount));
1640_ (Pop (o));
1641 }
1642
1643 u16 i = 0;
1644 while (numRets--)
1645 {
1646 u8 type = GetFuncTypeResultType (i_type, i++);
1647
1648_ (Push (o, type, topSlot));
1649 MarkSlotsAllocatedByType (o, topSlot, type);
1650
1651 topSlot += c_ioSlotCount;
1652 }
1653
1654 } _catch: return result;
1655}
1656
1657static
1658M3Result Compile_Call (IM3Compilation o, m3opcode_t i_opcode)
1659{
1660_try {
1661 u32 functionIndex;
1662_ (ReadLEB_u32 (& functionIndex, & o->wasm, o->wasmEnd));
1663
1664 IM3Function function = Module_GetFunction (o->module, functionIndex);
1665
1666 if (function)
1667 { m3log (compile, d_indent " (func= [%d] '%s'; args= %d)",
1668 get_indention_string (o), functionIndex, m3_GetFunctionName (function), function->funcType->numArgs);
1669 if (function->module)
1670 {
1671 u16 slotTop;
1672_ (CompileCallArgsAndReturn (o, & slotTop, function->funcType, false));
1673
1674 IM3Operation op;
1675 const void * operand;
1676
1677 if (function->compiled)
1678 {
1679 op = op_Call;
1680 operand = function->compiled;
1681 }
1682 else
1683 {
1684 op = op_Compile;
1685 operand = function;
1686 }
1687
1688_ (EmitOp (o, op));
1689 EmitPointer (o, operand);
1690 EmitSlotOffset (o, slotTop);
1691 }
1692 else
1693 {
1694 _throw (ErrorCompile (m3Err_functionImportMissing, o, "'%s.%s'", GetFunctionImportModuleName (function), m3_GetFunctionName (function)));
1695 }
1696 }
1697 else _throw (m3Err_functionLookupFailed);
1698
1699 } _catch: return result;
1700}
1701
1702static
1703M3Result Compile_CallIndirect (IM3Compilation o, m3opcode_t i_opcode)
1704{
1705_try {
1706 u32 typeIndex;
1707_ (ReadLEB_u32 (& typeIndex, & o->wasm, o->wasmEnd));
1708
1709 u32 tableIndex;
1710_ (ReadLEB_u32 (& tableIndex, & o->wasm, o->wasmEnd));
1711
1712 _throwif ("function call type index out of range", typeIndex >= o->module->numFuncTypes);
1713
1714 if (IsStackTopInRegister (o))
1715_ (PreserveRegisterIfOccupied (o, c_m3Type_i32));
1716
1717 u16 tableIndexSlot = GetStackTopSlotNumber (o);
1718
1719 u16 execTop;
1720 IM3FuncType type = o->module->funcTypes [typeIndex];
1721_ (CompileCallArgsAndReturn (o, & execTop, type, true));
1722
1723_ (EmitOp (o, op_CallIndirect));
1724 EmitSlotOffset (o, tableIndexSlot);
1725 EmitPointer (o, o->module);
1726 EmitPointer (o, type); // TODO: unify all types in M3Environment
1727 EmitSlotOffset (o, execTop);
1728
1729} _catch:
1730 return result;
1731}
1732
1733static
1734M3Result Compile_Memory_Size (IM3Compilation o, m3opcode_t i_opcode)
1735{
1736 M3Result result;
1737
1738 i8 reserved;
1739_ (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd));
1740
1741_ (PreserveRegisterIfOccupied (o, c_m3Type_i32));
1742
1743_ (EmitOp (o, op_MemSize));
1744
1745_ (PushRegister (o, c_m3Type_i32));
1746
1747 _catch: return result;
1748}
1749
1750static
1751M3Result Compile_Memory_Grow (IM3Compilation o, m3opcode_t i_opcode)
1752{
1753 M3Result result;
1754
1755 i8 reserved;
1756_ (ReadLEB_i7 (& reserved, & o->wasm, o->wasmEnd));
1757
1758_ (CopyStackTopToRegister (o, false));
1759_ (PopType (o, c_m3Type_i32));
1760
1761_ (EmitOp (o, op_MemGrow));
1762
1763_ (PushRegister (o, c_m3Type_i32));
1764
1765 _catch: return result;
1766}
1767
1768static
1769M3Result Compile_Memory_CopyFill (IM3Compilation o, m3opcode_t i_opcode)
1770{
1771 M3Result result = m3Err_none;
1772
1773 u32 sourceMemoryIdx, targetMemoryIdx;
1774 IM3Operation op;
1775 if (i_opcode == c_waOp_memoryCopy)
1776 {
1777_ (ReadLEB_u32 (& sourceMemoryIdx, & o->wasm, o->wasmEnd));
1778 op = op_MemCopy;
1779 }
1780 else op = op_MemFill;
1781
1782_ (ReadLEB_u32 (& targetMemoryIdx, & o->wasm, o->wasmEnd));
1783
1784_ (CopyStackTopToRegister (o, false));
1785
1786_ (EmitOp (o, op));
1787_ (PopType (o, c_m3Type_i32));
1788_ (EmitSlotNumOfStackTopAndPop (o));
1789_ (EmitSlotNumOfStackTopAndPop (o));
1790
1791 _catch: return result;
1792}
1793
1794
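// Read a block type. A negative (shorthand) encoding selects one of the environment's canonical
// zero-or-one-result function types; a non-negative value is an index into the module's type section.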
1795static
1796M3Result ReadBlockType (IM3Compilation o, IM3FuncType * o_blockType)
1797{
1798 M3Result result;
1799
1800 i64 type;
1801_ (ReadLebSigned (& type, 33, & o->wasm, o->wasmEnd));
1802
1803 if (type < 0)
1804 {
1805 u8 valueType;
1806_ (NormalizeType (&valueType, type)); m3log (compile, d_indent " (type: %s)", get_indention_string (o), c_waTypes [valueType]);
1807 *o_blockType = o->module->environment->retFuncTypes[valueType];
1808 }
1809 else
1810 {
1811 _throwif("func type out of bounds", type >= o->module->numFuncTypes);
1812 *o_blockType = o->module->funcTypes[type]; m3log (compile, d_indent " (type: %s)", get_indention_string (o), SPrintFuncTypeSignature (*o_blockType));
1813 }
1814 _catch: return result;
1815}
1816
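// Before entering a new block, copy any args/locals that are currently referenced on the value stack
// into preservation slots, so a set_local inside the block can't clobber the values those stack
// entries refer to.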
1817static
1818M3Result PreserveArgsAndLocals (IM3Compilation o)
1819{
1820 M3Result result = m3Err_none;
1821
1822 if (o->stackIndex > o->stackFirstDynamicIndex)
1823 {
1824 u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o->function);
1825
1826 for (u32 i = 0; i < numArgsAndLocals; ++i)
1827 {
1828 u16 slot = GetSlotForStackIndex (o, i);
1829
1830 u16 preservedSlotNumber;
1831_ (FindReferencedLocalWithinCurrentBlock (o, & preservedSlotNumber, slot));
1832
1833 if (preservedSlotNumber != slot)
1834 {
1835 u8 type = GetStackTypeFromBottom (o, i); d_m3Assert (type != c_m3Type_none)
1836 IM3Operation op = Is64BitType (type) ? op_CopySlot_64 : op_CopySlot_32;
1837
1838 EmitOp (o, op);
1839 EmitSlotOffset (o, preservedSlotNumber);
1840 EmitSlotOffset (o, slot);
1841 }
1842 }
1843 }
1844
1845 _catch:
1846 return result;
1847}
1848
1849static
1850M3Result Compile_LoopOrBlock (IM3Compilation o, m3opcode_t i_opcode)
1851{
1852 M3Result result;
1853
1854 // TODO: these shouldn't be necessary for non-loop blocks?
1855_ (PreserveRegisters (o));
1856_ (PreserveArgsAndLocals (o));
1857
1858 IM3FuncType blockType;
1859_ (ReadBlockType (o, & blockType));
1860
1861 if (i_opcode == c_waOp_loop)
1862 {
1863 u16 numParams = GetFuncTypeNumParams (blockType);
1864 if (numParams)
1865 {
1866 // instantiate constants
            u16 numValues = GetNumBlockValuesOnStack (o);   // CompileBlock enforces this at compile time
1868 d_m3Assert (numValues >= numParams);
1869 if (numValues >= numParams)
1870 {
1871 u16 stackTop = GetStackTopIndex (o) + 1;
1872
1873 for (u16 i = stackTop - numParams; i < stackTop; ++i)
1874 {
1875 u16 slot = GetSlotForStackIndex (o, i);
1876 u8 type = GetStackTypeFromBottom (o, i);
1877
1878 if (IsConstantSlot (o, slot))
1879 {
1880 u16 newSlot;
1881_ (AllocateSlots (o, & newSlot, type));
1882_ (CopyStackIndexToSlot (o, newSlot, i));
1883 o->wasmStack [i] = newSlot;
1884 }
1885 }
1886 }
1887 }
1888
1889_ (EmitOp (o, op_Loop));
1890 }
1891 else
1892 {
1893 }
1894
1895_ (CompileBlock (o, blockType, i_opcode));
1896
1897 _catch: return result;
1898}
1899
1900static
1901M3Result CompileElseBlock (IM3Compilation o, pc_t * o_startPC, IM3FuncType i_blockType)
1902{
1903_try {
1904
1905 IM3CodePage elsePage;
1906_ (AcquireCompilationCodePage (o, & elsePage));
1907
1908 * o_startPC = GetPagePC (elsePage);
1909
1910 IM3CodePage savedPage = o->page;
1911 o->page = elsePage;
1912
1913_ (CompileBlock (o, i_blockType, c_waOp_else));
1914
1915_ (EmitOp (o, op_Branch));
1916 EmitPointer (o, GetPagePC (savedPage));
1917
1918 ReleaseCompilationCodePage (o);
1919
1920 o->page = savedPage;
1921
1922} _catch:
1923 return result;
1924}
1925
1926static
1927M3Result Compile_If (IM3Compilation o, m3opcode_t i_opcode)
1928{
1929 /* [ op_If ]
1930 [ <else-pc> ] ----> [ ..else.. ]
1931 [ ..if.. ] [ ..block.. ]
1932 [ ..block.. ] [ op_Branch ]
1933 [ end ] <----- [ <end-pc> ] */
1934
1935_try {
1936
1937_ (PreserveNonTopRegisters (o));
1938_ (PreserveArgsAndLocals (o));
1939
1940 IM3Operation op = IsStackTopInRegister (o) ? op_If_r : op_If_s;
1941
1942_ (EmitOp (o, op));
1943_ (EmitSlotNumOfStackTopAndPop (o));
1944
1945 pc_t * pc = (pc_t *) ReservePointer (o);
1946
1947 IM3FuncType blockType;
1948_ (ReadBlockType (o, & blockType));
1949
1950// dump_type_stack (o);
1951
1952 u16 stackIndex = o->stackIndex;
1953
1954_ (CompileBlock (o, blockType, i_opcode));
1955
1956 if (o->previousOpcode == c_waOp_else)
1957 {
1958 o->stackIndex = stackIndex;
1959_ (CompileElseBlock (o, pc, blockType));
1960 }
1961 else
1962 {
1963 // if block produces values and there isn't a defined else
1964 // case, then we need to make one up so that the pass-through
1965 // results end up in the right place
1966 if (GetFuncTypeNumResults (blockType))
1967 {
1968 // rewind to the if's end to create a fake else block
1969 o->wasm--;
1970 o->stackIndex = stackIndex;
1971
1972// dump_type_stack (o);
1973
1974_ (CompileElseBlock (o, pc, blockType));
1975 }
1976 else * pc = GetPC (o);
1977 }
1978
1979 } _catch: return result;
1980}
1981
1982static
1983M3Result Compile_Select (IM3Compilation o, m3opcode_t i_opcode)
1984{
1985 M3Result result = m3Err_none;
1986
1987 u16 slots [3] = { c_slotUnused, c_slotUnused, c_slotUnused };
1988
1989 u8 type = GetStackTypeFromTop (o, 1); // get type of selection
1990
1991 IM3Operation op = NULL;
1992
1993 if (IsFpType (type))
1994 {
1995# if d_m3HasFloat
1996 // not consuming a fp reg, so preserve
1997 if (not IsStackTopMinus1InRegister (o) and
1998 not IsStackTopMinus2InRegister (o))
1999 {
2000_ (PreserveRegisterIfOccupied (o, type));
2001 }
2002
2003 bool selectorInReg = IsStackTopInRegister (o);
2004 slots [0] = GetStackTopSlotNumber (o);
2005_ (Pop (o));
2006
2007 u32 opIndex = 0;
2008
2009 for (u32 i = 1; i <= 2; ++i)
2010 {
2011 if (IsStackTopInRegister (o))
2012 opIndex = i;
2013 else
2014 slots [i] = GetStackTopSlotNumber (o);
2015
2016_ (Pop (o));
2017 }
2018
2019 op = c_fpSelectOps [type - c_m3Type_f32] [selectorInReg] [opIndex];
2020# else
2021 _throw (m3Err_unknownOpcode);
2022# endif
2023 }
2024 else if (IsIntType (type))
2025 {
        // the 'sss' operation doesn't consume a register, so we might have to protect its contents
2027 if (not IsStackTopInRegister (o) and
2028 not IsStackTopMinus1InRegister (o) and
2029 not IsStackTopMinus2InRegister (o))
2030 {
2031_ (PreserveRegisterIfOccupied (o, type));
2032 }
2033
2034 u32 opIndex = 3; // op_Select_*_sss
2035
2036 for (u32 i = 0; i < 3; ++i)
2037 {
2038 if (IsStackTopInRegister (o))
2039 opIndex = i;
2040 else
2041 slots [i] = GetStackTopSlotNumber (o);
2042
2043_ (Pop (o));
2044 }
2045
2046 op = c_intSelectOps [type - c_m3Type_i32] [opIndex];
2047 }
2048 else if (not IsStackPolymorphic (o))
2049 _throw (m3Err_functionStackUnderrun);
2050
2051 EmitOp (o, op);
2052 for (u32 i = 0; i < 3; i++)
2053 {
2054 if (IsValidSlot (slots [i]))
2055 EmitSlotOffset (o, slots [i]);
2056 }
2057_ (PushRegister (o, type));
2058
2059 _catch: return result;
2060}
2061
2062static
2063M3Result Compile_Drop (IM3Compilation o, m3opcode_t i_opcode)
2064{
2065 M3Result result = Pop (o); if (d_m3LogWasmStack) dump_type_stack (o);
2066 return result;
2067}
2068
2069static
2070M3Result Compile_Nop (IM3Compilation o, m3opcode_t i_opcode)
2071{
2072 return m3Err_none;
2073}
2074
2075static
2076M3Result Compile_Unreachable (IM3Compilation o, m3opcode_t i_opcode)
2077{
2078 M3Result result;
2079
2080_ (AddTrapRecord (o));
2081
2082_ (EmitOp (o, op_Unreachable));
2083_ (SetStackPolymorphic (o));
2084
2085 _catch:
2086 return result;
2087}
2088
2089
2090// OPTZ: currently all stack slot indices take up a full word, but
2091// dual stack source operands could be packed together
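// Generic operator compiler: pick the operation variant that matches where the operands currently
// live (register vs slot), emit slot offsets for the slot-resident operands, and push the result
// register when the op produces a value.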
2092static
2093M3Result Compile_Operator (IM3Compilation o, m3opcode_t i_opcode)
2094{
2095 M3Result result;
2096
2097 IM3OpInfo opInfo = GetOpInfo (i_opcode);
2098 _throwif (m3Err_unknownOpcode, not opInfo);
2099
2100 IM3Operation op;
2101
    // This preserve is for FP compare operations.
    // either we need additional slot-destination operations, or the
    // easy fix: move _r0 out of the way.
    // moving it out of the way might be the optimal solution most often?
2106 // otherwise, the _r0 reg can get buried down in the stack
2107 // and be idle & wasted for a moment.
2108 if (IsFpType (GetStackTopType (o)) and IsIntType (opInfo->type))
2109 {
2110_ (PreserveRegisterIfOccupied (o, opInfo->type));
2111 }
2112
    if (opInfo->stackOffset == 0)
    {
        if (IsStackTopInRegister (o))
        {
            op = opInfo->operations [0];  // _s
        }
        else
        {
_           (PreserveRegisterIfOccupied (o, opInfo->type));
            op = opInfo->operations [1];  // _r
        }
    }
    else
    {
        if (IsStackTopInRegister (o))
        {
            op = opInfo->operations [0];  // _rs

            if (IsStackTopMinus1InRegister (o))
            {   d_m3Assert (i_opcode == c_waOp_store_f32 or i_opcode == c_waOp_store_f64);
                op = opInfo->operations [3];  // _rr for fp.store
            }
        }
        else if (IsStackTopMinus1InRegister (o))
        {
            op = opInfo->operations [1];  // _sr

            if (not op)  // must be commutative, then
                op = opInfo->operations [0];
        }
        else
        {
_           (PreserveRegisterIfOccupied (o, opInfo->type));  // _ss
            op = opInfo->operations [2];
        }
    }

    if (op)
    {
_       (EmitOp (o, op));

_       (EmitSlotNumOfStackTopAndPop (o));

        if (opInfo->stackOffset < 0)
_           (EmitSlotNumOfStackTopAndPop (o));

        if (opInfo->type != c_m3Type_none)
_           (PushRegister (o, opInfo->type));
    }
    else
    {
# ifdef DEBUG
        result = ErrorCompile ("no operation found for opcode", o, "'%s'", opInfo->name);
# else
        result = ErrorCompile ("no operation found for opcode", o, "%x", i_opcode);
# endif
        _throw (result);
    }

    _catch: return result;
}

static
M3Result Compile_Convert (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
    _throwif (m3Err_unknownOpcode, not opInfo);

    bool destInSlot = IsRegisterTypeAllocated (o, opInfo->type);
    bool sourceInSlot = IsStackTopInSlot (o);

    IM3Operation op = opInfo->operations [destInSlot * 2 + sourceInSlot];
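    // destInSlot * 2 + sourceInSlot indexes the d_convertOpList ordering { _r_r, _r_s, _s_r, _s_s }:
    // the first suffix letter is the destination location, the second is the source location.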

_   (EmitOp (o, op));
_   (EmitSlotNumOfStackTopAndPop (o));

    if (destInSlot)
_       (PushAllocatedSlotAndEmit (o, opInfo->type))
    else
_       (PushRegister (o, opInfo->type))

}
    _catch: return result;
}

static
M3Result Compile_Load_Store (IM3Compilation o, m3opcode_t i_opcode)
{
_try {
    u32 alignHint, memoryOffset;

_   (ReadLEB_u32 (& alignHint, & o->wasm, o->wasmEnd));
_   (ReadLEB_u32 (& memoryOffset, & o->wasm, o->wasmEnd));
    m3log (compile, d_indent " (offset = %d)", get_indention_string (o), memoryOffset);
    IM3OpInfo opInfo = GetOpInfo (i_opcode);
    _throwif (m3Err_unknownOpcode, not opInfo);

    if (IsFpType (opInfo->type))
_       (PreserveRegisterIfOccupied (o, c_m3Type_f64));

_   (Compile_Operator (o, i_opcode));

    EmitConstant32 (o, memoryOffset);
}
    _catch: return result;
}


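// Emits a minimal four-word code sequence for a native (raw) function binding:
// [ op_CallRawFunction, native function pointer, IM3Function, userdata ]. io_function->compiled is
// pointed at this sequence, so invoking the function executes the native callback.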
M3Result CompileRawFunction (IM3Module io_module, IM3Function io_function, const void * i_function, const void * i_userdata)
{
    d_m3Assert (io_module->runtime);

    IM3CodePage page = AcquireCodePageWithCapacity (io_module->runtime, 4);

    if (page)
    {
        io_function->compiled = GetPagePC (page);
        io_function->module = io_module;

        EmitWord (page, op_CallRawFunction);
        EmitWord (page, i_function);
        EmitWord (page, io_function);
        EmitWord (page, i_userdata);

        ReleaseCodePage (io_module->runtime, page);
        return m3Err_none;
    }
    else {
        return m3Err_mallocFailedCodePage;
    }
}



// the d_logOp and d_logOp2 macros aren't actually used by the compiler; they only aid codepage decoding (d_m3LogCodePages = 1)
#define d_logOp(OP)                         { op_##OP,                  NULL,                       NULL,                       NULL }
#define d_logOp2(OP1,OP2)                   { op_##OP1,                 op_##OP2,                   NULL,                       NULL }

#define d_emptyOpList                       { NULL,                     NULL,                       NULL,                       NULL }
#define d_unaryOpList(TYPE, NAME)           { op_##TYPE##_##NAME##_r,   op_##TYPE##_##NAME##_s,     NULL,                       NULL }
#define d_binOpList(TYPE, NAME)             { op_##TYPE##_##NAME##_rs,  op_##TYPE##_##NAME##_sr,    op_##TYPE##_##NAME##_ss,    NULL }
#define d_storeFpOpList(TYPE, NAME)         { op_##TYPE##_##NAME##_rs,  op_##TYPE##_##NAME##_sr,    op_##TYPE##_##NAME##_ss,    op_##TYPE##_##NAME##_rr }
#define d_commutativeBinOpList(TYPE, NAME)  { op_##TYPE##_##NAME##_rs,  NULL,                       op_##TYPE##_##NAME##_ss,    NULL }
#define d_convertOpList(OP)                 { op_##OP##_r_r,            op_##OP##_r_s,              op_##OP##_s_r,              op_##OP##_s_s }
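// Each list holds the (up to) four operand-location variants that Compile_Operator and
// Compile_Convert index as operations[0..3]; a NULL entry means that variant doesn't exist
// (e.g. a commutative op reuses its _rs operation in place of a dedicated _sr one).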
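// Main opcode table, indexed directly by the single-byte wasm opcode; M3OP_RESERVED placeholders
// keep the indices aligned with the opcode values in the trailing comments. Each M3OP row gives the
// mnemonic, its stack delta, the result type, the operation-variant list and an optional Compile_*
// handler; rows with a NULL handler are compiled by the generic Compile_Operator. M3OP_F marks
// entries that require d_m3HasFloat.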
const M3OpInfo c_operations [] =
{
    M3OP( "unreachable",            0, none,  d_logOp (Unreachable),              Compile_Unreachable ),      // 0x00
    M3OP( "nop",                    0, none,  d_emptyOpList,                      Compile_Nop ),              // 0x01
    M3OP( "block",                  0, none,  d_emptyOpList,                      Compile_LoopOrBlock ),      // 0x02
    M3OP( "loop",                   0, none,  d_logOp (Loop),                     Compile_LoopOrBlock ),      // 0x03
    M3OP( "if",                    -1, none,  d_emptyOpList,                      Compile_If ),               // 0x04
    M3OP( "else",                   0, none,  d_emptyOpList,                      Compile_Nop ),              // 0x05

    M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                // 0x06...0x0a

    M3OP( "end",                    0, none,  d_emptyOpList,                      Compile_End ),              // 0x0b
    M3OP( "br",                     0, none,  d_logOp (Branch),                   Compile_Branch ),           // 0x0c
    M3OP( "br_if",                 -1, none,  d_logOp2 (BranchIf_r, BranchIf_s),  Compile_Branch ),           // 0x0d
    M3OP( "br_table",              -1, none,  d_logOp (BranchTable),              Compile_BranchTable ),      // 0x0e
    M3OP( "return",                 0, any,   d_logOp (Return),                   Compile_Return ),           // 0x0f
    M3OP( "call",                   0, any,   d_logOp (Call),                     Compile_Call ),             // 0x10
    M3OP( "call_indirect",          0, any,   d_logOp (CallIndirect),             Compile_CallIndirect ),     // 0x11
    M3OP( "return_call",            0, any,   d_emptyOpList,                      Compile_Call ),             // 0x12 TODO: Optimize
    M3OP( "return_call_indirect",   0, any,   d_emptyOpList,                      Compile_CallIndirect ),     // 0x13

    M3OP_RESERVED, M3OP_RESERVED,                                                                             // 0x14...
    M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                               // ...0x19

    M3OP( "drop",                  -1, none,  d_emptyOpList,                      Compile_Drop ),             // 0x1a
    M3OP( "select",                -2, any,   d_emptyOpList,                      Compile_Select ),           // 0x1b

    M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                               // 0x1c...0x1f

    M3OP( "local.get",              1, any,   d_emptyOpList,                      Compile_GetLocal ),         // 0x20
    M3OP( "local.set",              1, none,  d_emptyOpList,                      Compile_SetLocal ),         // 0x21
    M3OP( "local.tee",              0, any,   d_emptyOpList,                      Compile_SetLocal ),         // 0x22
    M3OP( "global.get",             1, none,  d_emptyOpList,                      Compile_GetSetGlobal ),     // 0x23
    M3OP( "global.set",             1, none,  d_emptyOpList,                      Compile_GetSetGlobal ),     // 0x24

    M3OP_RESERVED, M3OP_RESERVED, M3OP_RESERVED,                                                              // 0x25...0x27

    M3OP( "i32.load",               0, i_32,  d_unaryOpList (i32, Load_i32),      Compile_Load_Store ),       // 0x28
    M3OP( "i64.load",               0, i_64,  d_unaryOpList (i64, Load_i64),      Compile_Load_Store ),       // 0x29
    M3OP_F( "f32.load",             0, f_32,  d_unaryOpList (f32, Load_f32),      Compile_Load_Store ),       // 0x2a
    M3OP_F( "f64.load",             0, f_64,  d_unaryOpList (f64, Load_f64),      Compile_Load_Store ),       // 0x2b

    M3OP( "i32.load8_s",            0, i_32,  d_unaryOpList (i32, Load_i8),       Compile_Load_Store ),       // 0x2c
    M3OP( "i32.load8_u",            0, i_32,  d_unaryOpList (i32, Load_u8),       Compile_Load_Store ),       // 0x2d
    M3OP( "i32.load16_s",           0, i_32,  d_unaryOpList (i32, Load_i16),      Compile_Load_Store ),       // 0x2e
    M3OP( "i32.load16_u",           0, i_32,  d_unaryOpList (i32, Load_u16),      Compile_Load_Store ),       // 0x2f

    M3OP( "i64.load8_s",            0, i_64,  d_unaryOpList (i64, Load_i8),       Compile_Load_Store ),       // 0x30
    M3OP( "i64.load8_u",            0, i_64,  d_unaryOpList (i64, Load_u8),       Compile_Load_Store ),       // 0x31
    M3OP( "i64.load16_s",           0, i_64,  d_unaryOpList (i64, Load_i16),      Compile_Load_Store ),       // 0x32
    M3OP( "i64.load16_u",           0, i_64,  d_unaryOpList (i64, Load_u16),      Compile_Load_Store ),       // 0x33
    M3OP( "i64.load32_s",           0, i_64,  d_unaryOpList (i64, Load_i32),      Compile_Load_Store ),       // 0x34
    M3OP( "i64.load32_u",           0, i_64,  d_unaryOpList (i64, Load_u32),      Compile_Load_Store ),       // 0x35

    M3OP( "i32.store",             -2, none,  d_binOpList (i32, Store_i32),       Compile_Load_Store ),       // 0x36
    M3OP( "i64.store",             -2, none,  d_binOpList (i64, Store_i64),       Compile_Load_Store ),       // 0x37
    M3OP_F( "f32.store",           -2, none,  d_storeFpOpList (f32, Store_f32),   Compile_Load_Store ),       // 0x38
    M3OP_F( "f64.store",           -2, none,  d_storeFpOpList (f64, Store_f64),   Compile_Load_Store ),       // 0x39

    M3OP( "i32.store8",            -2, none,  d_binOpList (i32, Store_u8),        Compile_Load_Store ),       // 0x3a
    M3OP( "i32.store16",           -2, none,  d_binOpList (i32, Store_i16),       Compile_Load_Store ),       // 0x3b

    M3OP( "i64.store8",            -2, none,  d_binOpList (i64, Store_u8),        Compile_Load_Store ),       // 0x3c
    M3OP( "i64.store16",           -2, none,  d_binOpList (i64, Store_i16),       Compile_Load_Store ),       // 0x3d
    M3OP( "i64.store32",           -2, none,  d_binOpList (i64, Store_i32),       Compile_Load_Store ),       // 0x3e

    M3OP( "memory.size",            1, i_32,  d_logOp (MemSize),                  Compile_Memory_Size ),      // 0x3f
    M3OP( "memory.grow",            1, i_32,  d_logOp (MemGrow),                  Compile_Memory_Grow ),      // 0x40

    M3OP( "i32.const",              1, i_32,  d_logOp (Const32),                  Compile_Const_i32 ),        // 0x41
    M3OP( "i64.const",              1, i_64,  d_logOp (Const64),                  Compile_Const_i64 ),        // 0x42
    M3OP_F( "f32.const",            1, f_32,  d_emptyOpList,                      Compile_Const_f32 ),        // 0x43
    M3OP_F( "f64.const",            1, f_64,  d_emptyOpList,                      Compile_Const_f64 ),        // 0x44

    M3OP( "i32.eqz",                0, i_32,  d_unaryOpList (i32, EqualToZero),           NULL ),             // 0x45
    M3OP( "i32.eq",                -1, i_32,  d_commutativeBinOpList (i32, Equal),        NULL ),             // 0x46
    M3OP( "i32.ne",                -1, i_32,  d_commutativeBinOpList (i32, NotEqual),     NULL ),             // 0x47
    M3OP( "i32.lt_s",              -1, i_32,  d_binOpList (i32, LessThan),                NULL ),             // 0x48
    M3OP( "i32.lt_u",              -1, i_32,  d_binOpList (u32, LessThan),                NULL ),             // 0x49
    M3OP( "i32.gt_s",              -1, i_32,  d_binOpList (i32, GreaterThan),             NULL ),             // 0x4a
    M3OP( "i32.gt_u",              -1, i_32,  d_binOpList (u32, GreaterThan),             NULL ),             // 0x4b
    M3OP( "i32.le_s",              -1, i_32,  d_binOpList (i32, LessThanOrEqual),         NULL ),             // 0x4c
    M3OP( "i32.le_u",              -1, i_32,  d_binOpList (u32, LessThanOrEqual),         NULL ),             // 0x4d
    M3OP( "i32.ge_s",              -1, i_32,  d_binOpList (i32, GreaterThanOrEqual),      NULL ),             // 0x4e
    M3OP( "i32.ge_u",              -1, i_32,  d_binOpList (u32, GreaterThanOrEqual),      NULL ),             // 0x4f

    M3OP( "i64.eqz",                0, i_32,  d_unaryOpList (i64, EqualToZero),           NULL ),             // 0x50
    M3OP( "i64.eq",                -1, i_32,  d_commutativeBinOpList (i64, Equal),        NULL ),             // 0x51
    M3OP( "i64.ne",                -1, i_32,  d_commutativeBinOpList (i64, NotEqual),     NULL ),             // 0x52
    M3OP( "i64.lt_s",              -1, i_32,  d_binOpList (i64, LessThan),                NULL ),             // 0x53
    M3OP( "i64.lt_u",              -1, i_32,  d_binOpList (u64, LessThan),                NULL ),             // 0x54
    M3OP( "i64.gt_s",              -1, i_32,  d_binOpList (i64, GreaterThan),             NULL ),             // 0x55
    M3OP( "i64.gt_u",              -1, i_32,  d_binOpList (u64, GreaterThan),             NULL ),             // 0x56
    M3OP( "i64.le_s",              -1, i_32,  d_binOpList (i64, LessThanOrEqual),         NULL ),             // 0x57
    M3OP( "i64.le_u",              -1, i_32,  d_binOpList (u64, LessThanOrEqual),         NULL ),             // 0x58
    M3OP( "i64.ge_s",              -1, i_32,  d_binOpList (i64, GreaterThanOrEqual),      NULL ),             // 0x59
    M3OP( "i64.ge_u",              -1, i_32,  d_binOpList (u64, GreaterThanOrEqual),      NULL ),             // 0x5a

    M3OP_F( "f32.eq",              -1, i_32,  d_commutativeBinOpList (f32, Equal),        NULL ),             // 0x5b
    M3OP_F( "f32.ne",              -1, i_32,  d_commutativeBinOpList (f32, NotEqual),     NULL ),             // 0x5c
    M3OP_F( "f32.lt",              -1, i_32,  d_binOpList (f32, LessThan),                NULL ),             // 0x5d
    M3OP_F( "f32.gt",              -1, i_32,  d_binOpList (f32, GreaterThan),             NULL ),             // 0x5e
    M3OP_F( "f32.le",              -1, i_32,  d_binOpList (f32, LessThanOrEqual),         NULL ),             // 0x5f
    M3OP_F( "f32.ge",              -1, i_32,  d_binOpList (f32, GreaterThanOrEqual),      NULL ),             // 0x60

    M3OP_F( "f64.eq",              -1, i_32,  d_commutativeBinOpList (f64, Equal),        NULL ),             // 0x61
    M3OP_F( "f64.ne",              -1, i_32,  d_commutativeBinOpList (f64, NotEqual),     NULL ),             // 0x62
    M3OP_F( "f64.lt",              -1, i_32,  d_binOpList (f64, LessThan),                NULL ),             // 0x63
    M3OP_F( "f64.gt",              -1, i_32,  d_binOpList (f64, GreaterThan),             NULL ),             // 0x64
    M3OP_F( "f64.le",              -1, i_32,  d_binOpList (f64, LessThanOrEqual),         NULL ),             // 0x65
    M3OP_F( "f64.ge",              -1, i_32,  d_binOpList (f64, GreaterThanOrEqual),      NULL ),             // 0x66

    M3OP( "i32.clz",                0, i_32,  d_unaryOpList (u32, Clz),                   NULL ),             // 0x67
    M3OP( "i32.ctz",                0, i_32,  d_unaryOpList (u32, Ctz),                   NULL ),             // 0x68
    M3OP( "i32.popcnt",             0, i_32,  d_unaryOpList (u32, Popcnt),                NULL ),             // 0x69

    M3OP( "i32.add",               -1, i_32,  d_commutativeBinOpList (i32, Add),          NULL ),             // 0x6a
    M3OP( "i32.sub",               -1, i_32,  d_binOpList (i32, Subtract),                NULL ),             // 0x6b
    M3OP( "i32.mul",               -1, i_32,  d_commutativeBinOpList (i32, Multiply),     NULL ),             // 0x6c
    M3OP( "i32.div_s",             -1, i_32,  d_binOpList (i32, Divide),                  NULL ),             // 0x6d
    M3OP( "i32.div_u",             -1, i_32,  d_binOpList (u32, Divide),                  NULL ),             // 0x6e
    M3OP( "i32.rem_s",             -1, i_32,  d_binOpList (i32, Remainder),               NULL ),             // 0x6f
    M3OP( "i32.rem_u",             -1, i_32,  d_binOpList (u32, Remainder),               NULL ),             // 0x70
    M3OP( "i32.and",               -1, i_32,  d_commutativeBinOpList (u32, And),          NULL ),             // 0x71
    M3OP( "i32.or",                -1, i_32,  d_commutativeBinOpList (u32, Or),           NULL ),             // 0x72
    M3OP( "i32.xor",               -1, i_32,  d_commutativeBinOpList (u32, Xor),          NULL ),             // 0x73
    M3OP( "i32.shl",               -1, i_32,  d_binOpList (u32, ShiftLeft),               NULL ),             // 0x74
    M3OP( "i32.shr_s",             -1, i_32,  d_binOpList (i32, ShiftRight),              NULL ),             // 0x75
    M3OP( "i32.shr_u",             -1, i_32,  d_binOpList (u32, ShiftRight),              NULL ),             // 0x76
    M3OP( "i32.rotl",              -1, i_32,  d_binOpList (u32, Rotl),                    NULL ),             // 0x77
    M3OP( "i32.rotr",              -1, i_32,  d_binOpList (u32, Rotr),                    NULL ),             // 0x78

    M3OP( "i64.clz",                0, i_64,  d_unaryOpList (u64, Clz),                   NULL ),             // 0x79
    M3OP( "i64.ctz",                0, i_64,  d_unaryOpList (u64, Ctz),                   NULL ),             // 0x7a
    M3OP( "i64.popcnt",             0, i_64,  d_unaryOpList (u64, Popcnt),                NULL ),             // 0x7b

    M3OP( "i64.add",               -1, i_64,  d_commutativeBinOpList (i64, Add),          NULL ),             // 0x7c
    M3OP( "i64.sub",               -1, i_64,  d_binOpList (i64, Subtract),                NULL ),             // 0x7d
    M3OP( "i64.mul",               -1, i_64,  d_commutativeBinOpList (i64, Multiply),     NULL ),             // 0x7e
    M3OP( "i64.div_s",             -1, i_64,  d_binOpList (i64, Divide),                  NULL ),             // 0x7f
    M3OP( "i64.div_u",             -1, i_64,  d_binOpList (u64, Divide),                  NULL ),             // 0x80
    M3OP( "i64.rem_s",             -1, i_64,  d_binOpList (i64, Remainder),               NULL ),             // 0x81
    M3OP( "i64.rem_u",             -1, i_64,  d_binOpList (u64, Remainder),               NULL ),             // 0x82
    M3OP( "i64.and",               -1, i_64,  d_commutativeBinOpList (u64, And),          NULL ),             // 0x83
    M3OP( "i64.or",                -1, i_64,  d_commutativeBinOpList (u64, Or),           NULL ),             // 0x84
    M3OP( "i64.xor",               -1, i_64,  d_commutativeBinOpList (u64, Xor),          NULL ),             // 0x85
    M3OP( "i64.shl",               -1, i_64,  d_binOpList (u64, ShiftLeft),               NULL ),             // 0x86
    M3OP( "i64.shr_s",             -1, i_64,  d_binOpList (i64, ShiftRight),              NULL ),             // 0x87
    M3OP( "i64.shr_u",             -1, i_64,  d_binOpList (u64, ShiftRight),              NULL ),             // 0x88
    M3OP( "i64.rotl",              -1, i_64,  d_binOpList (u64, Rotl),                    NULL ),             // 0x89
    M3OP( "i64.rotr",              -1, i_64,  d_binOpList (u64, Rotr),                    NULL ),             // 0x8a

    M3OP_F( "f32.abs",              0, f_32,  d_unaryOpList (f32, Abs),                   NULL ),             // 0x8b
    M3OP_F( "f32.neg",              0, f_32,  d_unaryOpList (f32, Negate),                NULL ),             // 0x8c
    M3OP_F( "f32.ceil",             0, f_32,  d_unaryOpList (f32, Ceil),                  NULL ),             // 0x8d
    M3OP_F( "f32.floor",            0, f_32,  d_unaryOpList (f32, Floor),                 NULL ),             // 0x8e
    M3OP_F( "f32.trunc",            0, f_32,  d_unaryOpList (f32, Trunc),                 NULL ),             // 0x8f
    M3OP_F( "f32.nearest",          0, f_32,  d_unaryOpList (f32, Nearest),               NULL ),             // 0x90
    M3OP_F( "f32.sqrt",             0, f_32,  d_unaryOpList (f32, Sqrt),                  NULL ),             // 0x91

    M3OP_F( "f32.add",             -1, f_32,  d_commutativeBinOpList (f32, Add),          NULL ),             // 0x92
    M3OP_F( "f32.sub",             -1, f_32,  d_binOpList (f32, Subtract),                NULL ),             // 0x93
    M3OP_F( "f32.mul",             -1, f_32,  d_commutativeBinOpList (f32, Multiply),     NULL ),             // 0x94
    M3OP_F( "f32.div",             -1, f_32,  d_binOpList (f32, Divide),                  NULL ),             // 0x95
    M3OP_F( "f32.min",             -1, f_32,  d_commutativeBinOpList (f32, Min),          NULL ),             // 0x96
    M3OP_F( "f32.max",             -1, f_32,  d_commutativeBinOpList (f32, Max),          NULL ),             // 0x97
    M3OP_F( "f32.copysign",        -1, f_32,  d_binOpList (f32, CopySign),                NULL ),             // 0x98

    M3OP_F( "f64.abs",              0, f_64,  d_unaryOpList (f64, Abs),                   NULL ),             // 0x99
    M3OP_F( "f64.neg",              0, f_64,  d_unaryOpList (f64, Negate),                NULL ),             // 0x9a
    M3OP_F( "f64.ceil",             0, f_64,  d_unaryOpList (f64, Ceil),                  NULL ),             // 0x9b
    M3OP_F( "f64.floor",            0, f_64,  d_unaryOpList (f64, Floor),                 NULL ),             // 0x9c
    M3OP_F( "f64.trunc",            0, f_64,  d_unaryOpList (f64, Trunc),                 NULL ),             // 0x9d
    M3OP_F( "f64.nearest",          0, f_64,  d_unaryOpList (f64, Nearest),               NULL ),             // 0x9e
    M3OP_F( "f64.sqrt",             0, f_64,  d_unaryOpList (f64, Sqrt),                  NULL ),             // 0x9f

    M3OP_F( "f64.add",             -1, f_64,  d_commutativeBinOpList (f64, Add),          NULL ),             // 0xa0
    M3OP_F( "f64.sub",             -1, f_64,  d_binOpList (f64, Subtract),                NULL ),             // 0xa1
    M3OP_F( "f64.mul",             -1, f_64,  d_commutativeBinOpList (f64, Multiply),     NULL ),             // 0xa2
    M3OP_F( "f64.div",             -1, f_64,  d_binOpList (f64, Divide),                  NULL ),             // 0xa3
    M3OP_F( "f64.min",             -1, f_64,  d_commutativeBinOpList (f64, Min),          NULL ),             // 0xa4
    M3OP_F( "f64.max",             -1, f_64,  d_commutativeBinOpList (f64, Max),          NULL ),             // 0xa5
    M3OP_F( "f64.copysign",        -1, f_64,  d_binOpList (f64, CopySign),                NULL ),             // 0xa6

    M3OP( "i32.wrap/i64",           0, i_32,  d_unaryOpList (i32, Wrap_i64),              NULL ),             // 0xa7
    M3OP_F( "i32.trunc_s/f32",      0, i_32,  d_convertOpList (i32_Trunc_f32),            Compile_Convert ),  // 0xa8
    M3OP_F( "i32.trunc_u/f32",      0, i_32,  d_convertOpList (u32_Trunc_f32),            Compile_Convert ),  // 0xa9
    M3OP_F( "i32.trunc_s/f64",      0, i_32,  d_convertOpList (i32_Trunc_f64),            Compile_Convert ),  // 0xaa
    M3OP_F( "i32.trunc_u/f64",      0, i_32,  d_convertOpList (u32_Trunc_f64),            Compile_Convert ),  // 0xab

    M3OP( "i64.extend_s/i32",       0, i_64,  d_unaryOpList (i64, Extend_i32),            NULL ),             // 0xac
    M3OP( "i64.extend_u/i32",       0, i_64,  d_unaryOpList (i64, Extend_u32),            NULL ),             // 0xad

    M3OP_F( "i64.trunc_s/f32",      0, i_64,  d_convertOpList (i64_Trunc_f32),            Compile_Convert ),  // 0xae
    M3OP_F( "i64.trunc_u/f32",      0, i_64,  d_convertOpList (u64_Trunc_f32),            Compile_Convert ),  // 0xaf
    M3OP_F( "i64.trunc_s/f64",      0, i_64,  d_convertOpList (i64_Trunc_f64),            Compile_Convert ),  // 0xb0
    M3OP_F( "i64.trunc_u/f64",      0, i_64,  d_convertOpList (u64_Trunc_f64),            Compile_Convert ),  // 0xb1

    M3OP_F( "f32.convert_s/i32",    0, f_32,  d_convertOpList (f32_Convert_i32),          Compile_Convert ),  // 0xb2
    M3OP_F( "f32.convert_u/i32",    0, f_32,  d_convertOpList (f32_Convert_u32),          Compile_Convert ),  // 0xb3
    M3OP_F( "f32.convert_s/i64",    0, f_32,  d_convertOpList (f32_Convert_i64),          Compile_Convert ),  // 0xb4
    M3OP_F( "f32.convert_u/i64",    0, f_32,  d_convertOpList (f32_Convert_u64),          Compile_Convert ),  // 0xb5

    M3OP_F( "f32.demote/f64",       0, f_32,  d_unaryOpList (f32, Demote_f64),            NULL ),             // 0xb6

    M3OP_F( "f64.convert_s/i32",    0, f_64,  d_convertOpList (f64_Convert_i32),          Compile_Convert ),  // 0xb7
    M3OP_F( "f64.convert_u/i32",    0, f_64,  d_convertOpList (f64_Convert_u32),          Compile_Convert ),  // 0xb8
    M3OP_F( "f64.convert_s/i64",    0, f_64,  d_convertOpList (f64_Convert_i64),          Compile_Convert ),  // 0xb9
    M3OP_F( "f64.convert_u/i64",    0, f_64,  d_convertOpList (f64_Convert_u64),          Compile_Convert ),  // 0xba

    M3OP_F( "f64.promote/f32",      0, f_64,  d_unaryOpList (f64, Promote_f32),           NULL ),             // 0xbb

    M3OP_F( "i32.reinterpret/f32",  0, i_32,  d_convertOpList (i32_Reinterpret_f32),      Compile_Convert ),  // 0xbc
    M3OP_F( "i64.reinterpret/f64",  0, i_64,  d_convertOpList (i64_Reinterpret_f64),      Compile_Convert ),  // 0xbd
    M3OP_F( "f32.reinterpret/i32",  0, f_32,  d_convertOpList (f32_Reinterpret_i32),      Compile_Convert ),  // 0xbe
    M3OP_F( "f64.reinterpret/i64",  0, f_64,  d_convertOpList (f64_Reinterpret_i64),      Compile_Convert ),  // 0xbf

    M3OP( "i32.extend8_s",          0, i_32,  d_unaryOpList (i32, Extend8_s),             NULL ),             // 0xc0
    M3OP( "i32.extend16_s",         0, i_32,  d_unaryOpList (i32, Extend16_s),            NULL ),             // 0xc1
    M3OP( "i64.extend8_s",          0, i_64,  d_unaryOpList (i64, Extend8_s),             NULL ),             // 0xc2
    M3OP( "i64.extend16_s",         0, i_64,  d_unaryOpList (i64, Extend16_s),            NULL ),             // 0xc3
    M3OP( "i64.extend32_s",         0, i_64,  d_unaryOpList (i64, Extend32_s),            NULL ),             // 0xc4

# ifdef DEBUG // for codepage logging. the order doesn't matter:
# define d_m3DebugOp(OP) M3OP (#OP, 0, none, { op_##OP })

# if d_m3HasFloat
# define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64, op_##OP##_f32, op_##OP##_f64, })
# else
# define d_m3DebugTypedOp(OP) M3OP (#OP, 0, none, { op_##OP##_i32, op_##OP##_i64 })
# endif

    d_m3DebugOp (Compile),          d_m3DebugOp (Entry),            d_m3DebugOp (End),
    d_m3DebugOp (Unsupported),      d_m3DebugOp (CallRawFunction),

    d_m3DebugOp (GetGlobal_s32),    d_m3DebugOp (GetGlobal_s64),    d_m3DebugOp (ContinueLoop),     d_m3DebugOp (ContinueLoopIf),

    d_m3DebugOp (CopySlot_32),      d_m3DebugOp (PreserveCopySlot_32),  d_m3DebugOp (If_s),         d_m3DebugOp (BranchIfPrologue_s),
    d_m3DebugOp (CopySlot_64),      d_m3DebugOp (PreserveCopySlot_64),  d_m3DebugOp (If_r),         d_m3DebugOp (BranchIfPrologue_r),

    d_m3DebugOp (Select_i32_rss),   d_m3DebugOp (Select_i32_srs),   d_m3DebugOp (Select_i32_ssr),   d_m3DebugOp (Select_i32_sss),
    d_m3DebugOp (Select_i64_rss),   d_m3DebugOp (Select_i64_srs),   d_m3DebugOp (Select_i64_ssr),   d_m3DebugOp (Select_i64_sss),

# if d_m3HasFloat
    d_m3DebugOp (Select_f32_sss),   d_m3DebugOp (Select_f32_srs),   d_m3DebugOp (Select_f32_ssr),
    d_m3DebugOp (Select_f32_rss),   d_m3DebugOp (Select_f32_rrs),   d_m3DebugOp (Select_f32_rsr),

    d_m3DebugOp (Select_f64_sss),   d_m3DebugOp (Select_f64_srs),   d_m3DebugOp (Select_f64_ssr),
    d_m3DebugOp (Select_f64_rss),   d_m3DebugOp (Select_f64_rrs),   d_m3DebugOp (Select_f64_rsr),
# endif

    d_m3DebugOp (MemFill),          d_m3DebugOp (MemCopy),

    d_m3DebugTypedOp (SetGlobal),   d_m3DebugOp (SetGlobal_s32),    d_m3DebugOp (SetGlobal_s64),

    d_m3DebugTypedOp (SetRegister), d_m3DebugTypedOp (SetSlot),     d_m3DebugTypedOp (PreserveSetSlot),
# endif

# if d_m3CascadedOpcodes
    [c_waOp_extended] = M3OP( "0xFC", 0, c_m3Type_unknown,  d_emptyOpList,  Compile_ExtendedOpcode ),
# endif

# ifdef DEBUG
    M3OP( "termination", 0, c_m3Type_unknown )  // for find_operation_info
# endif
};

const M3OpInfo c_operationsFC [] =
{
    M3OP_F( "i32.trunc_s:sat/f32",  0, i_32,  d_convertOpList (i32_TruncSat_f32),         Compile_Convert ),  // 0x00
    M3OP_F( "i32.trunc_u:sat/f32",  0, i_32,  d_convertOpList (u32_TruncSat_f32),         Compile_Convert ),  // 0x01
    M3OP_F( "i32.trunc_s:sat/f64",  0, i_32,  d_convertOpList (i32_TruncSat_f64),         Compile_Convert ),  // 0x02
    M3OP_F( "i32.trunc_u:sat/f64",  0, i_32,  d_convertOpList (u32_TruncSat_f64),         Compile_Convert ),  // 0x03
    M3OP_F( "i64.trunc_s:sat/f32",  0, i_64,  d_convertOpList (i64_TruncSat_f32),         Compile_Convert ),  // 0x04
    M3OP_F( "i64.trunc_u:sat/f32",  0, i_64,  d_convertOpList (u64_TruncSat_f32),         Compile_Convert ),  // 0x05
    M3OP_F( "i64.trunc_s:sat/f64",  0, i_64,  d_convertOpList (i64_TruncSat_f64),         Compile_Convert ),  // 0x06
    M3OP_F( "i64.trunc_u:sat/f64",  0, i_64,  d_convertOpList (u64_TruncSat_f64),         Compile_Convert ),  // 0x07

    M3OP_RESERVED, M3OP_RESERVED,

    M3OP( "memory.copy",            0, none,  d_emptyOpList,                              Compile_Memory_CopyFill ),  // 0x0a
    M3OP( "memory.fill",            0, none,  d_emptyOpList,                              Compile_Memory_CopyFill ),  // 0x0b


# ifdef DEBUG
    M3OP( "termination", 0, c_m3Type_unknown )  // for find_operation_info
# endif
};


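// Opcode lookup: plain single-byte opcodes index c_operations directly, while extended opcodes
// (the 0xFC / c_waOp_extended prefix) use their low byte to index c_operationsFC. Anything out of
// range returns NULL, which callers report as m3Err_unknownOpcode.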
IM3OpInfo GetOpInfo (m3opcode_t opcode)
{
    switch (opcode >> 8) {
    case 0x00:
        if (M3_LIKELY(opcode < M3_COUNT_OF(c_operations))) {
            return &c_operations[opcode];
        }
        break;
    case c_waOp_extended:
        opcode &= 0xFF;
        if (M3_LIKELY(opcode < M3_COUNT_OF(c_operationsFC))) {
            return &c_operationsFC[opcode];
        }
        break;
    }
    return NULL;
}
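// For example, GetOpInfo (0x41) returns the "i32.const" entry above, while a lookup with
// c_waOp_extended in the high byte, e.g. GetOpInfo ((c_waOp_extended << 8) | 0x0a), returns the
// "memory.copy" entry from c_operationsFC.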

M3Result CompileBlockStatements (IM3Compilation o)
{
    M3Result result = m3Err_none;
    bool validEnd = false;

    while (o->wasm < o->wasmEnd)
    {
# if d_m3EnableOpTracing
        if (o->numEmits)
        {
            EmitOp (o, op_DumpStack);
            EmitConstant32 (o, o->numOpcodes);
            EmitConstant32 (o, GetMaxUsedSlotPlusOne(o));
            EmitPointer (o, o->function);

            o->numEmits = 0;
        }
# endif
        m3opcode_t opcode;
        o->lastOpcodeStart = o->wasm;
_       (Read_opcode (& opcode, & o->wasm, o->wasmEnd));    log_opcode (o, opcode);

        // Restrict the allowed opcodes when compiling an expression rather than a function body
        // (o->function is NULL then, e.g. for global initializers), since these must stay constant
        if (not o->function) {
            switch (opcode) {
            case c_waOp_i32_const: case c_waOp_i64_const:
            case c_waOp_f32_const: case c_waOp_f64_const:
            case c_waOp_getGlobal: case c_waOp_end:
                break;
            default:
                _throw(m3Err_restrictedOpcode);
            }
        }

        IM3OpInfo opinfo = GetOpInfo (opcode);

        if (opinfo == NULL)
            _throw (ErrorCompile (m3Err_unknownOpcode, o, "opcode '%x' not available", opcode));

        if (opinfo->compiler) {
_           ((* opinfo->compiler) (o, opcode))
        } else {
_           (Compile_Operator (o, opcode));
        }

        o->previousOpcode = opcode;

        if (opcode == c_waOp_else)
        {
            _throwif (m3Err_wasmMalformed, o->block.opcode != c_waOp_if);
            validEnd = true;
            break;
        }
        else if (opcode == c_waOp_end)
        {
            validEnd = true;
            break;
        }
    }
    _throwif(m3Err_wasmMalformed, !(validEnd));

_catch:
    return result;
}

static
M3Result PushBlockResults (IM3Compilation o)
{
    M3Result result = m3Err_none;

    u16 numResults = GetFuncTypeNumResults (o->block.type);

    for (u16 i = 0; i < numResults; ++i)
    {
        u8 type = GetFuncTypeResultType (o->block.type, i);

        if (i == numResults - 1 and IsFpType (type))
        {
_           (PushRegister (o, type));
        }
        else
_           (PushAllocatedSlot (o, type));
    }

    _catch: return result;
}


M3Result CompileBlock (IM3Compilation o, IM3FuncType i_blockType, m3opcode_t i_blockOpcode)
{
    d_m3Assert (not IsRegisterAllocated (o, 0));
    d_m3Assert (not IsRegisterAllocated (o, 1));
    M3CompilationScope outerScope = o->block;
    M3CompilationScope * block = & o->block;

    block->outer = & outerScope;
    block->pc = GetPagePC (o->page);
    block->patches = NULL;
    block->type = i_blockType;
    block->depth ++;
    block->opcode = i_blockOpcode;

    /*
     The block stack frame is a little strange, but for good reasons. Because blocks need to be restarted to
     compile different pathways (if/else), the incoming params must be saved. The parameters are popped
     and validated, but then the stack top is readjusted so they aren't subsequently overwritten.
     Next, the results are preallocated to find destination slots. But again, these are immediately popped
     (deallocated) and the stack top is readjusted to keep these records in place. This allows branch instructions
     to find their result landing pads. Finally, the params are copied from the "dead" records and pushed back
     onto the stack as active stack items for the CompileBlockStatements () call.

         [    block     ]
         [    params    ]
         ------------------
         [    result    ]   <----  blockStackIndex
         [    slots     ]
         ------------------
         [  saved param ]
         [    records   ]
                             <----- exitStackIndex
     */

_try {
    // validate and dealloc params ----------------------------

    u16 stackIndex = o->stackIndex;

    u16 numParams = GetFuncTypeNumParams (i_blockType);

    if (i_blockOpcode != c_waOp_else)
    {
        for (u16 i = 0; i < numParams; ++i)
        {
            u8 type = GetFuncTypeParamType (i_blockType, numParams - 1 - i);
_           (PopType (o, type));
        }
    }
    else o->stackIndex -= numParams;

    u16 paramIndex = o->stackIndex;
    block->exitStackIndex = paramIndex;  // consume the params at block exit

    // keep copies of param slots in the stack
    o->stackIndex = stackIndex;

    // find slots for the results ----------------------------
    PushBlockResults (o);

    stackIndex = o->stackIndex;

    // dealloc but keep record of the result slots in the stack
    u16 numResults = GetFuncTypeNumResults (i_blockType);
    while (numResults--)
        Pop (o);

    block->blockStackIndex = o->stackIndex = stackIndex;

    // push the params back onto the stack -------------------
    for (u16 i = 0; i < numParams; ++i)
    {
        u8 type = GetFuncTypeParamType (i_blockType, i);

        u16 slot = GetSlotForStackIndex (o, paramIndex + i);
        Push (o, type, slot);

        if (slot >= o->slotFirstDynamicIndex)
            MarkSlotsAllocatedByType (o, slot, type);
    }

    //--------------------------------------------------------

_   (CompileBlockStatements (o));

_   (ValidateBlockEnd (o));

    if (o->function)  // skip for expressions
    {
        if (not IsStackPolymorphic (o))
_           (ResolveBlockResults (o, & o->block, /* isBranch: */ false));

_       (UnwindBlockStack (o))

        if (not ((i_blockOpcode == c_waOp_if and numResults) or o->previousOpcode == c_waOp_else))
        {
            o->stackIndex = o->block.exitStackIndex;
_           (PushBlockResults (o));
        }
    }

    PatchBranches (o);

    o->block = outerScope;

} _catch: return result;
}

static
M3Result CompileLocals (IM3Compilation o)
{
    M3Result result;

    u32 numLocals = 0;
    u32 numLocalBlocks;
_   (ReadLEB_u32 (& numLocalBlocks, & o->wasm, o->wasmEnd));

    for (u32 l = 0; l < numLocalBlocks; ++l)
    {
        u32 varCount;
        i8 waType;
        u8 localType;

_       (ReadLEB_u32 (& varCount, & o->wasm, o->wasmEnd));
_       (ReadLEB_i7 (& waType, & o->wasm, o->wasmEnd));
_       (NormalizeType (& localType, waType));
        numLocals += varCount;    m3log (compile, "pushing locals. count: %d; type: %s", varCount, c_waTypes [localType]);
        while (varCount--)
_           (PushAllocatedSlot (o, localType));
    }

    if (o->function)
        o->function->numLocals = numLocals;

    _catch: return result;
}

static
M3Result ReserveConstants (IM3Compilation o)
{
    M3Result result = m3Err_none;

    // in the interest of speed, this blindly scans the Wasm code looking for any byte that looks
    // like a const opcode; since immediate/LEB bytes can false-positive, this only yields a rough
    // upper bound on the constant slots needed (capped at d_m3MaxConstantTableSize below)
    u16 numConstantSlots = 0;

    bytes_t wa = o->wasm;
    while (wa < o->wasmEnd)
    {
        u8 code = * wa++;

        if (code == c_waOp_i32_const or code == c_waOp_f32_const)
            numConstantSlots += 1;
        else if (code == c_waOp_i64_const or code == c_waOp_f64_const)
            numConstantSlots += GetTypeNumSlots (c_m3Type_i64);

        if (numConstantSlots >= d_m3MaxConstantTableSize)
            break;
    }

    // if constants overflow their reserved stack space, the compiler simply emits op_Const
    // operations as needed. Compiled expressions (global inits) don't pass through this
    // ReserveConstants function and thus always produce inline constants.

    AlignSlotToType (& numConstantSlots, c_m3Type_i64);    m3log (compile, "reserved constant slots: %d", numConstantSlots);

    o->slotFirstDynamicIndex = o->slotFirstConstIndex + numConstantSlots;

    if (o->slotFirstDynamicIndex >= d_m3MaxFunctionSlots)
        _throw (m3Err_functionStackOverflow);

    _catch:
    return result;
}


M3Result CompileFunction (IM3Function io_function)
{
    if (!io_function->wasm) return "function body is missing";

    IM3FuncType funcType = io_function->funcType;    m3log (compile, "compiling: [%d] %s %s; wasm-size: %d",
        io_function->index, m3_GetFunctionName (io_function), SPrintFuncTypeSignature (funcType), (u32) (io_function->wasmEnd - io_function->wasm));
    IM3Runtime runtime = io_function->module->runtime;

    IM3Compilation o = & runtime->compilation;    d_m3Assert (d_m3MaxFunctionSlots >= d_m3MaxFunctionStackHeight * (d_m3Use32BitSlots + 1))  // need twice as many slots in 32-bit mode
    memset (o, 0x0, sizeof (M3Compilation));

    o->runtime = runtime;
    o->module = io_function->module;
    o->function = io_function;
    o->wasm = io_function->wasm;
    o->wasmEnd = io_function->wasmEnd;
    o->block.type = funcType;

_try {
    // skip over code size. the end was already calculated during parse phase
    u32 size;
_   (ReadLEB_u32 (& size, & o->wasm, o->wasmEnd));    d_m3Assert (size == (o->wasmEnd - o->wasm))

_   (AcquireCompilationCodePage (o, & o->page));

    pc_t pc = GetPagePC (o->page);

    u16 numRetSlots = GetFunctionNumReturns (o->function) * c_ioSlotCount;

    for (u16 i = 0; i < numRetSlots; ++i)
        MarkSlotAllocated (o, i);

    o->function->numRetSlots = o->slotFirstDynamicIndex = numRetSlots;

    u16 numArgs = GetFunctionNumArgs (o->function);

    // push the arg types to the type stack
    for (u16 i = 0; i < numArgs; ++i)
    {
        u8 type = GetFunctionArgType (o->function, i);
_       (PushAllocatedSlot (o, type));

        // prevent allocator fill-in
        o->slotFirstDynamicIndex += c_ioSlotCount;
    }

    o->slotMaxAllocatedIndexPlusOne = o->function->numRetAndArgSlots = o->slotFirstLocalIndex = o->slotFirstDynamicIndex;
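    // At this point the slot layout is [ return slots ][ arg slots ]; the locals compiled next start
    // at slotFirstLocalIndex, constants are reserved after them, and dynamic/temporary slots follow.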

_   (CompileLocals (o));

    u16 maxSlot = GetMaxUsedSlotPlusOne (o);

    o->function->numLocalBytes = (maxSlot - o->slotFirstLocalIndex) * sizeof (m3slot_t);

    o->slotFirstConstIndex = o->slotMaxConstIndex = maxSlot;

    // ReserveConstants initializes o->slotFirstDynamicIndex
_   (ReserveConstants (o));

    // start tracking the max stack used (Push() also updates this value) so that op_Entry can precisely detect stack overflow
    o->maxStackSlots = o->slotMaxAllocatedIndexPlusOne = o->slotFirstDynamicIndex;

    o->block.blockStackIndex = o->stackFirstDynamicIndex = o->stackIndex;    m3log (compile, "start stack index: %d",
                                                                                    (u32) o->stackFirstDynamicIndex);
_   (EmitOp (o, op_Entry));
    EmitPointer (o, io_function);

_   (CompileBlockStatements (o));

    // TODO: validate opcode sequences
    _throwif(m3Err_wasmMalformed, o->previousOpcode != c_waOp_end);

    io_function->compiled = pc;
    io_function->maxStackSlots = o->maxStackSlots;

    u16 numConstantSlots = o->slotMaxConstIndex - o->slotFirstConstIndex;    m3log (compile, "unique constant slots: %d; unused slots: %d",
                                                                                    numConstantSlots, o->slotFirstDynamicIndex - o->slotMaxConstIndex);
    io_function->numConstantBytes = numConstantSlots * sizeof (m3slot_t);

    if (numConstantSlots)
    {
        io_function->constants = m3_CopyMem (o->constants, io_function->numConstantBytes);
        _throwifnull(io_function->constants);
    }

} _catch:

    ReleaseCompilationCodePage (o);

    return result;
}
