1//
2// m3_parse.c
3//
4// Created by Steven Massey on 4/19/19.
5// Copyright © 2019 Steven Massey. All rights reserved.
6//
7
8#include "m3_env.h"
9#include "m3_compile.h"
10#include "m3_exception.h"
11#include "m3_info.h"
12
13
14M3Result ParseType_Table (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
15{
16 M3Result result = m3Err_none;
17
18 return result;
19}
20
21
22M3Result ParseType_Memory (M3MemoryInfo * o_memory, bytes_t * io_bytes, cbytes_t i_end)
23{
24 M3Result result = m3Err_none;
25
26 u8 flag;
27
28_ (ReadLEB_u7 (& flag, io_bytes, i_end)); // really a u1
29_ (ReadLEB_u32 (& o_memory->initPages, io_bytes, i_end));
30
31 o_memory->maxPages = 0;
32 if (flag)
33_ (ReadLEB_u32 (& o_memory->maxPages, io_bytes, i_end));
34
35 _catch: return result;
36}
37
38
39M3Result ParseSection_Type (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
40{
41 IM3FuncType ftype = NULL;
42
43_try {
44 u32 numTypes;
45_ (ReadLEB_u32 (& numTypes, & i_bytes, i_end)); m3log (parse, "** Type [%d]", numTypes);
46
47 _throwif("too many types", numTypes > d_m3MaxSaneTypesCount);
48
49 if (numTypes)
50 {
51 // table of IM3FuncType (that point to the actual M3FuncType struct in the Environment)
52 io_module->funcTypes = m3_AllocArray (IM3FuncType, numTypes);
53 _throwifnull (io_module->funcTypes);
54 io_module->numFuncTypes = numTypes;
55
56 for (u32 i = 0; i < numTypes; ++i)
57 {
58 i8 form;
59_ (ReadLEB_i7 (& form, & i_bytes, i_end));
60 _throwif (m3Err_wasmMalformed, form != -32); // for Wasm MVP
61
62 u32 numArgs;
63_ (ReadLEB_u32 (& numArgs, & i_bytes, i_end));
64
65 _throwif (m3Err_tooManyArgsRets, numArgs > d_m3MaxSaneFunctionArgRetCount);
66#if defined(M3_COMPILER_MSVC)
67 u8 argTypes [d_m3MaxSaneFunctionArgRetCount];
68#else
69 u8 argTypes[numArgs+1]; // make ubsan happy
70#endif
71 for (u32 a = 0; a < numArgs; ++a)
72 {
73 i8 wasmType;
74 u8 argType;
75_ (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
76_ (NormalizeType (& argType, wasmType));
77
78 argTypes[a] = argType;
79 }
80
81 u32 numRets;
82_ (ReadLEB_u32 (& numRets, & i_bytes, i_end));
83 _throwif (m3Err_tooManyArgsRets, (u64)(numRets) + numArgs > d_m3MaxSaneFunctionArgRetCount);
84
85_ (AllocFuncType (& ftype, numRets + numArgs));
86 ftype->numArgs = numArgs;
87 ftype->numRets = numRets;
88
89 for (u32 r = 0; r < numRets; ++r)
90 {
91 i8 wasmType;
92 u8 retType;
93_ (ReadLEB_i7 (& wasmType, & i_bytes, i_end));
94_ (NormalizeType (& retType, wasmType));
95
96 ftype->types[r] = retType;
97 }
98 memcpy (ftype->types + numRets, argTypes, numArgs); m3log (parse, " type %2d: %s", i, SPrintFuncTypeSignature (ftype));
99
100 Environment_AddFuncType (io_module->environment, & ftype);
101 io_module->funcTypes [i] = ftype;
102 ftype = NULL; // ownership transferred to environment
103 }
104 }
105
106} _catch:
107
108 if (result)
109 {
110 m3_Free (ftype);
111 // FIX: M3FuncTypes in the table are leaked
112 m3_Free (io_module->funcTypes);
113 io_module->numFuncTypes = 0;
114 }
115
116 return result;
117}
118
119
120M3Result ParseSection_Function (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
121{
122 M3Result result = m3Err_none;
123
124 u32 numFunctions;
125_ (ReadLEB_u32 (& numFunctions, & i_bytes, i_end)); m3log (parse, "** Function [%d]", numFunctions);
126
127 _throwif("too many functions", numFunctions > d_m3MaxSaneFunctionsCount);
128
129_ (Module_PreallocFunctions(io_module, io_module->numFunctions + numFunctions));
130
131 for (u32 i = 0; i < numFunctions; ++i)
132 {
133 u32 funcTypeIndex;
134_ (ReadLEB_u32 (& funcTypeIndex, & i_bytes, i_end));
135
136_ (Module_AddFunction (io_module, funcTypeIndex, NULL /* import info */));
137 }
138
139 _catch: return result;
140}
141
142
143M3Result ParseSection_Import (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
144{
145 M3Result result = m3Err_none;
146
147 M3ImportInfo import = { NULL, NULL }, clearImport = { NULL, NULL };
148
149 u32 numImports;
150_ (ReadLEB_u32 (& numImports, & i_bytes, i_end)); m3log (parse, "** Import [%d]", numImports);
151
152 _throwif("too many imports", numImports > d_m3MaxSaneImportsCount);
153
154 // Most imports are functions, so we won't waste much space anyway (if any)
155_ (Module_PreallocFunctions(io_module, numImports));
156
157 for (u32 i = 0; i < numImports; ++i)
158 {
159 u8 importKind;
160
161_ (Read_utf8 (& import.moduleUtf8, & i_bytes, i_end));
162_ (Read_utf8 (& import.fieldUtf8, & i_bytes, i_end));
163_ (Read_u8 (& importKind, & i_bytes, i_end)); m3log (parse, " kind: %d '%s.%s' ",
164 (u32) importKind, import.moduleUtf8, import.fieldUtf8);
165 switch (importKind)
166 {
167 case d_externalKind_function:
168 {
169 u32 typeIndex;
170_ (ReadLEB_u32 (& typeIndex, & i_bytes, i_end))
171
172_ (Module_AddFunction (io_module, typeIndex, & import))
173 import = clearImport;
174
175 io_module->numFuncImports++;
176 }
177 break;
178
179 case d_externalKind_table:
180// result = ParseType_Table (& i_bytes, i_end);
181 break;
182
183 case d_externalKind_memory:
184 {
185_ (ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end));
186 io_module->memoryImported = true;
187 }
188 break;
189
190 case d_externalKind_global:
191 {
192 i8 waType;
193 u8 type, isMutable;
194
195_ (ReadLEB_i7 (& waType, & i_bytes, i_end));
196_ (NormalizeType (& type, waType));
197_ (ReadLEB_u7 (& isMutable, & i_bytes, i_end)); m3log (parse, " global: %s mutable=%d", c_waTypes [type], (u32) isMutable);
198
199 IM3Global global;
200_ (Module_AddGlobal (io_module, & global, type, isMutable, true /* isImport */));
201 global->import = import;
202 import = clearImport;
203 }
204 break;
205
206 default:
207 _throw (m3Err_wasmMalformed);
208 }
209
210 FreeImportInfo (& import);
211 }
212
213 _catch:
214
215 FreeImportInfo (& import);
216
217 return result;
218}
219
220
221M3Result ParseSection_Export (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
222{
223 M3Result result = m3Err_none;
224 const char * utf8 = NULL;
225
226 u32 numExports;
227_ (ReadLEB_u32 (& numExports, & i_bytes, i_end)); m3log (parse, "** Export [%d]", numExports);
228
229 _throwif("too many exports", numExports > d_m3MaxSaneExportsCount);
230
231 for (u32 i = 0; i < numExports; ++i)
232 {
233 u8 exportKind;
234 u32 index;
235
236_ (Read_utf8 (& utf8, & i_bytes, i_end));
237_ (Read_u8 (& exportKind, & i_bytes, i_end));
238_ (ReadLEB_u32 (& index, & i_bytes, i_end)); m3log (parse, " index: %3d; kind: %d; export: '%s'; ", index, (u32) exportKind, utf8);
239
240 if (exportKind == d_externalKind_function)
241 {
242 _throwif(m3Err_wasmMalformed, index >= io_module->numFunctions);
243 IM3Function func = &(io_module->functions [index]);
244 if (func->numNames < d_m3MaxDuplicateFunctionImpl)
245 {
246 func->names[func->numNames++] = utf8;
247 utf8 = NULL; // ownership transferred to M3Function
248 }
249 }
250 else if (exportKind == d_externalKind_global)
251 {
252 _throwif(m3Err_wasmMalformed, index >= io_module->numGlobals);
253 IM3Global global = &(io_module->globals [index]);
254 m3_Free (global->name);
255 global->name = utf8;
256 utf8 = NULL; // ownership transferred to M3Global
257 }
258
259 m3_Free (utf8);
260 }
261
262_catch:
263 m3_Free (utf8);
264 return result;
265}
266
267
268M3Result ParseSection_Start (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
269{
270 M3Result result = m3Err_none;
271
272 u32 startFuncIndex;
273_ (ReadLEB_u32 (& startFuncIndex, & i_bytes, i_end)); m3log (parse, "** Start Function: %d", startFuncIndex);
274
275 if (startFuncIndex < io_module->numFunctions)
276 {
277 io_module->startFunction = startFuncIndex;
278 }
279 else result = "start function index out of bounds";
280
281 _catch: return result;
282}
283
284
285M3Result Parse_InitExpr (M3Module * io_module, bytes_t * io_bytes, cbytes_t i_end)
286{
287 M3Result result = m3Err_none;
288
289 // this doesn't generate code pages. just walks the wasm bytecode to find the end
290
291#if defined(d_m3PreferStaticAlloc)
292 static M3Compilation compilation;
293#else
294 M3Compilation compilation;
295#endif
296 compilation = (M3Compilation){ .runtime = NULL, .module = io_module, .wasm = * io_bytes, .wasmEnd = i_end };
297
298 result = CompileBlockStatements (& compilation);
299
300 * io_bytes = compilation.wasm;
301
302 return result;
303}
304
305
306M3Result ParseSection_Element (IM3Module io_module, bytes_t i_bytes, cbytes_t i_end)
307{
308 M3Result result = m3Err_none;
309
310 u32 numSegments;
311_ (ReadLEB_u32 (& numSegments, & i_bytes, i_end)); m3log (parse, "** Element [%d]", numSegments);
312
313 _throwif ("too many element segments", numSegments > d_m3MaxSaneElementSegments);
314
315 io_module->elementSection = i_bytes;
316 io_module->elementSectionEnd = i_end;
317 io_module->numElementSegments = numSegments;
318
319 _catch: return result;
320}
321
322
323M3Result ParseSection_Code (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
324{
325 M3Result result;
326
327 u32 numFunctions;
328_ (ReadLEB_u32 (& numFunctions, & i_bytes, i_end)); m3log (parse, "** Code [%d]", numFunctions);
329
330 if (numFunctions != io_module->numFunctions - io_module->numFuncImports)
331 {
332 _throw ("mismatched function count in code section");
333 }
334
335 for (u32 f = 0; f < numFunctions; ++f)
336 {
337 const u8 * start = i_bytes;
338
339 u32 size;
340_ (ReadLEB_u32 (& size, & i_bytes, i_end));
341
342 if (size)
343 {
344 const u8 * ptr = i_bytes;
345 i_bytes += size;
346
347 if (i_bytes <= i_end)
348 {
349 /*
350 u32 numLocalBlocks;
351_ (ReadLEB_u32 (& numLocalBlocks, & ptr, i_end)); m3log (parse, " code size: %-4d", size);
352
353 u32 numLocals = 0;
354
355 for (u32 l = 0; l < numLocalBlocks; ++l)
356 {
357 u32 varCount;
358 i8 wasmType;
359 u8 normalType;
360
361_ (ReadLEB_u32 (& varCount, & ptr, i_end));
362_ (ReadLEB_i7 (& wasmType, & ptr, i_end));
363_ (NormalizeType (& normalType, wasmType));
364
365 numLocals += varCount; m3log (parse, " %2d locals; type: '%s'", varCount, c_waTypes [normalType]);
366 }
367 */
368
369 IM3Function func = Module_GetFunction (io_module, f + io_module->numFuncImports);
370
371 func->module = io_module;
372 func->wasm = start;
373 func->wasmEnd = i_bytes;
374 //func->ownsWasmCode = io_module->hasWasmCodeCopy;
375// func->numLocals = numLocals;
376 }
377 else _throw (m3Err_wasmSectionOverrun);
378 }
379 }
380
381 _catch:
382
383 if (not result and i_bytes != i_end)
384 result = m3Err_wasmSectionUnderrun;
385
386 return result;
387}
388
389
390M3Result ParseSection_Data (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
391{
392 M3Result result = m3Err_none;
393
394 u32 numDataSegments;
395_ (ReadLEB_u32 (& numDataSegments, & i_bytes, i_end)); m3log (parse, "** Data [%d]", numDataSegments);
396
397 _throwif("too many data segments", numDataSegments > d_m3MaxSaneDataSegments);
398
399 io_module->dataSegments = m3_AllocArray (M3DataSegment, numDataSegments);
400 _throwifnull(io_module->dataSegments);
401 io_module->numDataSegments = numDataSegments;
402
403 for (u32 i = 0; i < numDataSegments; ++i)
404 {
405 M3DataSegment * segment = & io_module->dataSegments [i];
406
407_ (ReadLEB_u32 (& segment->memoryRegion, & i_bytes, i_end));
408
409 segment->initExpr = i_bytes;
410_ (Parse_InitExpr (io_module, & i_bytes, i_end));
411 segment->initExprSize = (u32) (i_bytes - segment->initExpr);
412
413 _throwif (m3Err_wasmMissingInitExpr, segment->initExprSize <= 1);
414
415_ (ReadLEB_u32 (& segment->size, & i_bytes, i_end));
416 segment->data = i_bytes; m3log (parse, " segment [%u] memory: %u; expr-size: %d; size: %d",
417 i, segment->memoryRegion, segment->initExprSize, segment->size);
418 i_bytes += segment->size;
419
420 _throwif("data segment underflow", i_bytes > i_end);
421 }
422
423 _catch:
424
425 return result;
426}
427
428
429M3Result ParseSection_Memory (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
430{
431 M3Result result = m3Err_none;
432
433 // TODO: MVP; assert no memory imported
434
435 u32 numMemories;
436_ (ReadLEB_u32 (& numMemories, & i_bytes, i_end)); m3log (parse, "** Memory [%d]", numMemories);
437
438 _throwif (m3Err_tooManyMemorySections, numMemories != 1);
439
440 ParseType_Memory (& io_module->memoryInfo, & i_bytes, i_end);
441
442 _catch: return result;
443}
444
445
446M3Result ParseSection_Global (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
447{
448 M3Result result = m3Err_none;
449
450 u32 numGlobals;
451_ (ReadLEB_u32 (& numGlobals, & i_bytes, i_end)); m3log (parse, "** Global [%d]", numGlobals);
452
453 _throwif("too many globals", numGlobals > d_m3MaxSaneGlobalsCount);
454
455 for (u32 i = 0; i < numGlobals; ++i)
456 {
457 i8 waType;
458 u8 type, isMutable;
459
460_ (ReadLEB_i7 (& waType, & i_bytes, i_end));
461_ (NormalizeType (& type, waType));
462_ (ReadLEB_u7 (& isMutable, & i_bytes, i_end)); m3log (parse, " global: [%d] %s mutable: %d", i, c_waTypes [type], (u32) isMutable);
463
464 IM3Global global;
465_ (Module_AddGlobal (io_module, & global, type, isMutable, false /* isImport */));
466
467 global->initExpr = i_bytes;
468_ (Parse_InitExpr (io_module, & i_bytes, i_end));
469 global->initExprSize = (u32) (i_bytes - global->initExpr);
470
471 _throwif (m3Err_wasmMissingInitExpr, global->initExprSize <= 1);
472 }
473
474 _catch: return result;
475}
476
477
478M3Result ParseSection_Name (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
479{
480 M3Result result;
481
482 cstr_t name;
483
484 while (i_bytes < i_end)
485 {
486 u8 nameType;
487 u32 payloadLength;
488
489_ (ReadLEB_u7 (& nameType, & i_bytes, i_end));
490_ (ReadLEB_u32 (& payloadLength, & i_bytes, i_end));
491
492 bytes_t start = i_bytes;
493 if (nameType == 1)
494 {
495 u32 numNames;
496_ (ReadLEB_u32 (& numNames, & i_bytes, i_end));
497
498 _throwif("too many names", numNames > d_m3MaxSaneFunctionsCount);
499
500 for (u32 i = 0; i < numNames; ++i)
501 {
502 u32 index;
503_ (ReadLEB_u32 (& index, & i_bytes, i_end));
504_ (Read_utf8 (& name, & i_bytes, i_end));
505
506 if (index < io_module->numFunctions)
507 {
508 IM3Function func = &(io_module->functions [index]);
509 if (func->numNames == 0)
510 {
511 func->names[0] = name; m3log (parse, " naming function%5d: %s", index, name);
512 func->numNames = 1;
513 name = NULL; // transfer ownership
514 }
515// else m3log (parse, "prenamed: %s", io_module->functions [index].name);
516 }
517
518 m3_Free (name);
519 }
520 }
521
522 i_bytes = start + payloadLength;
523 }
524
525 _catch: return result;
526}
527
528
529M3Result ParseSection_Custom (M3Module * io_module, bytes_t i_bytes, cbytes_t i_end)
530{
531 M3Result result;
532
533 cstr_t name;
534_ (Read_utf8 (& name, & i_bytes, i_end));
535 m3log (parse, "** Custom: '%s'", name);
536 if (strcmp (name, "name") == 0) {
537_ (ParseSection_Name(io_module, i_bytes, i_end));
538 } else if (io_module->environment->customSectionHandler) {
539_ (io_module->environment->customSectionHandler(io_module, name, i_bytes, i_end));
540 }
541
542 m3_Free (name);
543
544 _catch: return result;
545}
546
547
548M3Result ParseModuleSection (M3Module * o_module, u8 i_sectionType, bytes_t i_bytes, u32 i_numBytes)
549{
550 M3Result result = m3Err_none;
551
552 typedef M3Result (* M3Parser) (M3Module *, bytes_t, cbytes_t);
553
554 static M3Parser s_parsers [] =
555 {
556 ParseSection_Custom, // 0
557 ParseSection_Type, // 1
558 ParseSection_Import, // 2
559 ParseSection_Function, // 3
560 NULL, // 4: TODO Table
561 ParseSection_Memory, // 5
562 ParseSection_Global, // 6
563 ParseSection_Export, // 7
564 ParseSection_Start, // 8
565 ParseSection_Element, // 9
566 ParseSection_Code, // 10
567 ParseSection_Data, // 11
568 NULL, // 12: TODO DataCount
569 };
570
571 M3Parser parser = NULL;
572
573 if (i_sectionType <= 12)
574 parser = s_parsers [i_sectionType];
575
576 if (parser)
577 {
578 cbytes_t end = i_bytes + i_numBytes;
579 result = parser (o_module, i_bytes, end);
580 }
581 else
582 {
583 m3log (parse, " skipped section type: %d", (u32) i_sectionType);
584 }
585
586 return result;
587}
588
589
590M3Result m3_ParseModule (IM3Environment i_environment, IM3Module * o_module, cbytes_t i_bytes, u32 i_numBytes)
591{
592 IM3Module module; m3log (parse, "load module: %d bytes", i_numBytes);
593_try {
594 module = m3_AllocStruct (M3Module);
595 _throwifnull (module);
596 module->name = ".unnamed"; m3log (parse, "load module: %d bytes", i_numBytes);
597 module->startFunction = -1;
598 //module->hasWasmCodeCopy = false;
599 module->environment = i_environment;
600
601 const u8 * pos = i_bytes;
602 const u8 * end = pos + i_numBytes;
603
604 module->wasmStart = pos;
605 module->wasmEnd = end;
606
607 u32 magic, version;
608_ (Read_u32 (& magic, & pos, end));
609_ (Read_u32 (& version, & pos, end));
610
611 _throwif (m3Err_wasmMalformed, magic != 0x6d736100);
612 _throwif (m3Err_incompatibleWasmVersion, version != 1);
613
614 static const u8 sectionsOrder[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 10, 11, 0 }; // 0 is a placeholder
615 u8 expectedSection = 0;
616
617 while (pos < end)
618 {
619 u8 section;
620_ (ReadLEB_u7 (& section, & pos, end));
621
622 if (section != 0) {
623 // Ensure sections appear only once and in order
624 while (sectionsOrder[expectedSection++] != section) {
625 _throwif(m3Err_misorderedWasmSection, expectedSection >= 12);
626 }
627 }
628
629 u32 sectionLength;
630_ (ReadLEB_u32 (& sectionLength, & pos, end));
631 _throwif(m3Err_wasmMalformed, pos + sectionLength > end);
632
633_ (ParseModuleSection (module, section, pos, sectionLength));
634
635 pos += sectionLength;
636 }
637
638} _catch:
639
640 if (result)
641 {
642 m3_FreeModule (module);
643 module = NULL;
644 }
645
646 * o_module = module;
647
648 return result;
649}
650