1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | /********************************************************************************/ |
5 | /* Code goes here */ |
6 | |
7 | /********************************************************************************/ |
8 | extern int yyparse(); |
9 | |
// One row of the lexer's keyword table (see keywords[] and findKeyword()).
struct Keywords {
    const char*     name;       // text the entry is indexed and looked up by
    unsigned short  token;      // value findKeyword() returns for this entry
    unsigned short  tokenVal;   // this holds the instruction enumeration for those keywords that are instrs
    size_t          stname;     // filled by the table macros with lengthof(name)-1
};
16 | |
17 | #define NO_VALUE ((unsigned short)-1) // The token has no value |
18 | |
// Keyword table used by the lexer. Its rows are generated by the preprocessor:
// "opcode.def" is expanded through OPDEF/OPALIAS and "il_kywd.h" through KYWD.
// Every expansion yields one Keywords initializer { name, token, tokenVal, stname }.
static Keywords keywords[] = {
// Attention! Because of aliases, the instructions MUST go first!
// Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
#undef InlineNone
#undef InlineVar
#undef ShortInlineVar
#undef InlineI
#undef ShortInlineI
#undef InlineI8
#undef InlineR
#undef ShortInlineR
#undef InlineBrTarget
#undef ShortInlineBrTarget
#undef InlineMethod
#undef InlineField
#undef InlineType
#undef InlineString
#undef InlineSig
#undef InlineTok
#undef InlineSwitch
#undef InlineVarTok


// Map each operand-kind name that can appear as the 'args' column of OPDEF to an
// INSTR_* value; the Short* variants map to the same value as their long form.
#define InlineNone INSTR_NONE
#define InlineVar INSTR_VAR
#define ShortInlineVar INSTR_VAR
#define InlineI INSTR_I
#define ShortInlineI INSTR_I
#define InlineI8 INSTR_I8
#define InlineR INSTR_R
#define ShortInlineR INSTR_R
#define InlineBrTarget INSTR_BRTARGET
#define ShortInlineBrTarget INSTR_BRTARGET
#define InlineMethod INSTR_METHOD
#define InlineField INSTR_FIELD
#define InlineType INSTR_TYPE
#define InlineString INSTR_STRING
#define InlineSig INSTR_SIG
#define InlineTok INSTR_TOK
#define InlineSwitch INSTR_SWITCH

#define InlineVarTok 0
#define NEW_INLINE_NAMES
// The volatile instruction collides with the volatile keyword, so
// we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!)
// OPDEF row:   name = s, token = args (one of the mappings above), tokenVal = c.
// OPALIAS row: name = s, token = NO_VALUE, tokenVal = c.
#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
#define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
#include "opcode.def"
#undef OPALIAS
#undef OPDEF

/* keywords */
// KYWD row: name = name, token = sym, tokenVal = val.
#define KYWD(name, sym, val) { name, sym, val, lengthof(name)-1 },
#include "il_kywd.h"
#undef KYWD

};
76 | |
77 | /********************************************************************************/ |
78 | /* File encoding-dependent functions */ |
79 | /*--------------------------------------------------------------------------*/ |
80 | char* nextcharA(__in __nullterminated char* pos) |
81 | { |
82 | return (*pos > 0) ? ++pos : (char *)_mbsinc((const unsigned char *)pos); |
83 | } |
84 | |
85 | char* nextcharU(__in __nullterminated char* pos) |
86 | { |
87 | return ++pos; |
88 | } |
89 | |
90 | char* nextcharW(__in __nullterminated char* pos) |
91 | { |
92 | return (pos+2); |
93 | } |
94 | /*--------------------------------------------------------------------------*/ |
95 | unsigned SymAU(__in __nullterminated char* curPos) |
96 | { |
97 | return (unsigned)*curPos; |
98 | } |
99 | |
100 | unsigned SymW(__in __nullterminated char* curPos) |
101 | { |
102 | return (unsigned)*((WCHAR*)curPos); |
103 | } |
104 | /*--------------------------------------------------------------------------*/ |
105 | char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen) |
106 | { |
107 | char *nb = new char[tokLen+1]; |
108 | if(nb != NULL) |
109 | { |
110 | memcpy(nb, curTok, tokLen); |
111 | nb[tokLen] = 0; |
112 | } |
113 | return nb; |
114 | } |
115 | char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen) |
116 | { |
117 | WCHAR* wcurTok = (WCHAR*)curTok; |
118 | char *nb = new char[(tokLen<<1) + 2]; |
119 | if(nb != NULL) |
120 | { |
121 | tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),nb,(int)(tokLen<<1) + 2,NULL,NULL); |
122 | nb[tokLen] = 0; |
123 | } |
124 | return nb; |
125 | } |
126 | /*--------------------------------------------------------------------------*/ |
127 | char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize) |
128 | { |
129 | if(tokLen >= bufSize) return NULL; |
130 | memcpy(staticBuf, curTok, tokLen); |
131 | staticBuf[tokLen] = 0; |
132 | return staticBuf; |
133 | } |
134 | char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize) |
135 | { |
136 | WCHAR* wcurTok = (WCHAR*)curTok; |
137 | if(tokLen >= bufSize/2) return NULL; |
138 | tokLen = WszWideCharToMultiByte(CP_UTF8,0,(LPCWSTR)wcurTok,(int)(tokLen >> 1),staticBuf,(int)bufSize,NULL,NULL); |
139 | staticBuf[tokLen] = 0; |
140 | return staticBuf; |
141 | } |
142 | /*--------------------------------------------------------------------------*/ |
143 | unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes) |
144 | { |
145 | static char dbuff[128]; |
146 | char* pdummy; |
147 | if(L > 127) L = 127; |
148 | memcpy(dbuff,begNum,L); |
149 | dbuff[L] = 0; |
150 | *ppRes = new double(strtod(dbuff, &pdummy)); |
151 | return ((unsigned)(pdummy - dbuff)); |
152 | } |
153 | |
154 | unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes) |
155 | { |
156 | static char dbuff[256]; |
157 | char* pdummy; |
158 | if(L > 254) L = 254; |
159 | memcpy(dbuff,begNum,L); |
160 | dbuff[L] = 0; |
161 | dbuff[L+1] = 0; |
162 | *ppRes = new double(wcstod((const wchar_t*)dbuff, (wchar_t**)&pdummy)); |
163 | return ((unsigned)(pdummy - dbuff)); |
164 | } |
165 | /*--------------------------------------------------------------------------*/ |
166 | char* yygetline(int Line) |
167 | { |
168 | static char buff[0x4000]; |
169 | char *pLine=NULL, *pNextLine=NULL; |
170 | char *pBegin=NULL, *pEnd = NULL; |
171 | unsigned uCount = parser->getAll(&pBegin); |
172 | pEnd = pBegin + uCount; |
173 | buff[0] = 0; |
174 | for(uCount=0, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine)) |
175 | { |
176 | if(Sym(pLine) == '\n') uCount++; |
177 | if(uCount == (unsigned int)(Line-1)) break; |
178 | } |
179 | pLine = nextchar(pLine); |
180 | if(pLine < pEnd) |
181 | { |
182 | for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine)) |
183 | { |
184 | if(Sym(pNextLine) == '\n') break; |
185 | } |
186 | if(Sym == SymW) // Unicode file |
187 | { |
188 | if(*((WCHAR*)pNextLine - 1) == '\r') pNextLine -= 2; |
189 | uCount = (unsigned)(pNextLine - pLine); |
190 | uCount &= 0x1FFF; // limit: 8K wchars |
191 | WCHAR* wzBuff = (WCHAR*)buff; |
192 | memcpy(buff,pLine,uCount); |
193 | wzBuff[uCount >> 1] = 0; |
194 | } |
195 | else |
196 | { |
197 | if(*(pNextLine-1)=='\r') pNextLine--; |
198 | uCount = (unsigned)(pNextLine - pLine); |
199 | uCount &= 0x3FFF; // limit: 16K chars |
200 | memcpy(buff,pLine,uCount); |
201 | buff[uCount]=0; |
202 | } |
203 | } |
204 | return buff; |
205 | } |
206 | |
207 | void yyerror(__in __nullterminated const char* str) { |
208 | char tokBuff[64]; |
209 | WCHAR *wzfile = (WCHAR*)(PENV->in->namew()); |
210 | int iline = PENV->curLine; |
211 | |
212 | size_t len = PENV->curPos - PENV->curTok; |
213 | if (len > 62) len = 62; |
214 | memcpy(tokBuff, PENV->curTok, len); |
215 | tokBuff[len] = 0; |
216 | tokBuff[len+1] = 0; |
217 | if(PENV->bExternSource) |
218 | { |
219 | wzfile = PASM->m_wzSourceFileName; |
220 | iline = PENV->nExtLine; |
221 | } |
222 | if(Sym == SymW) // Unicode file |
223 | fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n" , |
224 | wzfile, iline, str, (WCHAR*)tokBuff, (WCHAR*)yygetline(PENV->curLine)); |
225 | else |
226 | fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n" , |
227 | wzfile, iline, str, tokBuff, yygetline(PENV->curLine)); |
228 | parser->success = false; |
229 | } |
230 | |
231 | /********************************************************************************/ |
232 | /* looks up the typedef 'name' of length 'nameLen' (name does not need to be |
233 | null terminated) Returns 0 on failure */ |
234 | TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen) |
235 | { |
236 | TypeDefDescr* pRet = NULL; |
237 | static char Name[4096]; |
238 | if(PASM->NumTypeDefs()) |
239 | { |
240 | if(NewStaticStrFromToken(name,NameLen,Name,4096)) |
241 | pRet = PASM->FindTypeDef(Name); |
242 | } |
243 | return pRet; |
244 | } |
245 | |
246 | int TYPEDEF(TypeDefDescr* pTDD) |
247 | { |
248 | switch(TypeFromToken(pTDD->m_tkTypeSpec)) |
249 | { |
250 | case mdtTypeDef: |
251 | case mdtTypeRef: |
252 | return TYPEDEF_T; |
253 | case mdtMethodDef: |
254 | case 0x99000000: |
255 | return TYPEDEF_M; |
256 | case mdtFieldDef: |
257 | case 0x98000000: |
258 | return TYPEDEF_F; |
259 | case mdtMemberRef: |
260 | return TYPEDEF_MR; |
261 | case mdtTypeSpec: |
262 | return TYPEDEF_TS; |
263 | case mdtCustomAttribute: |
264 | return TYPEDEF_CA; |
265 | } |
266 | return ERROR_; |
267 | |
268 | } |
269 | |
270 | /********************************************************************************/ |
271 | void indexKeywords(Indx* indx) // called in Assembler constructor (assem.cpp) |
272 | { |
273 | Keywords* low = keywords; |
274 | Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords)); |
275 | Keywords* mid; |
276 | for(mid = low; mid < high; mid++) |
277 | { |
278 | indx->IndexString((char*)(mid->name),mid); |
279 | } |
280 | } |
281 | |
282 | Instr* SetupInstr(unsigned short opcode) |
283 | { |
284 | Instr* pVal = NULL; |
285 | if((pVal = PASM->GetInstr())) |
286 | { |
287 | pVal->opcode = opcode; |
288 | if((pVal->pWriter = PASM->m_pSymDocument)!=NULL) |
289 | { |
290 | if(PENV->bExternSource) |
291 | { |
292 | pVal->linenum = PENV->nExtLine; |
293 | pVal->column = PENV->nExtCol; |
294 | pVal->linenum_end = PENV->nExtLineEnd; |
295 | pVal->column_end = PENV->nExtColEnd; |
296 | pVal->pc = nCurrPC; |
297 | } |
298 | else |
299 | { |
300 | pVal->linenum = PENV->curLine; |
301 | pVal->column = 1; |
302 | pVal->linenum_end = PENV->curLine; |
303 | pVal->column_end = 0; |
304 | pVal->pc = PASM->m_CurPC; |
305 | } |
306 | } |
307 | } |
308 | return pVal; |
309 | } |
310 | /* looks up the keyword 'name' of length 'nameLen' (name does not need to be |
311 | null terminated) Returns 0 on failure */ |
312 | int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode) |
313 | { |
314 | static char Name[128]; |
315 | Keywords* mid; |
316 | |
317 | if(NULL == NewStaticStrFromToken((char*)name,nameLen,Name,128)) return 0; // can't be a keyword |
318 | mid = (Keywords*)(PASM->indxKeywords.FindString(Name)); |
319 | if(mid == NULL) return 0; |
320 | *pOpcode = mid->tokenVal; |
321 | |
322 | return(mid->token); |
323 | } |
324 | |
325 | /********************************************************************************/ |
326 | /* convert str to a uint64 */ |
// Digit value of each 7-bit character: '0'-'9' give 0-9, letters of either case give
// 10-35, and every other slot has all bits set (so it is never below a radix).
unsigned digits[128];
void Init_str2uint64()
{
    memset(digits,255,sizeof(digits));
    for(int d = 0; d < 10; d++)
        digits['0' + d] = d;
    for(int k = 0; k < 26; k++)
    {
        digits['A' + k] = 10 + k;
        digits['a' + k] = 10 + k;
    }
}
336 | static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix) |
337 | { |
338 | unsigned __int64 ret = 0; |
339 | unsigned digit,ix; |
340 | _ASSERTE(radix <= 36); |
341 | for(;;str = nextchar((char*)str)) |
342 | { |
343 | ix = Sym((char*)str); |
344 | if(ix <= 0x7F) |
345 | { |
346 | digit = digits[ix]; |
347 | if(digit < radix) |
348 | { |
349 | ret = ret * radix + digit; |
350 | continue; |
351 | } |
352 | } |
353 | *endStr = str; |
354 | return(ret); |
355 | } |
356 | } |
357 | /********************************************************************************/ |
358 | /* Append an UTF-8 string preceded by compressed length, no zero terminator, to a BinStr */ |
359 | static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz) |
360 | { |
361 | if((pbs != NULL) && (sz != NULL)) |
362 | { |
363 | unsigned L = (unsigned) strlen(sz); |
364 | BYTE* pb = NULL; |
365 | corEmitInt(pbs,L); |
366 | if((pb = pbs->getBuff(L)) != NULL) |
367 | memcpy(pb,sz,L); |
368 | } |
369 | } |
370 | |
371 | /********************************************************************************/ |
/* fetch the next token, and return it. Also set the yylval.union if the
   lexical token also has a value */
374 | |
375 | |
376 | BOOL _Alpha[128]; |
377 | BOOL _Digit[128]; |
378 | BOOL _AlNum[128]; |
379 | BOOL _ValidSS[128]; |
380 | BOOL _ValidCS[128]; |
381 | void SetSymbolTables() |
382 | { |
383 | unsigned i; |
384 | memset(_Alpha,0,sizeof(_Alpha)); |
385 | memset(_Digit,0,sizeof(_Digit)); |
386 | memset(_AlNum,0,sizeof(_AlNum)); |
387 | memset(_ValidSS,0,sizeof(_ValidSS)); |
388 | memset(_ValidCS,0,sizeof(_ValidCS)); |
389 | for(i = 'A'; i <= 'Z'; i++) |
390 | { |
391 | _Alpha[i] = TRUE; |
392 | _AlNum[i] = TRUE; |
393 | _ValidSS[i] = TRUE; |
394 | _ValidCS[i] = TRUE; |
395 | } |
396 | for(i = 'a'; i <= 'z'; i++) |
397 | { |
398 | _Alpha[i] = TRUE; |
399 | _AlNum[i] = TRUE; |
400 | _ValidSS[i] = TRUE; |
401 | _ValidCS[i] = TRUE; |
402 | } |
403 | for(i = '0'; i <= '9'; i++) |
404 | { |
405 | _Digit[i] = TRUE; |
406 | _AlNum[i] = TRUE; |
407 | _ValidCS[i] = TRUE; |
408 | } |
409 | _ValidSS[(unsigned char)'_'] = TRUE; |
410 | _ValidSS[(unsigned char)'#'] = TRUE; |
411 | _ValidSS[(unsigned char)'$'] = TRUE; |
412 | _ValidSS[(unsigned char)'@'] = TRUE; |
413 | |
414 | _ValidCS[(unsigned char)'_'] = TRUE; |
415 | _ValidCS[(unsigned char)'?'] = TRUE; |
416 | _ValidCS[(unsigned char)'$'] = TRUE; |
417 | _ValidCS[(unsigned char)'@'] = TRUE; |
418 | _ValidCS[(unsigned char)'`'] = TRUE; |
419 | } |
420 | BOOL IsAlpha(unsigned x) { return (x < 128)&&_Alpha[x]; } |
421 | BOOL IsDigit(unsigned x) { return (x < 128)&&_Digit[x]; } |
422 | BOOL IsAlNum(unsigned x) { return (x < 128)&&_AlNum[x]; } |
423 | BOOL IsValidStartingSymbol(unsigned x) { return (x < 128)&&_ValidSS[x]; } |
424 | BOOL IsValidContinuingSymbol(unsigned x) { return (x < 128)&&_ValidCS[x]; } |
425 | |
426 | |
427 | char* nextBlank(__in __nullterminated char* curPos) |
428 | { |
429 | for(;;) |
430 | { |
431 | switch(Sym(curPos)) |
432 | { |
433 | case '/' : |
434 | if ((Sym(nextchar(curPos)) == '/')|| (Sym(nextchar(curPos)) == '*')) |
435 | return curPos; |
436 | else |
437 | { |
438 | curPos = nextchar(curPos); |
439 | break; |
440 | } |
441 | case 0: |
442 | case '\n': |
443 | case '\r': |
444 | case ' ' : |
445 | case '\t': |
446 | case '\f': |
447 | return curPos; |
448 | |
449 | default: |
450 | curPos = nextchar(curPos); |
451 | } |
452 | } |
453 | } |
454 | |
455 | char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate) |
456 | { |
457 | const unsigned = 1; |
458 | const unsigned = 2; |
459 | unsigned nextSym, state = *pstate; |
460 | char* nextPos; |
461 | for(;;) |
462 | { // skip whitespace and comments |
463 | if (curPos >= PENV->endPos) |
464 | { |
465 | *pstate = state; |
466 | return NULL; |
467 | } |
468 | switch(Sym(curPos)) |
469 | { |
470 | case 0: |
471 | return NULL; // EOF |
472 | case '\n': |
473 | state &= ~eolComment; |
474 | PENV->curLine++; |
475 | if(PENV->bExternSource) |
476 | { |
477 | if(PENV->bExternSourceAutoincrement) PENV->nExtLine++; |
478 | PASM->m_ulCurLine = PENV->nExtLine; |
479 | PASM->m_ulCurColumn = PENV->nExtCol; |
480 | } |
481 | else |
482 | { |
483 | PASM->m_ulCurLine = PENV->curLine; |
484 | PASM->m_ulCurColumn = 1; |
485 | } |
486 | break; |
487 | case '\r': |
488 | case ' ' : |
489 | case '\t': |
490 | case '\f': |
491 | break; |
492 | |
493 | case '*' : |
494 | if(state == 0) goto PAST_WHITESPACE; |
495 | if(state & multiComment) |
496 | { |
497 | nextPos = nextchar(curPos); |
498 | if (Sym(nextPos) == '/') |
499 | { |
500 | curPos = nextPos; |
501 | state &= ~multiComment; |
502 | } |
503 | } |
504 | break; |
505 | |
506 | case '/' : |
507 | if(state == 0) |
508 | { |
509 | nextPos = nextchar(curPos); |
510 | nextSym = Sym(nextPos); |
511 | if (nextSym == '/') |
512 | { |
513 | curPos = nextPos; |
514 | state |= eolComment; |
515 | } |
516 | else if (nextSym == '*') |
517 | { |
518 | curPos = nextPos; |
519 | state |= multiComment; |
520 | } |
521 | else goto PAST_WHITESPACE; |
522 | } |
523 | break; |
524 | |
525 | default: |
526 | if (state == 0) goto PAST_WHITESPACE; |
527 | } |
528 | curPos = nextchar(curPos); |
529 | } |
530 | PAST_WHITESPACE: |
531 | *pstate = state; |
532 | return curPos; |
533 | } |
534 | |
535 | char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage); |
536 | |
537 | int ProcessEOF() |
538 | { |
539 | PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP(); |
540 | if(prev_penv != NULL) |
541 | { |
542 | //delete [] (WCHAR*)(PENV->in->namew()); |
543 | delete PENV->in; |
544 | delete PENV; |
545 | parser->penv = prev_penv; |
546 | SetFunctionPtrs(); |
547 | char* szFileName = new char[strlen(PENV->szFileName)+1]; |
548 | strcpy_s(szFileName,strlen(PENV->szFileName)+1,PENV->szFileName); |
549 | PASM->SetSourceFileName(szFileName); // deletes the argument! |
550 | return ';'; |
551 | } |
552 | //PENV->in = NULL; |
553 | return 0; |
554 | } |
555 | |
// Restarts token scanning: clears 'state', reloads 'curPos' from PENV->curPos and jumps
// to the 'NextToken' label. Usable only inside a function that has those two locals
// and that label.
#define NEXT_TOKEN {state=0; curPos=PENV->curPos; goto NextToken;}
557 | |
558 | int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes) |
559 | { |
560 | unsigned quote = curSym; |
561 | curPos = nextchar(curPos); |
562 | char* fromPtr = curPos; |
563 | bool escape = false; |
564 | |
565 | for(;;) |
566 | { // Find matching quote |
567 | curSym = (curPos >= PENV->endPos) ? 0 : Sym(curPos); |
568 | if(curSym == 0) |
569 | { |
570 | PENV->curPos = curPos; |
571 | return(BAD_LITERAL_); |
572 | } |
573 | else if(curSym == '\\') |
574 | escape = !escape; |
575 | else |
576 | { |
577 | if(curSym == '\n') |
578 | { |
579 | PENV->curLine++; |
580 | if(PENV->bExternSource) |
581 | { |
582 | if(PENV->bExternSourceAutoincrement) PENV->nExtLine++; |
583 | PASM->m_ulCurLine = PENV->nExtLine; |
584 | PASM->m_ulCurColumn = PENV->nExtCol; |
585 | } |
586 | else |
587 | { |
588 | PASM->m_ulCurLine = PENV->curLine; |
589 | PASM->m_ulCurColumn = 1; |
590 | } |
591 | if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); } |
592 | } |
593 | else if ((curSym == quote) && (!escape)) break; |
594 | escape = false; |
595 | } |
596 | curPos = nextchar(curPos); |
597 | } |
598 | // translate escaped characters |
599 | unsigned tokLen = (unsigned)(curPos - fromPtr); |
600 | char* newstr = NewStrFromToken(fromPtr, tokLen); |
601 | char* toPtr; |
602 | curPos = nextchar(curPos); // skip closing quote |
603 | if(translate_escapes) |
604 | { |
605 | fromPtr = newstr; |
606 | //_ASSERTE(0); |
607 | tokLen = (unsigned)strlen(newstr); |
608 | toPtr = new char[tokLen+1]; |
609 | if(toPtr==NULL) return BAD_LITERAL_; |
610 | yylval.string = toPtr; |
611 | char* endPtr = fromPtr+tokLen; |
612 | while(fromPtr < endPtr) |
613 | { |
614 | if (*fromPtr == '\\') |
615 | { |
616 | fromPtr++; |
617 | switch(*fromPtr) |
618 | { |
619 | case 't': |
620 | *toPtr++ = '\t'; |
621 | break; |
622 | case 'n': |
623 | *toPtr++ = '\n'; |
624 | break; |
625 | case 'b': |
626 | *toPtr++ = '\b'; |
627 | break; |
628 | case 'f': |
629 | *toPtr++ = '\f'; |
630 | break; |
631 | case 'v': |
632 | *toPtr++ = '\v'; |
633 | break; |
634 | case '?': |
635 | *toPtr++ = '\?'; |
636 | break; |
637 | case 'r': |
638 | *toPtr++ = '\r'; |
639 | break; |
640 | case 'a': |
641 | *toPtr++ = '\a'; |
642 | break; |
643 | case '\n': |
644 | do fromPtr++; |
645 | while(isspace(*fromPtr)); |
646 | --fromPtr; // undo the increment below |
647 | break; |
648 | case '0': |
649 | case '1': |
650 | case '2': |
651 | case '3': |
652 | if (IsDigit(fromPtr[1]) && IsDigit(fromPtr[2])) |
653 | { |
654 | *toPtr++ = ((fromPtr[0] - '0') * 8 + (fromPtr[1] - '0')) * 8 + (fromPtr[2] - '0'); |
655 | fromPtr+= 2; |
656 | } |
657 | else if(*fromPtr == '0') *toPtr++ = 0; |
658 | else *toPtr++ = *fromPtr; |
659 | break; |
660 | default: |
661 | *toPtr++ = *fromPtr; |
662 | } |
663 | fromPtr++; |
664 | } |
665 | else |
666 | // *toPtr++ = *fromPtr++; |
667 | { |
668 | char* tmpPtr = fromPtr; |
669 | fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr); |
670 | while(tmpPtr < fromPtr) *toPtr++ = *tmpPtr++; |
671 | } |
672 | |
673 | } //end while(fromPtr < endPtr) |
674 | *toPtr = 0; // terminate string |
675 | delete [] newstr; |
676 | } |
677 | else |
678 | { |
679 | yylval.string = newstr; |
680 | toPtr = newstr + strlen(newstr); |
681 | } |
682 | |
683 | PENV->curPos = curPos; |
684 | if(quote == '"') |
685 | { |
686 | BinStr* pBS = new BinStr(); |
687 | unsigned size = (unsigned)(toPtr - yylval.string); |
688 | memcpy(pBS->getBuff(size),yylval.string,size); |
689 | delete [] yylval.string; |
690 | yylval.binstr = pBS; |
691 | return QSTRING; |
692 | } |
693 | else |
694 | { |
695 | if(PASM->NumTypeDefs()) |
696 | { |
697 | TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string); |
698 | if(pTDD != NULL) |
699 | { |
700 | delete [] yylval.string; |
701 | yylval.tdd = pTDD; |
702 | return(TYPEDEF(pTDD)); |
703 | } |
704 | } |
705 | return SQSTRING; |
706 | } |
707 | } |
708 | |
709 | #ifdef _PREFAST_ |
710 | #pragma warning(push) |
711 | #pragma warning(disable:21000) // Suppress PREFast warning about overly large function |
712 | #endif |
713 | int yylex() |
714 | { |
715 | char* curPos = PENV->curPos; |
716 | unsigned state = 0; |
717 | const unsigned = 2; |
718 | unsigned curSym; |
719 | |
720 | char* newstr; |
721 | |
722 | NextToken: |
723 | // Skip any leading whitespace and comments |
724 | curPos = skipBlanks(curPos, &state); |
725 | if(curPos == NULL) |
726 | { |
727 | if (state & multiComment) return (BAD_COMMENT_); |
728 | if(ProcessEOF() == 0) return 0; // EOF |
729 | NEXT_TOKEN; |
730 | } |
731 | char* curTok = curPos; |
732 | PENV->curTok = curPos; |
733 | PENV->curPos = curPos; |
734 | int tok = ERROR_; |
735 | yylval.string = 0; |
736 | |
737 | curSym = Sym(curPos); |
738 | if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed! |
739 | { |
740 | int i,s=0; |
741 | for(i=0; i<2; i++, curPos = nextchar(curPos), curSym = Sym(curPos)) |
742 | { |
743 | if(('0' <= curSym)&&(curSym <= '9')) s = s*16+(curSym - '0'); |
744 | else if(('A' <= curSym)&&(curSym <= 'F')) s = s*16+(curSym - 'A' + 10); |
745 | else if(('a' <= curSym)&&(curSym <= 'f')) s = s*16+(curSym - 'a' + 10); |
746 | else break; // don't increase curPos! |
747 | } |
748 | if(i) |
749 | { |
750 | tok = HEXBYTE; |
751 | yylval.int32 = s; |
752 | } |
753 | else |
754 | { |
755 | if(curSym == ')' || curSym == '}') |
756 | { |
757 | bParsingByteArray = FALSE; |
758 | goto Just_A_Character; |
759 | } |
760 | } |
761 | PENV->curPos = curPos; |
762 | return(tok); |
763 | } |
764 | if(curSym == '?') // '?' may be part of an identifier, if it's not followed by punctuation |
765 | { |
766 | if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id; |
767 | goto Just_A_Character; |
768 | } |
769 | |
770 | if (IsValidStartingSymbol(curSym)) |
771 | { // is it an ID |
772 | Its_An_Id: |
773 | size_t offsetDot = (size_t)-1; // first appearance of '.' |
774 | size_t offsetDotDigit = (size_t)-1; // first appearance of '.<digit>' (not DOTTEDNAME!) |
775 | do |
776 | { |
777 | curPos = nextchar(curPos); |
778 | if (Sym(curPos) == '.') |
779 | { |
780 | if (offsetDot == (size_t)-1) offsetDot = curPos - curTok; |
781 | curPos = nextchar(curPos); |
782 | if((offsetDotDigit==(size_t)-1)&&(Sym(curPos) >= '0')&&(Sym(curPos) <= '9')) |
783 | offsetDotDigit = curPos - curTok - 1; |
784 | } |
785 | } while(IsValidContinuingSymbol(Sym(curPos))); |
786 | |
787 | size_t tokLen = curPos - curTok; |
788 | // check to see if it is a keyword |
789 | int token = findKeyword(curTok, tokLen, &yylval.opcode); |
790 | if (token != 0) |
791 | { |
792 | //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos); |
793 | PENV->curPos = curPos; |
794 | PENV->curTok = curTok; |
795 | if(!SkipToken) |
796 | { |
797 | switch(token) |
798 | { |
799 | case P_INCLUDE: |
800 | //if(include_first_pass) |
801 | //{ |
802 | // PENV->curPos = curTok; |
803 | // include_first_pass = FALSE; |
804 | // return ';'; |
805 | //} |
806 | //include_first_pass = TRUE; |
807 | curPos = skipBlanks(curPos,&state); |
808 | if(curPos == NULL) |
809 | { |
810 | if (state & multiComment) return (BAD_COMMENT_); |
811 | if(ProcessEOF() == 0) return 0; // EOF |
812 | NEXT_TOKEN; |
813 | } |
814 | if(Sym(curPos) != '"') return ERROR_; |
815 | curPos = nextchar(curPos); |
816 | curTok = curPos; |
817 | PENV->curTok = curPos; |
818 | while(Sym(curPos) != '"') |
819 | { |
820 | curPos = nextchar(curPos); |
821 | if(curPos >= PENV->endPos) return ERROR_; |
822 | PENV->curPos = curPos; |
823 | } |
824 | tokLen = PENV->curPos - curTok; |
825 | curPos = nextchar(curPos); |
826 | PENV->curPos = curPos; |
827 | { |
828 | WCHAR* wzFile=NULL; |
829 | if(Sym == SymW) |
830 | { |
831 | if((wzFile = new WCHAR[tokLen/2 + 1]) != NULL) |
832 | { |
833 | memcpy(wzFile,curTok,tokLen); |
834 | wzFile[tokLen/2] = 0; |
835 | } |
836 | } |
837 | else |
838 | { |
839 | if((wzFile = new WCHAR[tokLen+1]) != NULL) |
840 | { |
841 | tokLen = WszMultiByteToWideChar(g_uCodePage,0,curTok,(int)tokLen,wzFile,(int)tokLen+1); |
842 | wzFile[tokLen] = 0; |
843 | } |
844 | } |
845 | if(wzFile != NULL) |
846 | { |
847 | if((parser->wzIncludePath != NULL) |
848 | &&(wcschr(wzFile,'\\')==NULL)&&(wcschr(wzFile,':')==NULL)) |
849 | { |
850 | PathString wzFullName; |
851 | |
852 | WCHAR* pwz; |
853 | DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL, |
854 | TRUE, wzFullName,&pwz); |
855 | if(dw != 0) |
856 | { |
857 | delete [] wzFile; |
858 | |
859 | wzFile = wzFullName.GetCopyOfUnicodeString(); |
860 | } |
861 | |
862 | } |
863 | if(PASM->m_fReportProgress) |
864 | parser->msg("\nIncluding '%S'\n" ,wzFile); |
865 | MappedFileStream *pIn = new MappedFileStream(wzFile); |
866 | if((pIn != NULL)&&pIn->IsValid()) |
867 | { |
868 | parser->PEStack.PUSH(PENV); |
869 | PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument! |
870 | parser->CreateEnvironment(pIn); |
871 | NEXT_TOKEN; |
872 | } |
873 | else |
874 | { |
875 | delete [] wzFile; |
876 | PASM->report->error("#include failed\n" ); |
877 | return ERROR_; |
878 | } |
879 | } |
880 | else |
881 | { |
882 | PASM->report->error("Out of memory\n" ); |
883 | return ERROR_; |
884 | } |
885 | } |
886 | curPos = PENV->curPos; |
887 | curTok = PENV->curTok; |
888 | break; |
889 | case P_IFDEF: |
890 | case P_IFNDEF: |
891 | case P_DEFINE: |
892 | case P_UNDEF: |
893 | curPos = skipBlanks(curPos,&state); |
894 | if(curPos == NULL) |
895 | { |
896 | if (state & multiComment) return (BAD_COMMENT_); |
897 | if(ProcessEOF() == 0) return 0; // EOF |
898 | NEXT_TOKEN; |
899 | } |
900 | curTok = curPos; |
901 | PENV->curTok = curPos; |
902 | PENV->curPos = curPos; |
903 | if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_; |
904 | do |
905 | { |
906 | curPos = nextchar(curPos); |
907 | } while(IsValidContinuingSymbol(Sym(curPos))); |
908 | tokLen = curPos - curTok; |
909 | |
910 | newstr = NewStrFromToken(curTok, tokLen); |
911 | if((token==P_DEFINE)||(token==P_UNDEF)) |
912 | { |
913 | if(token == P_DEFINE) |
914 | { |
915 | curPos = skipBlanks(curPos,&state); |
916 | if ((curPos == NULL) && (ProcessEOF() == 0)) |
917 | { |
918 | DefineVar(newstr, NULL); |
919 | return 0; |
920 | } |
921 | curSym = Sym(curPos); |
922 | if(curSym != '"') |
923 | DefineVar(newstr, NULL); |
924 | else |
925 | { |
926 | tok = parse_literal(curSym, curPos, FALSE); |
927 | if(tok == QSTRING) |
928 | { |
929 | // if not ANSI, then string is in UTF-8, |
930 | // insert prefix |
931 | if(nextchar != nextcharA) |
932 | { |
933 | yylval.binstr->insertInt8(0xEF); |
934 | yylval.binstr->insertInt8(0xBB); |
935 | yylval.binstr->insertInt8(0xBF); |
936 | } |
937 | yylval.binstr->appendInt8(' '); |
938 | DefineVar(newstr, yylval.binstr); |
939 | } |
940 | else |
941 | return tok; |
942 | } |
943 | } |
944 | else UndefVar(newstr); |
945 | } |
946 | else |
947 | { |
948 | SkipToken = IsVarDefined(newstr); |
949 | if(token == P_IFDEF) SkipToken = !SkipToken; |
950 | IfEndif++; |
951 | if(SkipToken) IfEndifSkip=IfEndif; |
952 | } |
953 | break; |
954 | case P_ELSE: |
955 | SkipToken = TRUE; |
956 | IfEndifSkip=IfEndif; |
957 | break; |
958 | case P_ENDIF: |
959 | if(IfEndif == 0) |
960 | { |
961 | PASM->report->error("Unmatched #endif\n" ); |
962 | return ERROR_; |
963 | } |
964 | IfEndif--; |
965 | break; |
966 | default: |
967 | return(token); |
968 | } |
969 | goto NextToken; |
970 | } |
971 | if(SkipToken) |
972 | { |
973 | switch(token) |
974 | { |
975 | case P_IFDEF: |
976 | case P_IFNDEF: |
977 | IfEndif++; |
978 | break; |
979 | case P_ELSE: |
980 | if(IfEndif == IfEndifSkip) SkipToken = FALSE; |
981 | break; |
982 | case P_ENDIF: |
983 | if(IfEndif == IfEndifSkip) SkipToken = FALSE; |
984 | IfEndif--; |
985 | break; |
986 | default: |
987 | break; |
988 | } |
989 | //if(yylval.instr) yylval.instr->opcode = -1; |
990 | goto NextToken; |
991 | } |
992 | return(token); |
993 | } // end if token != 0 |
994 | if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; } |
995 | |
996 | VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen)); |
997 | if(pVarName != NULL) |
998 | { |
999 | if(pVarName->pbody != NULL) |
1000 | { |
1001 | BinStrStream *pIn = new BinStrStream(pVarName->pbody); |
1002 | if((pIn != NULL)&&pIn->IsValid()) |
1003 | { |
1004 | PENV->curPos = curPos; |
1005 | parser->PEStack.PUSH(PENV); |
1006 | parser->CreateEnvironment(pIn); |
1007 | NEXT_TOKEN; |
1008 | } |
1009 | } |
1010 | } |
1011 | |
1012 | TypeDefDescr* pTDD = findTypedef(curTok,tokLen); |
1013 | |
1014 | if(pTDD != NULL) |
1015 | { |
1016 | yylval.tdd = pTDD; |
1017 | PENV->curPos = curPos; |
1018 | PENV->curTok = curTok; |
1019 | return(TYPEDEF(pTDD)); |
1020 | } |
1021 | if(Sym(curTok) == '#') |
1022 | { |
1023 | PENV->curPos = curPos; |
1024 | PENV->curTok = curTok; |
1025 | return(ERROR_); |
1026 | } |
1027 | // Not a keyword, normal identifiers don't have '.' in them |
1028 | if (offsetDot < (size_t)-1) |
1029 | { |
1030 | if(offsetDotDigit < (size_t)-1) |
1031 | { |
1032 | curPos = curTok+offsetDotDigit; |
1033 | tokLen = offsetDotDigit; |
1034 | } |
1035 | // protection against something like Foo.Bar..123 or Foo.Bar. |
1036 | unsigned D = (Sym == SymW) ? 2 : 1; // Unicode or ANSI/UTF8! |
1037 | while((Sym(curPos-D)=='.')&&(tokLen)) |
1038 | { |
1039 | curPos -= D; |
1040 | tokLen -= D; |
1041 | } |
1042 | } |
1043 | if((yylval.string = NewStrFromToken(curTok,tokLen))) |
1044 | { |
1045 | tok = (offsetDot == (size_t)(-1))? ID : DOTTEDNAME; |
1046 | //printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos); |
1047 | } |
1048 | else return BAD_LITERAL_; |
1049 | } |
1050 | else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; } |
1051 | else if (IsDigit(curSym) |
1052 | || (curSym == '.' && IsDigit(Sym(nextchar(curPos)))) |
1053 | || (curSym == '-' && IsDigit(Sym(nextchar(curPos))))) |
1054 | { |
1055 | const char* begNum = curPos; |
1056 | unsigned radix = 10; |
1057 | |
1058 | neg = (curSym == '-'); // always make it unsigned |
1059 | if (neg) curPos = nextchar(curPos); |
1060 | |
1061 | if (Sym(curPos) == '0' && Sym(nextchar(curPos)) != '.') |
1062 | { |
1063 | curPos = nextchar(curPos); |
1064 | radix = 8; |
1065 | if (Sym(curPos) == 'x' || Sym(curPos) == 'X') |
1066 | { |
1067 | curPos = nextchar(curPos); |
1068 | radix = 16; |
1069 | } |
1070 | } |
1071 | begNum = curPos; |
1072 | { |
1073 | unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix); |
1074 | unsigned __int64 mask64 = neg ? UI64(0xFFFFFFFF80000000) : UI64(0xFFFFFFFF00000000); |
1075 | unsigned __int64 largestNegVal32 = UI64(0x0000000080000000); |
1076 | if ((i64 & mask64) && (i64 != largestNegVal32)) |
1077 | { |
1078 | yylval.int64 = new __int64(i64); |
1079 | tok = INT64; |
1080 | if (neg) *yylval.int64 = -*yylval.int64; |
1081 | } |
1082 | else |
1083 | { |
1084 | yylval.int32 = (__int32)i64; |
1085 | tok = INT32; |
1086 | if(neg) yylval.int32 = -yylval.int32; |
1087 | } |
1088 | } |
1089 | if (radix == 10 && ((Sym(curPos) == '.' && Sym(nextchar(curPos)) != '.') || Sym(curPos) == 'E' || Sym(curPos) == 'e')) |
1090 | { |
1091 | unsigned L = (unsigned)(PENV->endPos - begNum); |
1092 | curPos = (char*)begNum + GetDouble((char*)begNum,L,&yylval.float64); |
1093 | if (neg) *yylval.float64 = -*yylval.float64; |
1094 | tok = FLOAT64; |
1095 | } |
1096 | } |
1097 | else |
1098 | { // punctuation |
1099 | if (curSym == '"' || curSym == '\'') |
1100 | { |
1101 | return parse_literal(curSym, curPos, TRUE); |
1102 | } // end if (*curPos == '"' || *curPos == '\'') |
1103 | else if (curSym==':' && Sym(nextchar(curPos))==':') |
1104 | { |
1105 | curPos = nextchar(nextchar(curPos)); |
1106 | tok = DCOLON; |
1107 | } |
1108 | else if(curSym == '.') |
1109 | { |
1110 | if (Sym(nextchar(curPos))=='.' && Sym(nextchar(nextchar(curPos)))=='.') |
1111 | { |
1112 | curPos = nextchar(nextchar(nextchar(curPos))); |
1113 | tok = ELIPSIS; |
1114 | } |
1115 | else |
1116 | { |
1117 | do |
1118 | { |
1119 | curPos = nextchar(curPos); |
1120 | if (curPos >= PENV->endPos) |
1121 | return ERROR_; |
1122 | curSym = Sym(curPos); |
1123 | } |
1124 | while(IsAlNum(curSym) || curSym == '_' || curSym == '$'|| curSym == '@'|| curSym == '?'); |
1125 | size_t tokLen = curPos - curTok; |
1126 | |
1127 | // check to see if it is a keyword |
1128 | int token = findKeyword(curTok, tokLen, &yylval.opcode); |
1129 | if(token) |
1130 | { |
1131 | //printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos); |
1132 | PENV->curPos = curPos; |
1133 | PENV->curTok = curTok; |
1134 | return(token); |
1135 | } |
1136 | tok = '.'; |
1137 | curPos = nextchar(curTok); |
1138 | } |
1139 | } |
1140 | else |
1141 | { |
1142 | Just_A_Character: |
1143 | tok = curSym; |
1144 | curPos = nextchar(curPos); |
1145 | } |
1146 | //printf("yylex: PUNCT curPos=0x%8.8X\n",curPos); |
1147 | } |
1148 | dbprintf((" Line %d token %d (%c) val = %s\n" , PENV->curLine, tok, |
1149 | (tok < 128 && isprint(tok)) ? tok : ' ', |
1150 | (tok > 255 && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : "" )); |
1151 | |
1152 | PENV->curPos = curPos; |
1153 | PENV->curTok = curTok; |
1154 | return(tok); |
1155 | } |
1156 | #ifdef _PREFAST_ |
1157 | #pragma warning(pop) |
1158 | #endif |
1159 | |
1160 | /**************************************************************************/ |
1161 | static char* newString(__in __nullterminated const char* str1) |
1162 | { |
1163 | char* ret = new char[strlen(str1)+1]; |
1164 | if(ret) strcpy_s(ret, strlen(str1)+1, str1); |
1165 | return(ret); |
1166 | } |
1167 | |
1168 | /**************************************************************************/ |
1169 | /* concatenate strings and release them */ |
1170 | |
1171 | static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3) |
1172 | { |
1173 | size_t len1 = strlen(str1); |
1174 | size_t len = len1+2; |
1175 | if (str3) len += strlen(str3); |
1176 | char* ret = new char[len]; |
1177 | if(ret) |
1178 | { |
1179 | strcpy_s(ret, len, str1); |
1180 | delete [] str1; |
1181 | ret[len1] = delimiter; |
1182 | ret[len1+1] = 0; |
1183 | if (str3) |
1184 | { |
1185 | strcat_s(ret, len, str3); |
1186 | delete [] str3; |
1187 | } |
1188 | } |
1189 | return(ret); |
1190 | } |
1191 | |
1192 | /**************************************************************************/ |
1193 | static void corEmitInt(BinStr* buff, unsigned data) |
1194 | { |
1195 | unsigned cnt = CorSigCompressData(data, buff->getBuff(5)); |
1196 | buff->remove(5 - cnt); |
1197 | } |
1198 | |
1199 | |
1200 | /**************************************************************************/ |
1201 | /* move 'ptr past the exactly one type description */ |
1202 | |
1203 | unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType) |
1204 | { |
1205 | mdToken tk; |
1206 | AGAIN: |
1207 | switch(*ptr++) { |
1208 | case ELEMENT_TYPE_VOID : |
1209 | case ELEMENT_TYPE_BOOLEAN : |
1210 | case ELEMENT_TYPE_CHAR : |
1211 | case ELEMENT_TYPE_I1 : |
1212 | case ELEMENT_TYPE_U1 : |
1213 | case ELEMENT_TYPE_I2 : |
1214 | case ELEMENT_TYPE_U2 : |
1215 | case ELEMENT_TYPE_I4 : |
1216 | case ELEMENT_TYPE_U4 : |
1217 | case ELEMENT_TYPE_I8 : |
1218 | case ELEMENT_TYPE_U8 : |
1219 | case ELEMENT_TYPE_R4 : |
1220 | case ELEMENT_TYPE_R8 : |
1221 | case ELEMENT_TYPE_U : |
1222 | case ELEMENT_TYPE_I : |
1223 | case ELEMENT_TYPE_STRING : |
1224 | case ELEMENT_TYPE_OBJECT : |
1225 | case ELEMENT_TYPE_TYPEDBYREF : |
1226 | case ELEMENT_TYPE_SENTINEL : |
1227 | /* do nothing */ |
1228 | break; |
1229 | |
1230 | case ELEMENT_TYPE_VALUETYPE : |
1231 | case ELEMENT_TYPE_CLASS : |
1232 | ptr += CorSigUncompressToken(ptr, &tk); |
1233 | break; |
1234 | |
1235 | case ELEMENT_TYPE_CMOD_REQD : |
1236 | case ELEMENT_TYPE_CMOD_OPT : |
1237 | ptr += CorSigUncompressToken(ptr, &tk); |
1238 | goto AGAIN; |
1239 | |
1240 | case ELEMENT_TYPE_ARRAY : |
1241 | { |
1242 | ptr = skipType(ptr, fFixupType); // element Type |
1243 | unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); |
1244 | if (rank != 0) |
1245 | { |
1246 | unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); |
1247 | while(numSizes > 0) |
1248 | { |
1249 | CorSigUncompressData((PCCOR_SIGNATURE&) ptr); |
1250 | --numSizes; |
1251 | } |
1252 | unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); |
1253 | while(numLowBounds > 0) |
1254 | { |
1255 | CorSigUncompressData((PCCOR_SIGNATURE&) ptr); |
1256 | --numLowBounds; |
1257 | } |
1258 | } |
1259 | } |
1260 | break; |
1261 | |
1262 | // Modifiers or dependent types |
1263 | case ELEMENT_TYPE_PINNED : |
1264 | case ELEMENT_TYPE_PTR : |
1265 | case ELEMENT_TYPE_BYREF : |
1266 | case ELEMENT_TYPE_SZARRAY : |
1267 | // tail recursion optimization |
1268 | // ptr = skipType(ptr, fFixupType); |
1269 | // break |
1270 | goto AGAIN; |
1271 | |
1272 | case ELEMENT_TYPE_VAR: |
1273 | case ELEMENT_TYPE_MVAR: |
1274 | CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound |
1275 | break; |
1276 | |
1277 | case ELEMENT_TYPE_VARFIXUP: |
1278 | case ELEMENT_TYPE_MVARFIXUP: |
1279 | if(fFixupType) |
1280 | { |
1281 | BYTE* pb = ptr-1; // ptr incremented in switch |
1282 | unsigned __int8* ptr_save = ptr; |
1283 | int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // fixup # |
1284 | int compressed_size_n = (int)(ptr - ptr_save); // ptr was updated by CorSigUncompressData() |
1285 | int m = -1; |
1286 | if(PASM->m_TyParList) |
1287 | m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n)); |
1288 | if(m == -1) |
1289 | { |
1290 | PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n" , |
1291 | (*pb == ELEMENT_TYPE_MVARFIXUP)? "method " : "" , |
1292 | TyParFixupList.PEEK(n)); |
1293 | m = 0; |
1294 | } |
1295 | *pb = (*pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR; |
1296 | int compressed_size_m = (int)CorSigCompressData(m,pb+1); |
1297 | |
1298 | // Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number |
1299 | // 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read, |
1300 | // and 'm' is the generic parameter number being written out (in the same place where 'n' |
1301 | // came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so |
1302 | // it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup |
1303 | // number with the generic parameter number, we'll leave extra bytes in the signature following |
1304 | // the written generic parameter number. Thus, we do something of a hack to ensure that the |
1305 | // compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we |
1306 | // recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller |
1307 | // numbers can still be compressed in a larger amount of space, even though it's not optimal (and |
1308 | // CorSigCompressData() would never do it). If, however, the compressed sizes are the other |
1309 | // way around (m takes more space to compress than n), then we've already corrupted the |
1310 | // signature that we're reading by writing beyond what we should (is there some reason why |
1311 | // this is not possible?). |
1312 | // Note that 'ptr' has already been adjusted, above, to point to the next type after this one. |
1313 | // There is no need to update it when recompressing the data. |
1314 | |
1315 | if (compressed_size_m > compressed_size_n) |
1316 | { |
1317 | // We've got a problem: we just corrupted the rest of the signature! |
1318 | // (Can this ever happen in practice?) |
1319 | PASM->report->error("(fixupType) Too many %stype parameters\n" , |
1320 | (*pb == ELEMENT_TYPE_MVARFIXUP)? "method " : "" ); |
1321 | } |
1322 | else if (compressed_size_m < compressed_size_n) |
1323 | { |
1324 | // We didn't write out as much data as we read. This will leave extra bytes in the |
1325 | // signature that will be incorrectly recognized. Ideally, we would just shrink the |
1326 | // signature. That's not easy to do here. Instead, pad the bytes to force it to use |
1327 | // a larger encoding than needed. This assumes knowledge of the CorSigCompressData() |
1328 | // encoding. |
1329 | // |
1330 | // The cases: |
1331 | // compressed_size_m m bytes compressed_size_n result bytes |
1332 | // 1 m1 2 0x80 m1 |
1333 | // 1 m1 4 0xC0 0x00 0x00 m1 |
1334 | // 2 m1 m2 4 0xC0 0x00 (m1 & 0x7f) m2 |
1335 | |
1336 | _ASSERTE((compressed_size_m == 1) || (compressed_size_m == 2) || (compressed_size_m == 4)); |
1337 | _ASSERTE((compressed_size_n == 1) || (compressed_size_n == 2) || (compressed_size_n == 4)); |
1338 | |
1339 | if ((compressed_size_m == 1) && |
1340 | (compressed_size_n == 2)) |
1341 | { |
1342 | unsigned __int8 m1 = *(pb + 1); |
1343 | _ASSERTE(m1 < 0x80); |
1344 | *(pb + 1) = 0x80; |
1345 | *(pb + 2) = m1; |
1346 | } |
1347 | else |
1348 | if ((compressed_size_m == 1) && |
1349 | (compressed_size_n == 4)) |
1350 | { |
1351 | unsigned __int8 m1 = *(pb + 1); |
1352 | _ASSERTE(m1 < 0x80); |
1353 | *(pb + 1) = 0xC0; |
1354 | *(pb + 2) = 0x00; |
1355 | *(pb + 3) = 0x00; |
1356 | *(pb + 4) = m1; |
1357 | } |
1358 | else |
1359 | if ((compressed_size_m == 2) && |
1360 | (compressed_size_n == 4)) |
1361 | { |
1362 | unsigned __int8 m1 = *(pb + 1); |
1363 | unsigned __int8 m2 = *(pb + 2); |
1364 | _ASSERTE(m1 >= 0x80); |
1365 | m1 &= 0x7f; // strip the bit indicating it's a 2-byte thing |
1366 | *(pb + 1) = 0xC0; |
1367 | *(pb + 2) = 0x00; |
1368 | *(pb + 3) = m1; |
1369 | *(pb + 4) = m2; |
1370 | } |
1371 | } |
1372 | } |
1373 | else |
1374 | CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound |
1375 | break; |
1376 | |
1377 | case ELEMENT_TYPE_FNPTR: |
1378 | { |
1379 | CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // calling convention |
1380 | unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // arg count |
1381 | ptr = skipType(ptr, fFixupType); // return type |
1382 | while(argCnt > 0) |
1383 | { |
1384 | ptr = skipType(ptr, fFixupType); |
1385 | --argCnt; |
1386 | } |
1387 | } |
1388 | break; |
1389 | |
1390 | case ELEMENT_TYPE_GENERICINST: |
1391 | { |
1392 | ptr = skipType(ptr, fFixupType); // type constructor |
1393 | unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr); // arg count |
1394 | while(argCnt > 0) { |
1395 | ptr = skipType(ptr, fFixupType); |
1396 | --argCnt; |
1397 | } |
1398 | } |
1399 | break; |
1400 | |
1401 | default: |
1402 | case ELEMENT_TYPE_END : |
1403 | _ASSERTE(!"Unknown Type" ); |
1404 | break; |
1405 | } |
1406 | return(ptr); |
1407 | } |
1408 | |
1409 | /**************************************************************************/ |
1410 | void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig) |
1411 | { |
1412 | if(TyParFixupList.COUNT() > 0) |
1413 | { |
1414 | BYTE* ptr = (BYTE*)pSig; |
1415 | BYTE* ptrEnd = ptr + cSig; |
1416 | while(ptr < ptrEnd) |
1417 | { |
1418 | ptr = skipType(ptr, TRUE); |
1419 | } // end while |
1420 | } // end if(COUNT>0) |
1421 | } |
1422 | void FixupTyPars(BinStr* pbstype) |
1423 | { |
1424 | FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length())); |
1425 | } |
1426 | /**************************************************************************/ |
1427 | static unsigned corCountArgs(BinStr* args) |
1428 | { |
1429 | unsigned __int8* ptr = args->ptr(); |
1430 | unsigned __int8* end = &args->ptr()[args->length()]; |
1431 | unsigned ret = 0; |
1432 | while(ptr < end) |
1433 | { |
1434 | if (*ptr != ELEMENT_TYPE_SENTINEL) |
1435 | { |
1436 | ptr = skipType(ptr, FALSE); |
1437 | ret++; |
1438 | } |
1439 | else ptr++; |
1440 | } |
1441 | return(ret); |
1442 | } |
1443 | |
1444 | /********************************************************************************/ |
1445 | AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem) |
1446 | { |
1447 | #ifdef DEBUG_PARSING |
1448 | extern int yydebug; |
1449 | yydebug = 1; |
1450 | #endif |
1451 | |
1452 | assem = aAssem; |
1453 | assem->SetErrorReporter((ErrorReporter *)this); |
1454 | |
1455 | assem->m_ulCurLine = 1; |
1456 | assem->m_ulCurColumn = 1; |
1457 | |
1458 | wzIncludePath = NULL; |
1459 | penv = NULL; |
1460 | |
1461 | hstdout = GetStdHandle(STD_OUTPUT_HANDLE); |
1462 | hstderr = GetStdHandle(STD_ERROR_HANDLE); |
1463 | |
1464 | success = true; |
1465 | _ASSERTE(parser == 0); // Should only be one parser instance at a time |
1466 | |
1467 | // Resolve aliases |
1468 | for (unsigned int i = 0; i < sizeof(keywords) / sizeof(Keywords); i++) |
1469 | { |
1470 | if (keywords[i].token == NO_VALUE) |
1471 | keywords[i].token = keywords[keywords[i].tokenVal].token; |
1472 | } |
1473 | SetSymbolTables(); |
1474 | Init_str2uint64(); |
1475 | parser = this; |
1476 | //yyparse(); |
1477 | } |
1478 | |
1479 | /********************************************************************************/ |
1480 | AsmParse::~AsmParse() |
1481 | { |
1482 | parser = 0; |
1483 | delete penv; |
1484 | while(m_ANSLast.POP()); |
1485 | } |
1486 | |
1487 | /**************************************************************************/ |
1488 | DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi) |
1489 | { |
1490 | return IsTextUnicode(pBuff,cb,lpi); |
1491 | } |
1492 | |
1493 | /**************************************************************************/ |
1494 | void AsmParse::CreateEnvironment(ReadStream* stream) |
1495 | { |
1496 | penv = new PARSING_ENVIRONMENT; |
1497 | memset(penv,0,sizeof(PARSING_ENVIRONMENT)); |
1498 | penv->in = stream; |
1499 | penv->curLine = 1; |
1500 | strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*3+1,assem->m_szSourceFileName); |
1501 | |
1502 | penv->curPos = fillBuff(NULL); |
1503 | penv->uCodePage = g_uCodePage; |
1504 | |
1505 | SetFunctionPtrs(); |
1506 | }; |
1507 | |
1508 | /**************************************************************************/ |
1509 | void AsmParse::ParseFile(ReadStream* stream) |
1510 | { |
1511 | CreateEnvironment(stream); |
1512 | yyparse(); |
1513 | penv->in = NULL; |
1514 | }; |
1515 | |
1516 | /**************************************************************************/ |
1517 | char* AsmParse::fillBuff(__in_opt __nullterminated char* pos) |
1518 | { |
1519 | int iPutToBuffer; |
1520 | int iOptions = IS_TEXT_UNICODE_UNICODE_MASK; |
1521 | g_uCodePage = CP_ACP; |
1522 | iPutToBuffer = (int)penv->in->getAll(&(penv->curPos)); |
1523 | |
1524 | penv->endPos = penv->curPos + iPutToBuffer; |
1525 | if(iPutToBuffer > 128) iPutToBuffer = 128; |
1526 | if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions)) |
1527 | { |
1528 | g_uCodePage = CP_UTF8; |
1529 | if(iOptions & IS_TEXT_UNICODE_SIGNATURE) |
1530 | { |
1531 | penv->curPos += 2; |
1532 | } |
1533 | if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n" ); |
1534 | penv->pfn_Sym = SymW; |
1535 | penv->pfn_nextchar = nextcharW; |
1536 | penv->pfn_NewStrFromToken = NewStrFromTokenW; |
1537 | penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW; |
1538 | penv->pfn_GetDouble = GetDoubleW; |
1539 | } |
1540 | else |
1541 | { |
1542 | if(((penv->curPos[0]&0xFF)==0xEF)&&((penv->curPos[1]&0xFF)==0xBB)&&((penv->curPos[2]&0xFF)==0xBF)) |
1543 | { |
1544 | g_uCodePage = CP_UTF8; |
1545 | penv->curPos += 3; |
1546 | if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n" ); |
1547 | penv->pfn_nextchar = nextcharU; |
1548 | } |
1549 | else |
1550 | { |
1551 | if(assem->m_fReportProgress) printf("Source file is ANSI\n\n" ); |
1552 | penv->pfn_nextchar = nextcharA; |
1553 | } |
1554 | penv->pfn_Sym = SymAU; |
1555 | penv->pfn_NewStrFromToken = NewStrFromTokenAU; |
1556 | penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU; |
1557 | penv->pfn_GetDouble = GetDoubleAU; |
1558 | } |
1559 | return(penv->curPos); |
1560 | } |
1561 | |
1562 | /********************************************************************************/ |
1563 | BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs) |
1564 | { |
1565 | _ASSERTE((ntyargs != 0) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != 0)); |
1566 | BinStr* ret = new BinStr(); |
1567 | if(ret) |
1568 | { |
1569 | //if (retType != 0) |
1570 | ret->insertInt8(callConv); |
1571 | if (ntyargs != 0) |
1572 | corEmitInt(ret, ntyargs); |
1573 | corEmitInt(ret, corCountArgs(args)); |
1574 | |
1575 | if (retType != 0) |
1576 | { |
1577 | ret->append(retType); |
1578 | delete retType; |
1579 | } |
1580 | ret->append(args); |
1581 | } |
1582 | else |
1583 | error("\nOut of memory!\n" ); |
1584 | |
1585 | delete args; |
1586 | return(ret); |
1587 | } |
1588 | |
1589 | /********************************************************************************/ |
1590 | BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds) |
1591 | { |
1592 | // 'bounds' is a binary buffer, that contains an array of 'struct Bounds' |
1593 | struct Bounds { |
1594 | int lowerBound; |
1595 | unsigned numElements; |
1596 | }; |
1597 | |
1598 | _ASSERTE(bounds->length() % sizeof(Bounds) == 0); |
1599 | unsigned boundsLen = bounds->length() / sizeof(Bounds); |
1600 | _ASSERTE(boundsLen > 0); |
1601 | Bounds* boundsArr = (Bounds*) bounds->ptr(); |
1602 | |
1603 | BinStr* ret = new BinStr(); |
1604 | |
1605 | ret->appendInt8(kind); |
1606 | ret->append(elemType); |
1607 | corEmitInt(ret, boundsLen); // emit the rank |
1608 | |
1609 | unsigned lowerBoundsDefined = 0; |
1610 | unsigned numElementsDefined = 0; |
1611 | unsigned i; |
1612 | for(i=0; i < boundsLen; i++) |
1613 | { |
1614 | if(boundsArr[i].lowerBound < 0x7FFFFFFF) lowerBoundsDefined = i+1; |
1615 | else boundsArr[i].lowerBound = 0; |
1616 | |
1617 | if(boundsArr[i].numElements < 0x7FFFFFFF) numElementsDefined = i+1; |
1618 | else boundsArr[i].numElements = 0; |
1619 | } |
1620 | |
1621 | corEmitInt(ret, numElementsDefined); // emit number of bounds |
1622 | |
1623 | for(i=0; i < numElementsDefined; i++) |
1624 | { |
1625 | _ASSERTE (boundsArr[i].numElements >= 0); // enforced at rule time |
1626 | corEmitInt(ret, boundsArr[i].numElements); |
1627 | |
1628 | } |
1629 | |
1630 | corEmitInt(ret, lowerBoundsDefined); // emit number of lower bounds |
1631 | for(i=0; i < lowerBoundsDefined; i++) |
1632 | { |
1633 | unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(5)); |
1634 | ret->remove(5 - cnt); |
1635 | } |
1636 | delete elemType; |
1637 | delete bounds; |
1638 | return(ret); |
1639 | } |
1640 | |
1641 | /********************************************************************************/ |
1642 | BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk) |
1643 | { |
1644 | |
1645 | BinStr* ret = new BinStr(); |
1646 | _ASSERTE(kind == ELEMENT_TYPE_CLASS || kind == ELEMENT_TYPE_VALUETYPE || |
1647 | kind == ELEMENT_TYPE_CMOD_REQD || kind == ELEMENT_TYPE_CMOD_OPT); |
1648 | ret->appendInt8(kind); |
1649 | unsigned cnt = CorSigCompressToken(tk, ret->getBuff(5)); |
1650 | ret->remove(5 - cnt); |
1651 | return(ret); |
1652 | } |
1653 | /**************************************************************************/ |
1654 | void PrintANSILine(FILE* pF, __in __nullterminated char* sz) |
1655 | { |
1656 | WCHAR *wz = &wzUniBuf[0]; |
1657 | if(g_uCodePage != CP_ACP) |
1658 | { |
1659 | memset(wz,0,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes |
1660 | WszMultiByteToWideChar(g_uCodePage,0,sz,-1,wz,(dwUniBuf >> 1)-1); |
1661 | |
1662 | memset(sz,0,dwUniBuf); |
1663 | WszWideCharToMultiByte(g_uConsoleCP,0,wz,-1,sz,dwUniBuf-1,NULL,NULL); |
1664 | } |
1665 | fprintf(pF,"%s" ,sz); |
1666 | } |
1667 | /**************************************************************************/ |
1668 | void AsmParse::error(const char* fmt, ...) |
1669 | { |
1670 | char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]); |
1671 | char *psz=&sz[0]; |
1672 | FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr; |
1673 | success = false; |
1674 | va_list args; |
1675 | va_start(args, fmt); |
1676 | |
1677 | if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : " , penv->in->namew(), penv->curLine); |
1678 | psz+=sprintf_s(psz, (dwUniBuf >> 1), "error : " ); |
1679 | _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args); |
1680 | PrintANSILine(pF,sz); |
1681 | } |
1682 | |
1683 | /**************************************************************************/ |
1684 | void AsmParse::warn(const char* fmt, ...) |
1685 | { |
1686 | char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]); |
1687 | char *psz=&sz[0]; |
1688 | FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr; |
1689 | va_list args; |
1690 | va_start(args, fmt); |
1691 | |
1692 | if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> 1), "%S(%d) : " , penv->in->namew(), penv->curLine); |
1693 | psz+=sprintf_s(psz, (dwUniBuf >> 1), "warning : " ); |
1694 | _vsnprintf_s(psz, (dwUniBuf >> 1),(dwUniBuf >> 1)-strlen(sz)-1, fmt, args); |
1695 | PrintANSILine(pF,sz); |
1696 | } |
1697 | /**************************************************************************/ |
1698 | void AsmParse::msg(const char* fmt, ...) |
1699 | { |
1700 | char *sz = (char*)(&wzUniBuf[(dwUniBuf >> 1)]); |
1701 | va_list args; |
1702 | va_start(args, fmt); |
1703 | |
1704 | _vsnprintf_s(sz, (dwUniBuf >> 1),(dwUniBuf >> 1)-1, fmt, args); |
1705 | PrintANSILine(stdout,sz); |
1706 | } |
1707 | |
1708 | #ifdef _MSC_VER |
1709 | #pragma warning(default : 4640) |
1710 | #endif |
1711 | |