grammar_after.cpp source code [CoreCLR/ilasm/grammar_after.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4	/******************************************************************************/
/* Code goes here */
6
7	/******************************************************************************/
// Parser entry point; only declared in this section.
extern int yyparse();

// One entry of the keyword/instruction table 'keywords' below.
struct Keywords {
    const char* name;           // spelling of the keyword or instruction
    unsigned short token;       // value returned by findKeyword() for this entry
    unsigned short tokenVal;    // this holds the instruction enumeration for those keywords that are instrs
    size_t stname;              // table entries store lengthof(name)-1 here
};

#define NO_VALUE        ((unsigned short)-1)              // The token has no value
18
19	static Keywords keywords[] = {
20	// Attention! Because of aliases, the instructions MUST go first!
21	// Redefine all the instructions (defined in assembler.h <- asmenum.h <- opcode.def)
22	#undef InlineNone
23	#undef InlineVar
24	#undef ShortInlineVar
25	#undef InlineI
26	#undef ShortInlineI
27	#undef InlineI8
28	#undef InlineR
29	#undef ShortInlineR
30	#undef InlineBrTarget
31	#undef ShortInlineBrTarget
32	#undef InlineMethod
33	#undef InlineField
34	#undef InlineType
35	#undef InlineString
36	#undef InlineSig
37	#undef InlineTok
38	#undef InlineSwitch
39	#undef InlineVarTok
40
41
42	#define InlineNone INSTR_NONE
43	#define InlineVar INSTR_VAR
44	#define ShortInlineVar INSTR_VAR
45	#define InlineI INSTR_I
46	#define ShortInlineI INSTR_I
47	#define InlineI8 INSTR_I8
48	#define InlineR INSTR_R
49	#define ShortInlineR INSTR_R
50	#define InlineBrTarget INSTR_BRTARGET
51	#define ShortInlineBrTarget INSTR_BRTARGET
52	#define InlineMethod INSTR_METHOD
53	#define InlineField INSTR_FIELD
54	#define InlineType INSTR_TYPE
55	#define InlineString INSTR_STRING
56	#define InlineSig INSTR_SIG
57	#define InlineTok INSTR_TOK
58	#define InlineSwitch INSTR_SWITCH
59
60	#define InlineVarTok 0
61	#define NEW_INLINE_NAMES
62	// The volatile instruction collides with the volatile keyword, so
63	// we treat it as a keyword everywhere and modify the grammar accordingly (Yuck!)
64	#define OPDEF(c,s,pop,push,args,type,l,s1,s2,ctrl) { s, args, c, lengthof(s)-1 },
65	#define OPALIAS(alias_c, s, c) { s, NO_VALUE, c, lengthof(s)-1 },
66	#include "opcode.def"
67	#undef OPALIAS
68	#undef OPDEF
69
70	/ keywords /
71	#define KYWD(name, sym, val) { name, sym, val, lengthof(name)-1 },
72	#include "il_kywd.h"
73	#undef KYWD
74
75	};
76
77	/******************************************************************************/
/* File encoding-dependent functions */
/*--------------------------------------------------------------------------*/
80	char* nextcharA(__in __nullterminated char* pos)
81	{
82	return (pos > `0`) ? ++pos : (char* )_mbsinc((const* unsigned char *)pos);
83	}
84
85	char* nextcharU(__in __nullterminated char* pos)
86	{
87	return ++pos;
88	}
89
90	char* nextcharW(__in __nullterminated char* pos)
91	{
92	return (pos+`2`);
93	}
/*--------------------------------------------------------------------------*/
95	unsigned SymAU(__in __nullterminated char* curPos)
96	{
97	return (unsigned)*curPos;
98	}
99
100	unsigned SymW(__in __nullterminated char* curPos)
101	{
102	return (unsigned)((WCHAR)curPos);
103	}
/*--------------------------------------------------------------------------*/
105	char* NewStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen)
106	{
107	char nb = new* char[tokLen+`1`];
108	if(nb != NULL)
109	{
110	memcpy(nb, curTok, tokLen);
111	nb[tokLen] = `0`;
112	}
113	return nb;
114	}
115	char* NewStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen)
116	{
117	WCHAR* wcurTok = (WCHAR*)curTok;
118	char nb = new* char[(tokLen<<`1`) + `2`];
119	if(nb != NULL)
120	{
121	tokLen = WszWideCharToMultiByte(CP_UTF8,`0`,(LPCWSTR)wcurTok,(int)(tokLen >> `1`),nb,(int)(tokLen<<`1`) + `2`,NULL,NULL);
122	nb[tokLen] = `0`;
123	}
124	return nb;
125	}
/*--------------------------------------------------------------------------*/
127	char* NewStaticStrFromTokenAU(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
128	{
129	if(tokLen >= bufSize) return NULL;
130	memcpy(staticBuf, curTok, tokLen);
131	staticBuf[tokLen] = `0`;
132	return staticBuf;
133	}
134	char* NewStaticStrFromTokenW(__in_ecount(tokLen) char* curTok, size_t tokLen, __out_ecount(bufSize) char* staticBuf, size_t bufSize)
135	{
136	WCHAR* wcurTok = (WCHAR*)curTok;
137	if(tokLen >= bufSize/`2`) return NULL;
138	tokLen = WszWideCharToMultiByte(CP_UTF8,`0`,(LPCWSTR)wcurTok,(int)(tokLen >> `1`),staticBuf,(int)bufSize,NULL,NULL);
139	staticBuf[tokLen] = `0`;
140	return staticBuf;
141	}
/*--------------------------------------------------------------------------*/
143	unsigned GetDoubleAU(__in __nullterminated char* begNum, unsigned L, double** ppRes)
144	{
145	static char dbuff[`128`];
146	char* pdummy;
147	if(L > `127`) L = `127`;
148	memcpy(dbuff,begNum,L);
149	dbuff[L] = `0`;
150	ppRes = new* double(strtod(dbuff, &pdummy));
151	return ((unsigned)(pdummy - dbuff));
152	}
153
154	unsigned GetDoubleW(__in __nullterminated char* begNum, unsigned L, double** ppRes)
155	{
156	static char dbuff[`256`];
157	char* pdummy;
158	if(L > `254`) L = `254`;
159	memcpy(dbuff,begNum,L);
160	dbuff[L] = `0`;
161	dbuff[L+`1`] = `0`;
162	ppRes = new* double(wcstod((const wchar_t)dbuff, (wchar_t***)&pdummy));
163	return ((unsigned)(pdummy - dbuff));
164	}
/*--------------------------------------------------------------------------*/
166	char* yygetline(int Line)
167	{
168	static char buff[`0x4000`];
169	char pLine=NULL, pNextLine=NULL;
170	char pBegin=NULL, pEnd = NULL;
171	unsigned uCount = parser->getAll(&pBegin);
172	pEnd = pBegin + uCount;
173	buff[`0`] = `0`;
174	for(uCount=`0`, pLine=pBegin; pLine < pEnd; pLine = nextchar(pLine))
175	{
176	if(Sym(pLine) == `'\n'`) uCount++;
177	if(uCount == (unsigned int)(Line-`1`)) break;
178	}
179	pLine = nextchar(pLine);
180	if(pLine < pEnd)
181	{
182	for(pNextLine = pLine; pNextLine < pEnd; pNextLine = nextchar(pNextLine))
183	{
184	if(Sym(pNextLine) == `'\n'`) break;
185	}
186	if(Sym == SymW) // Unicode file
187	{
188	if(((WCHAR)pNextLine - `1`) == `'\r'`) pNextLine -= `2`;
189	uCount = (unsigned)(pNextLine - pLine);
190	uCount &= `0x1FFF`; // limit: 8K wchars
191	WCHAR* wzBuff = (WCHAR*)buff;
192	memcpy(buff,pLine,uCount);
193	wzBuff[uCount >> `1`] = `0`;
194	}
195	else
196	{
197	if(*(pNextLine-`1`)==`'\r'`) pNextLine--;
198	uCount = (unsigned)(pNextLine - pLine);
199	uCount &= `0x3FFF`; // limit: 16K chars
200	memcpy(buff,pLine,uCount);
201	buff[uCount]=`0`;
202	}
203	}
204	return buff;
205	}
206
207	void yyerror(__in __nullterminated const char* str) {
208	char tokBuff[`64`];
209	WCHAR wzfile = (WCHAR)(PENV->in->namew());
210	int iline = PENV->curLine;
211
212	size_t len = PENV->curPos - PENV->curTok;
213	if (len > `62`) len = `62`;
214	memcpy(tokBuff, PENV->curTok, len);
215	tokBuff[len] = `0`;
216	tokBuff[len+`1`] = `0`;
217	if(PENV->bExternSource)
218	{
219	wzfile = PASM->m_wzSourceFileName;
220	iline = PENV->nExtLine;
221	}
222	if(Sym == SymW) // Unicode file
223	fprintf(stderr, "%S(%d) : error : %s at token '%S' in: %S\n",
224	wzfile, iline, str, (WCHAR)tokBuff, (WCHAR)yygetline(PENV->curLine));
225	else
226	fprintf(stderr, "%S(%d) : error : %s at token '%s' in: %s\n",
227	wzfile, iline, str, tokBuff, yygetline(PENV->curLine));
228	parser->success = false;
229	}
230
231	/******************************************************************************/
232	/ looks up the typedef 'name' of length 'nameLen' (name does not need to be*
233	null terminated) Returns 0 on failure /*
234	TypeDefDescr* findTypedef(__in_ecount(NameLen) char* name, size_t NameLen)
235	{
236	TypeDefDescr* pRet = NULL;
237	static char Name[`4096`];
238	if(PASM->NumTypeDefs())
239	{
240	if(NewStaticStrFromToken(name,NameLen,Name,`4096`))
241	pRet = PASM->FindTypeDef(Name);
242	}
243	return pRet;
244	}
245
246	int TYPEDEF(TypeDefDescr* pTDD)
247	{
248	switch(TypeFromToken(pTDD->m_tkTypeSpec))
249	{
250	case mdtTypeDef:
251	case mdtTypeRef:
252	return TYPEDEF_T;
253	case mdtMethodDef:
254	case `0x99000000`:
255	return TYPEDEF_M;
256	case mdtFieldDef:
257	case `0x98000000`:
258	return TYPEDEF_F;
259	case mdtMemberRef:
260	return TYPEDEF_MR;
261	case mdtTypeSpec:
262	return TYPEDEF_TS;
263	case mdtCustomAttribute:
264	return TYPEDEF_CA;
265	}
266	return ERROR_;
267
268	}
269
270	/******************************************************************************/
271	void indexKeywords(Indx* indx) // called in Assembler constructor (assem.cpp)
272	{
273	Keywords* low = keywords;
274	Keywords* high = keywords + (sizeof(keywords) / sizeof(Keywords));
275	Keywords* mid;
276	for(mid = low; mid < high; mid++)
277	{
278	indx->IndexString((char*)(mid->name),mid);
279	}
280	}
281
282	Instr* SetupInstr(unsigned short opcode)
283	{
284	Instr* pVal = NULL;
285	if((pVal = PASM->GetInstr()))
286	{
287	pVal->opcode = opcode;
288	if((pVal->pWriter = PASM->m_pSymDocument)!=NULL)
289	{
290	if(PENV->bExternSource)
291	{
292	pVal->linenum = PENV->nExtLine;
293	pVal->column = PENV->nExtCol;
294	pVal->linenum_end = PENV->nExtLineEnd;
295	pVal->column_end = PENV->nExtColEnd;
296	pVal->pc = nCurrPC;
297	}
298	else
299	{
300	pVal->linenum = PENV->curLine;
301	pVal->column = `1`;
302	pVal->linenum_end = PENV->curLine;
303	pVal->column_end = `0`;
304	pVal->pc = PASM->m_CurPC;
305	}
306	}
307	}
308	return pVal;
309	}
310	/ looks up the keyword 'name' of length 'nameLen' (name does not need to be*
311	null terminated) Returns 0 on failure /*
312	int findKeyword(const char* name, size_t nameLen, unsigned short* pOpcode)
313	{
314	static char Name[`128`];
315	Keywords* mid;
316
317	if(NULL == NewStaticStrFromToken((char)name,nameLen,Name,`128`)) return* `0`; // can't be a keyword
318	mid = (Keywords*)(PASM->indxKeywords.FindString(Name));
319	if(mid == NULL) return `0`;
320	*pOpcode = mid->tokenVal;
321
322	return(mid->token);
323	}
324
325	/******************************************************************************/
/* convert str to a uint64 */
// Digit value for each 7-bit character; filled by Init_str2uint64().
unsigned digits[128];

// Fills 'digits': '0'-'9' map to 0-9, letters (either case) map to 10-35,
// every other entry is set to all-ones bytes so it compares >= any radix.
void Init_str2uint64()
{
    int i;
    memset(digits,255,sizeof(digits));
    for(i='0'; i <= '9'; i++) digits[i] = i - '0';
    for(i='A'; i <= 'Z'; i++) digits[i] = i + 10 - 'A';
    for(i='a'; i <= 'z'; i++) digits[i] = i + 10 - 'a';
}
336	static unsigned __int64 str2uint64(const char* str, const char** endStr, unsigned radix)
337	{
338	unsigned __int64 ret = `0`;
339	unsigned digit,ix;
340	_ASSERTE(radix <= `36`);
341	for(;;str = nextchar((char*)str))
342	{
343	ix = Sym((char*)str);
344	if(ix <= `0x7F`)
345	{
346	digit = digits[ix];
347	if(digit < radix)
348	{
349	ret = ret * radix + digit;
350	continue;
351	}
352	}
353	*endStr = str;
354	return(ret);
355	}
356	}
357	/******************************************************************************/
358	/ Append an UTF-8 string preceded by compressed length, no zero terminator, to a BinStr /
359	static void AppendStringWithLength(BinStr* pbs, __in __nullterminated char* sz)
360	{
361	if((pbs != NULL) && (sz != NULL))
362	{
363	unsigned L = (unsigned) strlen(sz);
364	BYTE* pb = NULL;
365	corEmitInt(pbs,L);
366	if((pb = pbs->getBuff(L)) != NULL)
367	memcpy(pb,sz,L);
368	}
369	}
370
371	/******************************************************************************/
/* fetch the next token, and return it.  Also set the yylval union if the
   lexical token also has a value */
374
375
376	BOOL _Alpha[`128`];
377	BOOL _Digit[`128`];
378	BOOL _AlNum[`128`];
379	BOOL _ValidSS[`128`];
380	BOOL _ValidCS[`128`];
381	void SetSymbolTables()
382	{
383	unsigned i;
384	memset(_Alpha,`0`,sizeof(_Alpha));
385	memset(_Digit,`0`,sizeof(_Digit));
386	memset(_AlNum,`0`,sizeof(_AlNum));
387	memset(_ValidSS,`0`,sizeof(_ValidSS));
388	memset(_ValidCS,`0`,sizeof(_ValidCS));
389	for(i = `'A'`; i <= `'Z'`; i++)
390	{
391	_Alpha[i] = TRUE;
392	_AlNum[i] = TRUE;
393	_ValidSS[i] = TRUE;
394	_ValidCS[i] = TRUE;
395	}
396	for(i = `'a'`; i <= `'z'`; i++)
397	{
398	_Alpha[i] = TRUE;
399	_AlNum[i] = TRUE;
400	_ValidSS[i] = TRUE;
401	_ValidCS[i] = TRUE;
402	}
403	for(i = `'0'`; i <= `'9'`; i++)
404	{
405	_Digit[i] = TRUE;
406	_AlNum[i] = TRUE;
407	_ValidCS[i] = TRUE;
408	}
409	_ValidSS[(unsigned char)`'_'`] = TRUE;
410	_ValidSS[(unsigned char)`'#'`] = TRUE;
411	_ValidSS[(unsigned char)`'$'`] = TRUE;
412	_ValidSS[(unsigned char)`'@'`] = TRUE;
413
414	_ValidCS[(unsigned char)`'_'`] = TRUE;
415	_ValidCS[(unsigned char)`'?'`] = TRUE;
416	_ValidCS[(unsigned char)`'$'`] = TRUE;
417	_ValidCS[(unsigned char)`'@'`] = TRUE;
418	_ValidCS[(unsigned char)'`'] = TRUE;
419	}
420	BOOL IsAlpha(unsigned x) { return (x < `128`)&&_Alpha[x]; }
421	BOOL IsDigit(unsigned x) { return (x < `128`)&&_Digit[x]; }
422	BOOL IsAlNum(unsigned x) { return (x < `128`)&&_AlNum[x]; }
423	BOOL IsValidStartingSymbol(unsigned x) { return (x < `128`)&&_ValidSS[x]; }
424	BOOL IsValidContinuingSymbol(unsigned x) { return (x < `128`)&&_ValidCS[x]; }
425
426
427	char* nextBlank(__in __nullterminated char* curPos)
428	{
429	for(;;)
430	{
431	switch(Sym(curPos))
432	{
433	case `'/'` :
434	if ((Sym(nextchar(curPos)) == `'/'`)\|\| (Sym(nextchar(curPos)) == `'*'`))
435	return curPos;
436	else
437	{
438	curPos = nextchar(curPos);
439	break;
440	}
441	case `0`:
442	case `'\n'`:
443	case `'\r'`:
444	case `' '` :
445	case `'\t'`:
446	case `'\f'`:
447	return curPos;
448
449	default:
450	curPos = nextchar(curPos);
451	}
452	}
453	}
454
455	char* skipBlanks(__in __nullterminated char* curPos, unsigned* pstate)
456	{
457	const unsigned eolComment = `1`;
458	const unsigned multiComment = `2`;
459	unsigned nextSym, state = *pstate;
460	char* nextPos;
461	for(;;)
462	{ // skip whitespace and comments
463	if (curPos >= PENV->endPos)
464	{
465	*pstate = state;
466	return NULL;
467	}
468	switch(Sym(curPos))
469	{
470	case `0`:
471	return NULL; // EOF
472	case `'\n'`:
473	state &= ~eolComment;
474	PENV->curLine++;
475	if(PENV->bExternSource)
476	{
477	if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
478	PASM->m_ulCurLine = PENV->nExtLine;
479	PASM->m_ulCurColumn = PENV->nExtCol;
480	}
481	else
482	{
483	PASM->m_ulCurLine = PENV->curLine;
484	PASM->m_ulCurColumn = `1`;
485	}
486	break;
487	case `'\r'`:
488	case `' '` :
489	case `'\t'`:
490	case `'\f'`:
491	break;
492
493	case `'*'` :
494	if(state == `0`) goto PAST_WHITESPACE;
495	if(state & multiComment)
496	{
497	nextPos = nextchar(curPos);
498	if (Sym(nextPos) == `'/'`)
499	{
500	curPos = nextPos;
501	state &= ~multiComment;
502	}
503	}
504	break;
505
506	case `'/'` :
507	if(state == `0`)
508	{
509	nextPos = nextchar(curPos);
510	nextSym = Sym(nextPos);
511	if (nextSym == `'/'`)
512	{
513	curPos = nextPos;
514	state \|= eolComment;
515	}
516	else if (nextSym == `'*'`)
517	{
518	curPos = nextPos;
519	state \|= multiComment;
520	}
521	else goto PAST_WHITESPACE;
522	}
523	break;
524
525	default:
526	if (state == `0`) goto PAST_WHITESPACE;
527	}
528	curPos = nextchar(curPos);
529	}
530	PAST_WHITESPACE:
531	*pstate = state;
532	return curPos;
533	}
534
535	char* FullFileName(__in __nullterminated WCHAR* wzFileName, unsigned uCodePage);
536
537	int ProcessEOF()
538	{
539	PARSING_ENVIRONMENT* prev_penv = parser->PEStack.POP();
540	if(prev_penv != NULL)
541	{
542	//delete [] (WCHAR)(PENV->in->namew());*
543	delete PENV->in;
544	delete PENV;
545	parser->penv = prev_penv;
546	SetFunctionPtrs();
547	char* szFileName = new char[strlen(PENV->szFileName)+`1`];
548	strcpy_s(szFileName,strlen(PENV->szFileName)+`1`,PENV->szFileName);
549	PASM->SetSourceFileName(szFileName); // deletes the argument!
550	return `';'`;
551	}
552	//PENV->in = NULL;
553	return `0`;
554	}
555
// Resets the comment state, reloads curPos from the current environment and
// jumps to the NextToken label; expects 'state', 'curPos' and that label in scope.
#define NEXT_TOKEN  {state=0; curPos=PENV->curPos; goto NextToken;}
557
558	int parse_literal(unsigned curSym, __inout __nullterminated char* &curPos, BOOL translate_escapes)
559	{
560	unsigned quote = curSym;
561	curPos = nextchar(curPos);
562	char* fromPtr = curPos;
563	bool escape = false;
564
565	for(;;)
566	{ // Find matching quote
567	curSym = (curPos >= PENV->endPos) ? `0` : Sym(curPos);
568	if(curSym == `0`)
569	{
570	PENV->curPos = curPos;
571	return(BAD_LITERAL_);
572	}
573	else if(curSym == `'\\'`)
574	escape = !escape;
575	else
576	{
577	if(curSym == `'\n'`)
578	{
579	PENV->curLine++;
580	if(PENV->bExternSource)
581	{
582	if(PENV->bExternSourceAutoincrement) PENV->nExtLine++;
583	PASM->m_ulCurLine = PENV->nExtLine;
584	PASM->m_ulCurColumn = PENV->nExtCol;
585	}
586	else
587	{
588	PASM->m_ulCurLine = PENV->curLine;
589	PASM->m_ulCurColumn = `1`;
590	}
591	if (!escape) { PENV->curPos = curPos; return(BAD_LITERAL_); }
592	}
593	else if ((curSym == quote) && (!escape)) break;
594	escape = false;
595	}
596	curPos = nextchar(curPos);
597	}
598	// translate escaped characters
599	unsigned tokLen = (unsigned)(curPos - fromPtr);
600	char* newstr = NewStrFromToken(fromPtr, tokLen);
601	char* toPtr;
602	curPos = nextchar(curPos); // skip closing quote
603	if(translate_escapes)
604	{
605	fromPtr = newstr;
606	//_ASSERTE(0);
607	tokLen = (unsigned)strlen(newstr);
608	toPtr = new char[tokLen+`1`];
609	if(toPtr==NULL) return BAD_LITERAL_;
610	yylval.string = toPtr;
611	char* endPtr = fromPtr+tokLen;
612	while(fromPtr < endPtr)
613	{
614	if (*fromPtr == `'\\'`)
615	{
616	fromPtr++;
617	switch(*fromPtr)
618	{
619	case `'t'`:
620	*toPtr++ = `'\t'`;
621	break;
622	case `'n'`:
623	*toPtr++ = `'\n'`;
624	break;
625	case `'b'`:
626	*toPtr++ = `'\b'`;
627	break;
628	case `'f'`:
629	*toPtr++ = `'\f'`;
630	break;
631	case `'v'`:
632	*toPtr++ = `'\v'`;
633	break;
634	case `'?'`:
635	*toPtr++ = `'\?'`;
636	break;
637	case `'r'`:
638	*toPtr++ = `'\r'`;
639	break;
640	case `'a'`:
641	*toPtr++ = `'\a'`;
642	break;
643	case `'\n'`:
644	do fromPtr++;
645	while(isspace(*fromPtr));
646	--fromPtr; // undo the increment below
647	break;
648	case `'0'`:
649	case `'1'`:
650	case `'2'`:
651	case `'3'`:
652	if (IsDigit(fromPtr[`1`]) && IsDigit(fromPtr[`2`]))
653	{
654	toPtr++ = ((fromPtr[`0`] - `'0'`) `8` + (fromPtr[`1`] - `'0'`)) * `8` + (fromPtr[`2`] - `'0'`);
655	fromPtr+= `2`;
656	}
657	else if(fromPtr == `'0'`) toPtr++ = `0`;
658	else toPtr++ = fromPtr;
659	break;
660	default:
661	toPtr++ = fromPtr;
662	}
663	fromPtr++;
664	}
665	else
666	// toPtr++ = fromPtr++;
667	{
668	char* tmpPtr = fromPtr;
669	fromPtr = (nextchar == nextcharW) ? nextcharU(fromPtr) : nextchar(fromPtr);
670	while(tmpPtr < fromPtr) toPtr++ = tmpPtr++;
671	}
672
673	} //end while(fromPtr < endPtr)
674	toPtr = `0`; // terminate string*
675	delete [] newstr;
676	}
677	else
678	{
679	yylval.string = newstr;
680	toPtr = newstr + strlen(newstr);
681	}
682
683	PENV->curPos = curPos;
684	if(quote == `'"'`)
685	{
686	BinStr* pBS = new BinStr ();
687	unsigned size = (unsigned)(toPtr - yylval.string);
688	memcpy(pBS->getBuff(size),yylval.string,size);
689	delete [] yylval.string;
690	yylval.binstr = pBS;
691	return QSTRING;
692	}
693	else
694	{
695	if(PASM->NumTypeDefs())
696	{
697	TypeDefDescr* pTDD = PASM->FindTypeDef(yylval.string);
698	if(pTDD != NULL)
699	{
700	delete [] yylval.string;
701	yylval.tdd = pTDD;
702	return(TYPEDEF(pTDD));
703	}
704	}
705	return SQSTRING;
706	}
707	}
708
709	#ifdef _PREFAST_
710	#pragma warning(push)
711	#pragma warning(disable:21000) // Suppress PREFast warning about overly large function
712	#endif
713	int yylex()
714	{
715	char* curPos = PENV->curPos;
716	unsigned state = `0`;
717	const unsigned multiComment = `2`;
718	unsigned curSym;
719
720	char* newstr;
721
722	NextToken:
723	// Skip any leading whitespace and comments
724	curPos = skipBlanks(curPos, &state);
725	if(curPos == NULL)
726	{
727	if (state & multiComment) return (BAD_COMMENT_);
728	if(ProcessEOF() == `0`) return `0`; // EOF
729	NEXT_TOKEN;
730	}
731	char* curTok = curPos;
732	PENV->curTok = curPos;
733	PENV->curPos = curPos;
734	int tok = ERROR_;
735	yylval.string = `0`;
736
737	curSym = Sym(curPos);
738	if(bParsingByteArray) // only hexadecimals w/o 0x, ')' and white space allowed!
739	{
740	int i,s=`0`;
741	for(i=`0`; i<`2`; i++, curPos = nextchar(curPos), curSym = Sym(curPos))
742	{
743	if((`'0'` <= curSym)&&(curSym <= `'9'`)) s = s*`16`+(curSym - `'0'`);
744	else if((`'A'` <= curSym)&&(curSym <= `'F'`)) s = s*`16`+(curSym - `'A'` + `10`);
745	else if((`'a'` <= curSym)&&(curSym <= `'f'`)) s = s*`16`+(curSym - `'a'` + `10`);
746	else break; // don't increase curPos!
747	}
748	if(i)
749	{
750	tok = HEXBYTE;
751	yylval.int32 = s;
752	}
753	else
754	{
755	if(curSym == `')'` \|\| curSym == `'}'`)
756	{
757	bParsingByteArray = FALSE;
758	goto Just_A_Character;
759	}
760	}
761	PENV->curPos = curPos;
762	return(tok);
763	}
764	if(curSym == `'?'`) // '?' may be part of an identifier, if it's not followed by punctuation
765	{
766	if(IsValidContinuingSymbol(Sym(nextchar(curPos)))) goto Its_An_Id;
767	goto Just_A_Character;
768	}
769
770	if (IsValidStartingSymbol(curSym))
771	{ // is it an ID
772	Its_An_Id:
773	size_t offsetDot = (size_t)-`1`; // first appearance of '.'
774	size_t offsetDotDigit = (size_t)-`1`; // first appearance of '.<digit>' (not DOTTEDNAME!)
775	do
776	{
777	curPos = nextchar(curPos);
778	if (Sym(curPos) == `'.'`)
779	{
780	if (offsetDot == (size_t)-`1`) offsetDot = curPos - curTok;
781	curPos = nextchar(curPos);
782	if((offsetDotDigit==(size_t)-`1`)&&(Sym(curPos) >= `'0'`)&&(Sym(curPos) <= `'9'`))
783	offsetDotDigit = curPos - curTok - `1`;
784	}
785	} while(IsValidContinuingSymbol(Sym(curPos)));
786
787	size_t tokLen = curPos - curTok;
788	// check to see if it is a keyword
789	int token = findKeyword(curTok, tokLen, &yylval.opcode);
790	if (token != `0`)
791	{
792	//printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
793	PENV->curPos = curPos;
794	PENV->curTok = curTok;
795	if(!SkipToken)
796	{
797	switch(token)
798	{
799	case P_INCLUDE:
800	//if(include_first_pass)
801	//{
802	// PENV->curPos = curTok;
803	// include_first_pass = FALSE;
804	// return ';';
805	//}
806	//include_first_pass = TRUE;
807	curPos = skipBlanks(curPos,&state);
808	if(curPos == NULL)
809	{
810	if (state & multiComment) return (BAD_COMMENT_);
811	if(ProcessEOF() == `0`) return `0`; // EOF
812	NEXT_TOKEN;
813	}
814	if(Sym(curPos) != `'"'`) return ERROR_;
815	curPos = nextchar(curPos);
816	curTok = curPos;
817	PENV->curTok = curPos;
818	while(Sym(curPos) != `'"'`)
819	{
820	curPos = nextchar(curPos);
821	if(curPos >= PENV->endPos) return ERROR_;
822	PENV->curPos = curPos;
823	}
824	tokLen = PENV->curPos - curTok;
825	curPos = nextchar(curPos);
826	PENV->curPos = curPos;
827	{
828	WCHAR* wzFile=NULL;
829	if(Sym == SymW)
830	{
831	if((wzFile = new WCHAR[tokLen/`2` + `1`]) != NULL)
832	{
833	memcpy(wzFile,curTok,tokLen);
834	wzFile[tokLen/`2`] = `0`;
835	}
836	}
837	else
838	{
839	if((wzFile = new WCHAR[tokLen+`1`]) != NULL)
840	{
841	tokLen = WszMultiByteToWideChar(g_uCodePage,`0`,curTok,(int)tokLen,wzFile,(int)tokLen+`1`);
842	wzFile[tokLen] = `0`;
843	}
844	}
845	if(wzFile != NULL)
846	{
847	if((parser->wzIncludePath != NULL)
848	&&(wcschr(wzFile,`'\\'`)==NULL)&&(wcschr(wzFile,`':'`)==NULL))
849	{
850	PathString wzFullName;
851
852	WCHAR* pwz;
853	DWORD dw = WszSearchPath(parser->wzIncludePath,wzFile,NULL,
854	TRUE, wzFullName,&pwz);
855	if(dw != `0`)
856	{
857	delete [] wzFile;
858
859	wzFile = wzFullName.GetCopyOfUnicodeString();
860	}
861
862	}
863	if(PASM->m_fReportProgress)
864	parser->msg("\nIncluding '%S'\n",wzFile);
865	MappedFileStream pIn = new* MappedFileStream (wzFile);
866	if((pIn != NULL)&&pIn->IsValid())
867	{
868	parser->PEStack.PUSH(PENV);
869	PASM->SetSourceFileName(FullFileName(wzFile,CP_UTF8)); // deletes the argument!
870	parser->CreateEnvironment(pIn);
871	NEXT_TOKEN;
872	}
873	else
874	{
875	delete [] wzFile;
876	PASM->report->error("#include failed\n");
877	return ERROR_;
878	}
879	}
880	else
881	{
882	PASM->report->error("Out of memory\n");
883	return ERROR_;
884	}
885	}
886	curPos = PENV->curPos;
887	curTok = PENV->curTok;
888	break;
889	case P_IFDEF:
890	case P_IFNDEF:
891	case P_DEFINE:
892	case P_UNDEF:
893	curPos = skipBlanks(curPos,&state);
894	if(curPos == NULL)
895	{
896	if (state & multiComment) return (BAD_COMMENT_);
897	if(ProcessEOF() == `0`) return `0`; // EOF
898	NEXT_TOKEN;
899	}
900	curTok = curPos;
901	PENV->curTok = curPos;
902	PENV->curPos = curPos;
903	if (!IsValidStartingSymbol(Sym(curPos))) return ERROR_;
904	do
905	{
906	curPos = nextchar(curPos);
907	} while(IsValidContinuingSymbol(Sym(curPos)));
908	tokLen = curPos - curTok;
909
910	newstr = NewStrFromToken(curTok, tokLen);
911	if((token==P_DEFINE)\|\|(token==P_UNDEF))
912	{
913	if(token == P_DEFINE)
914	{
915	curPos = skipBlanks(curPos,&state);
916	if ((curPos == NULL) && (ProcessEOF() == `0`))
917	{
918	DefineVar(newstr, NULL);
919	return `0`;
920	}
921	curSym = Sym(curPos);
922	if(curSym != `'"'`)
923	DefineVar(newstr, NULL);
924	else
925	{
926	tok = parse_literal(curSym, curPos, FALSE);
927	if(tok == QSTRING)
928	{
929	// if not ANSI, then string is in UTF-8,
930	// insert prefix
931	if(nextchar != nextcharA)
932	{
933	yylval.binstr->insertInt8(`0xEF`);
934	yylval.binstr->insertInt8(`0xBB`);
935	yylval.binstr->insertInt8(`0xBF`);
936	}
937	yylval.binstr->appendInt8(`' '`);
938	DefineVar(newstr, yylval.binstr);
939	}
940	else
941	return tok;
942	}
943	}
944	else UndefVar(newstr);
945	}
946	else
947	{
948	SkipToken = IsVarDefined(newstr);
949	if(token == P_IFDEF) SkipToken = !SkipToken;
950	IfEndif++;
951	if(SkipToken) IfEndifSkip=IfEndif;
952	}
953	break;
954	case P_ELSE:
955	SkipToken = TRUE;
956	IfEndifSkip=IfEndif;
957	break;
958	case P_ENDIF:
959	if(IfEndif == `0`)
960	{
961	PASM->report->error("Unmatched #endif\n");
962	return ERROR_;
963	}
964	IfEndif--;
965	break;
966	default:
967	return(token);
968	}
969	goto NextToken;
970	}
971	if(SkipToken)
972	{
973	switch(token)
974	{
975	case P_IFDEF:
976	case P_IFNDEF:
977	IfEndif++;
978	break;
979	case P_ELSE:
980	if(IfEndif == IfEndifSkip) SkipToken = FALSE;
981	break;
982	case P_ENDIF:
983	if(IfEndif == IfEndifSkip) SkipToken = FALSE;
984	IfEndif--;
985	break;
986	default:
987	break;
988	}
989	//if(yylval.instr) yylval.instr->opcode = -1;
990	goto NextToken;
991	}
992	return(token);
993	} // end if token != 0
994	if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
995
996	VarName* pVarName = FindVarDef(NewStrFromToken(curTok, tokLen));
997	if(pVarName != NULL)
998	{
999	if(pVarName->pbody != NULL)
1000	{
1001	BinStrStream pIn = new* BinStrStream (pVarName->pbody);
1002	if((pIn != NULL)&&pIn->IsValid())
1003	{
1004	PENV->curPos = curPos;
1005	parser->PEStack.PUSH(PENV);
1006	parser->CreateEnvironment(pIn);
1007	NEXT_TOKEN;
1008	}
1009	}
1010	}
1011
1012	TypeDefDescr* pTDD = findTypedef(curTok,tokLen);
1013
1014	if(pTDD != NULL)
1015	{
1016	yylval.tdd = pTDD;
1017	PENV->curPos = curPos;
1018	PENV->curTok = curTok;
1019	return(TYPEDEF(pTDD));
1020	}
1021	if(Sym(curTok) == `'#'`)
1022	{
1023	PENV->curPos = curPos;
1024	PENV->curTok = curTok;
1025	return(ERROR_);
1026	}
1027	// Not a keyword, normal identifiers don't have '.' in them
1028	if (offsetDot < (size_t)-`1`)
1029	{
1030	if(offsetDotDigit < (size_t)-`1`)
1031	{
1032	curPos = curTok+offsetDotDigit;
1033	tokLen = offsetDotDigit;
1034	}
1035	// protection against something like Foo.Bar..123 or Foo.Bar.
1036	unsigned D = (Sym == SymW) ? `2` : `1`; // Unicode or ANSI/UTF8!
1037	while((Sym(curPos-D)==`'.'`)&&(tokLen))
1038	{
1039	curPos -= D;
1040	tokLen -= D;
1041	}
1042	}
1043	if((yylval.string = NewStrFromToken(curTok,tokLen)))
1044	{
1045	tok = (offsetDot == (size_t)(-`1`))? ID : DOTTEDNAME;
1046	//printf("yylex: ID = '%s', curPos=0x%8.8X\n",yylval.string,curPos);
1047	}
1048	else return BAD_LITERAL_;
1049	}
1050	else if(SkipToken) { curPos = nextBlank(curPos); goto NextToken; }
1051	else if (IsDigit(curSym)
1052	\|\| (curSym == `'.'` && IsDigit(Sym(nextchar(curPos))))
1053	\|\| (curSym == `'-'` && IsDigit(Sym(nextchar(curPos)))))
1054	{
1055	const char* begNum = curPos;
1056	unsigned radix = `10`;
1057
1058	neg = (curSym == `'-'`); // always make it unsigned
1059	if (neg) curPos = nextchar(curPos);
1060
1061	if (Sym(curPos) == `'0'` && Sym(nextchar(curPos)) != `'.'`)
1062	{
1063	curPos = nextchar(curPos);
1064	radix = `8`;
1065	if (Sym(curPos) == `'x'` \|\| Sym(curPos) == `'X'`)
1066	{
1067	curPos = nextchar(curPos);
1068	radix = `16`;
1069	}
1070	}
1071	begNum = curPos;
1072	{
1073	unsigned __int64 i64 = str2uint64(begNum, const_cast<const char**>(&curPos), radix);
1074	unsigned __int64 mask64 = neg ? UI64(`0xFFFFFFFF80000000`) : UI64(`0xFFFFFFFF00000000`);
1075	unsigned __int64 largestNegVal32 = UI64(`0x0000000080000000`);
1076	if ((i64 & mask64) && (i64 != largestNegVal32))
1077	{
1078	yylval.int64 = new __int64(i64);
1079	tok = INT64;
1080	if (neg) yylval.int64 = -yylval.int64;
1081	}
1082	else
1083	{
1084	yylval.int32 = (__int32)i64;
1085	tok = INT32;
1086	if(neg) yylval.int32 = -yylval.int32;
1087	}
1088	}
1089	if (radix == `10` && ((Sym(curPos) == `'.'` && Sym(nextchar(curPos)) != `'.'`) \|\| Sym(curPos) == `'E'` \|\| Sym(curPos) == `'e'`))
1090	{
1091	unsigned L = (unsigned)(PENV->endPos - begNum);
1092	curPos = (char)begNum + GetDouble((char**)begNum,L,&yylval.float64);
1093	if (neg) yylval.float64 = -yylval.float64;
1094	tok = FLOAT64;
1095	}
1096	}
1097	else
1098	{ // punctuation
1099	if (curSym == `'"'` \|\| curSym == `'\''`)
1100	{
1101	return parse_literal(curSym, curPos, TRUE);
1102	} // end if (curPos == '"' \|\| curPos == '\'')
1103	else if (curSym==`':'` && Sym(nextchar(curPos))==`':'`)
1104	{
1105	curPos = nextchar(nextchar(curPos));
1106	tok = DCOLON;
1107	}
1108	else if(curSym == `'.'`)
1109	{
1110	if (Sym(nextchar(curPos))==`'.'` && Sym(nextchar(nextchar(curPos)))==`'.'`)
1111	{
1112	curPos = nextchar(nextchar(nextchar(curPos)));
1113	tok = ELIPSIS;
1114	}
1115	else
1116	{
1117	do
1118	{
1119	curPos = nextchar(curPos);
1120	if (curPos >= PENV->endPos)
1121	return ERROR_;
1122	curSym = Sym(curPos);
1123	}
1124	while(IsAlNum(curSym) \|\| curSym == `'_'` \|\| curSym == `'$'`\|\| curSym == `'@'`\|\| curSym == `'?'`);
1125	size_t tokLen = curPos - curTok;
1126
1127	// check to see if it is a keyword
1128	int token = findKeyword(curTok, tokLen, &yylval.opcode);
1129	if(token)
1130	{
1131	//printf("yylex: TOK = %d, curPos=0x%8.8X\n",token,curPos);
1132	PENV->curPos = curPos;
1133	PENV->curTok = curTok;
1134	return(token);
1135	}
1136	tok = `'.'`;
1137	curPos = nextchar(curTok);
1138	}
1139	}
1140	else
1141	{
1142	Just_A_Character:
1143	tok = curSym;
1144	curPos = nextchar(curPos);
1145	}
1146	//printf("yylex: PUNCT curPos=0x%8.8X\n",curPos);
1147	}
1148	dbprintf((" Line %d token %d (%c) val = %s\n", PENV->curLine, tok,
1149	(tok < `128` && isprint(tok)) ? tok : `' '`,
1150	(tok > `255` && tok != INT32 && tok != INT64 && tok!= FLOAT64) ? yylval.string : ""));
1151
1152	PENV->curPos = curPos;
1153	PENV->curTok = curTok;
1154	return(tok);
1155	}
1156	#ifdef _PREFAST_
1157	#pragma warning(pop)
1158	#endif
1159
1160	/************************************************************************/
1161	static char* newString(__in __nullterminated const char* str1)
1162	{
1163	char* ret = new char[strlen(str1)+`1`];
1164	if(ret) strcpy_s(ret, strlen(str1)+`1`, str1);
1165	return(ret);
1166	}
1167
1168	/************************************************************************/
1169	/ concatenate strings and release them /
1170
1171	static char* newStringWDel(__in __nullterminated char* str1, char delimiter, __in __nullterminated char* str3)
1172	{
1173	size_t len1 = strlen(str1);
1174	size_t len = len1+`2`;
1175	if (str3) len += strlen(str3);
1176	char* ret = new char[len];
1177	if(ret)
1178	{
1179	strcpy_s(ret, len, str1);
1180	delete [] str1;
1181	ret[len1] = delimiter;
1182	ret[len1+`1`] = `0`;
1183	if (str3)
1184	{
1185	strcat_s(ret, len, str3);
1186	delete [] str3;
1187	}
1188	}
1189	return(ret);
1190	}
1191
1192	/************************************************************************/
1193	static void corEmitInt(BinStr* buff, unsigned data)
1194	{
1195	unsigned cnt = CorSigCompressData(data, buff->getBuff(`5`));
1196	buff->remove(`5` - cnt);
1197	}
1198
1199
1200	/************************************************************************/
1201	/ move 'ptr past the exactly one type description /
1202
1203	unsigned __int8* skipType(unsigned __int8* ptr, BOOL fFixupType)
1204	{
1205	mdToken tk;
1206	AGAIN:
1207	switch(*ptr++) {
1208	case ELEMENT_TYPE_VOID :
1209	case ELEMENT_TYPE_BOOLEAN :
1210	case ELEMENT_TYPE_CHAR :
1211	case ELEMENT_TYPE_I1 :
1212	case ELEMENT_TYPE_U1 :
1213	case ELEMENT_TYPE_I2 :
1214	case ELEMENT_TYPE_U2 :
1215	case ELEMENT_TYPE_I4 :
1216	case ELEMENT_TYPE_U4 :
1217	case ELEMENT_TYPE_I8 :
1218	case ELEMENT_TYPE_U8 :
1219	case ELEMENT_TYPE_R4 :
1220	case ELEMENT_TYPE_R8 :
1221	case ELEMENT_TYPE_U :
1222	case ELEMENT_TYPE_I :
1223	case ELEMENT_TYPE_STRING :
1224	case ELEMENT_TYPE_OBJECT :
1225	case ELEMENT_TYPE_TYPEDBYREF :
1226	case ELEMENT_TYPE_SENTINEL :
1227	/ do nothing /
1228	break;
1229
1230	case ELEMENT_TYPE_VALUETYPE :
1231	case ELEMENT_TYPE_CLASS :
1232	ptr += CorSigUncompressToken(ptr, &tk);
1233	break;
1234
1235	case ELEMENT_TYPE_CMOD_REQD :
1236	case ELEMENT_TYPE_CMOD_OPT :
1237	ptr += CorSigUncompressToken(ptr, &tk);
1238	goto AGAIN;
1239
1240	case ELEMENT_TYPE_ARRAY :
1241	{
1242	ptr = skipType(ptr, fFixupType); // element Type
1243	unsigned rank = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1244	if (rank != `0`)
1245	{
1246	unsigned numSizes = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1247	while(numSizes > `0`)
1248	{
1249	CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1250	--numSizes;
1251	}
1252	unsigned numLowBounds = CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1253	while(numLowBounds > `0`)
1254	{
1255	CorSigUncompressData((PCCOR_SIGNATURE&) ptr);
1256	--numLowBounds;
1257	}
1258	}
1259	}
1260	break;
1261
1262	// Modifiers or dependent types
1263	case ELEMENT_TYPE_PINNED :
1264	case ELEMENT_TYPE_PTR :
1265	case ELEMENT_TYPE_BYREF :
1266	case ELEMENT_TYPE_SZARRAY :
1267	// tail recursion optimization
1268	// ptr = skipType(ptr, fFixupType);
1269	// break
1270	goto AGAIN;
1271
1272	case ELEMENT_TYPE_VAR:
1273	case ELEMENT_TYPE_MVAR:
1274	CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
1275	break;
1276
1277	case ELEMENT_TYPE_VARFIXUP:
1278	case ELEMENT_TYPE_MVARFIXUP:
1279	if(fFixupType)
1280	{
1281	BYTE* pb = ptr-`1`; // ptr incremented in switch
1282	unsigned __int8* ptr_save = ptr;
1283	int n = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // fixup #
1284	int compressed_size_n = (int)(ptr - ptr_save); // ptr was updated by CorSigUncompressData()
1285	int m = -`1`;
1286	if(PASM->m_TyParList)
1287	m = PASM->m_TyParList->IndexOf(TyParFixupList.PEEK(n));
1288	if(m == -`1`)
1289	{
1290	PASM->report->error("(fixupType) Invalid %stype parameter '%s'\n",
1291	(*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "",
1292	TyParFixupList.PEEK(n));
1293	m = `0`;
1294	}
1295	pb = (pb == ELEMENT_TYPE_MVARFIXUP)? ELEMENT_TYPE_MVAR : ELEMENT_TYPE_VAR;
1296	int compressed_size_m = (int)CorSigCompressData(m,pb+`1`);
1297
1298	// Note that CorSigCompressData() (and hence, CorSigUncompressData()) store a number
1299	// 0 <= x <= 0x1FFFFFFF in 1, 2, or 4 bytes. Above, 'n' is the fixup number being read,
1300	// and 'm' is the generic parameter number being written out (in the same place where 'n'
1301	// came from). If 'n' takes more space to compress than 'm' (e.g., 0x80 <= n <= 0x3fff so
1302	// it takes 2 bytes, and m < 0x80 so it takes one byte), then when we overwrite the fixup
1303	// number with the generic parameter number, we'll leave extra bytes in the signature following
1304	// the written generic parameter number. Thus, we do something of a hack to ensure that the
1305	// compressed number is correctly readable even if 'm' compresses smaller than 'n' did: we
1306	// recompress 'm' to use the same amount of space as 'n' used. This is possible because smaller
1307	// numbers can still be compressed in a larger amount of space, even though it's not optimal (and
1308	// CorSigCompressData() would never do it). If, however, the compressed sizes are the other
1309	// way around (m takes more space to compress than n), then we've already corrupted the
1310	// signature that we're reading by writing beyond what we should (is there some reason why
1311	// this is not possible?).
1312	// Note that 'ptr' has already been adjusted, above, to point to the next type after this one.
1313	// There is no need to update it when recompressing the data.
1314
1315	if (compressed_size_m > compressed_size_n)
1316	{
1317	// We've got a problem: we just corrupted the rest of the signature!
1318	// (Can this ever happen in practice?)
1319	PASM->report->error("(fixupType) Too many %stype parameters\n",
1320	(*pb == ELEMENT_TYPE_MVARFIXUP)? "method ": "");
1321	}
1322	else if (compressed_size_m < compressed_size_n)
1323	{
1324	// We didn't write out as much data as we read. This will leave extra bytes in the
1325	// signature that will be incorrectly recognized. Ideally, we would just shrink the
1326	// signature. That's not easy to do here. Instead, pad the bytes to force it to use
1327	// a larger encoding than needed. This assumes knowledge of the CorSigCompressData()
1328	// encoding.
1329	//
1330	// The cases:
1331	// compressed_size_m m bytes compressed_size_n result bytes
1332	// 1 m1 2 0x80 m1
1333	// 1 m1 4 0xC0 0x00 0x00 m1
1334	// 2 m1 m2 4 0xC0 0x00 (m1 & 0x7f) m2
1335
1336	_ASSERTE((compressed_size_m == `1`) \|\| (compressed_size_m == `2`) \|\| (compressed_size_m == `4`));
1337	_ASSERTE((compressed_size_n == `1`) \|\| (compressed_size_n == `2`) \|\| (compressed_size_n == `4`));
1338
1339	if ((compressed_size_m == `1`) &&
1340	(compressed_size_n == `2`))
1341	{
1342	unsigned __int8 m1 = *(pb + `1`);
1343	_ASSERTE(m1 < `0x80`);
1344	*(pb + `1`) = `0x80`;
1345	*(pb + `2`) = m1;
1346	}
1347	else
1348	if ((compressed_size_m == `1`) &&
1349	(compressed_size_n == `4`))
1350	{
1351	unsigned __int8 m1 = *(pb + `1`);
1352	_ASSERTE(m1 < `0x80`);
1353	*(pb + `1`) = `0xC0`;
1354	*(pb + `2`) = `0x00`;
1355	*(pb + `3`) = `0x00`;
1356	*(pb + `4`) = m1;
1357	}
1358	else
1359	if ((compressed_size_m == `2`) &&
1360	(compressed_size_n == `4`))
1361	{
1362	unsigned __int8 m1 = *(pb + `1`);
1363	unsigned __int8 m2 = *(pb + `2`);
1364	_ASSERTE(m1 >= `0x80`);
1365	m1 &= `0x7f`; // strip the bit indicating it's a 2-byte thing
1366	*(pb + `1`) = `0xC0`;
1367	*(pb + `2`) = `0x00`;
1368	*(pb + `3`) = m1;
1369	*(pb + `4`) = m2;
1370	}
1371	}
1372	}
1373	else
1374	CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // bound
1375	break;
1376
1377	case ELEMENT_TYPE_FNPTR:
1378	{
1379	CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // calling convention
1380	unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&) ptr); // arg count
1381	ptr = skipType(ptr, fFixupType); // return type
1382	while(argCnt > `0`)
1383	{
1384	ptr = skipType(ptr, fFixupType);
1385	--argCnt;
1386	}
1387	}
1388	break;
1389
1390	case ELEMENT_TYPE_GENERICINST:
1391	{
1392	ptr = skipType(ptr, fFixupType); // type constructor
1393	unsigned argCnt = CorSigUncompressData((PCCOR_SIGNATURE&)ptr); // arg count
1394	while(argCnt > `0`) {
1395	ptr = skipType(ptr, fFixupType);
1396	--argCnt;
1397	}
1398	}
1399	break;
1400
1401	default:
1402	case ELEMENT_TYPE_END :
1403	_ASSERTE(!"Unknown Type");
1404	break;
1405	}
1406	return(ptr);
1407	}
1408
1409	/************************************************************************/
1410	void FixupTyPars(PCOR_SIGNATURE pSig, ULONG cSig)
1411	{
1412	if(TyParFixupList.COUNT() > `0`)
1413	{
1414	BYTE* ptr = (BYTE*)pSig;
1415	BYTE* ptrEnd = ptr + cSig;
1416	while(ptr < ptrEnd)
1417	{
1418	ptr = skipType(ptr, TRUE);
1419	} // end while
1420	} // end if(COUNT>0)
1421	}
1422	void FixupTyPars(BinStr* pbstype)
1423	{
1424	FixupTyPars((PCOR_SIGNATURE)(pbstype->ptr()),(ULONG)(pbstype->length()));
1425	}
1426	/************************************************************************/
1427	static unsigned corCountArgs(BinStr* args)
1428	{
1429	unsigned __int8* ptr = args->ptr();
1430	unsigned __int8* end = &args->ptr()[args->length()];
1431	unsigned ret = `0`;
1432	while(ptr < end)
1433	{
1434	if (*ptr != ELEMENT_TYPE_SENTINEL)
1435	{
1436	ptr = skipType(ptr, FALSE);
1437	ret++;
1438	}
1439	else ptr++;
1440	}
1441	return(ret);
1442	}
1443
1444	/******************************************************************************/
1445	AsmParse::AsmParse(ReadStream* aIn, Assembler *aAssem)
1446	{
1447	#ifdef DEBUG_PARSING
1448	extern int yydebug;
1449	yydebug = `1`;
1450	#endif
1451
1452	assem = aAssem;
1453	assem->SetErrorReporter((ErrorReporter )this*);
1454
1455	assem->m_ulCurLine = `1`;
1456	assem->m_ulCurColumn = `1`;
1457
1458	wzIncludePath = NULL;
1459	penv = NULL;
1460
1461	hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
1462	hstderr = GetStdHandle(STD_ERROR_HANDLE);
1463
1464	success = true;
1465	_ASSERTE(parser == `0`); // Should only be one parser instance at a time
1466
1467	// Resolve aliases
1468	for (unsigned int i = `0`; i < sizeof(keywords) / sizeof(Keywords); i++)
1469	{
1470	if (keywords[i].token == NO_VALUE)
1471	keywords[i].token = keywords[keywords[i].tokenVal].token;
1472	}
1473	SetSymbolTables();
1474	Init_str2uint64();
1475	parser = this;
1476	//yyparse();
1477	}
1478
1479	/******************************************************************************/
1480	AsmParse::~AsmParse()
1481	{
1482	parser = `0`;
1483	delete penv;
1484	while(m_ANSLast.POP());
1485	}
1486
1487	/************************************************************************/
1488	DWORD AsmParse::IsItUnicode(CONST LPVOID pBuff, int cb, LPINT lpi)
1489	{
1490	return IsTextUnicode(pBuff,cb,lpi);
1491	}
1492
1493	/************************************************************************/
1494	void AsmParse::CreateEnvironment(ReadStream* stream)
1495	{
1496	penv = new PARSING_ENVIRONMENT;
1497	memset(penv,`0`,sizeof(PARSING_ENVIRONMENT));
1498	penv->in = stream;
1499	penv->curLine = `1`;
1500	strcpy_s(penv->szFileName, MAX_FILENAME_LENGTH*`3`+`1`,assem->m_szSourceFileName);
1501
1502	penv->curPos = fillBuff(NULL);
1503	penv->uCodePage = g_uCodePage;
1504
1505	SetFunctionPtrs();
1506	};
1507
1508	/************************************************************************/
1509	void AsmParse::ParseFile(ReadStream* stream)
1510	{
1511	CreateEnvironment(stream);
1512	yyparse();
1513	penv->in = NULL;
1514	};
1515
1516	/************************************************************************/
1517	char* AsmParse::fillBuff(__in_opt __nullterminated char* pos)
1518	{
1519	int iPutToBuffer;
1520	int iOptions = IS_TEXT_UNICODE_UNICODE_MASK;
1521	g_uCodePage = CP_ACP;
1522	iPutToBuffer = (int)penv->in->getAll(&(penv->curPos));
1523
1524	penv->endPos = penv->curPos + iPutToBuffer;
1525	if(iPutToBuffer > `128`) iPutToBuffer = `128`;
1526	if(IsItUnicode(penv->curPos,iPutToBuffer,&iOptions))
1527	{
1528	g_uCodePage = CP_UTF8;
1529	if(iOptions & IS_TEXT_UNICODE_SIGNATURE)
1530	{
1531	penv->curPos += `2`;
1532	}
1533	if(assem->m_fReportProgress) printf("Source file is UNICODE\n\n");
1534	penv->pfn_Sym = SymW;
1535	penv->pfn_nextchar = nextcharW;
1536	penv->pfn_NewStrFromToken = NewStrFromTokenW;
1537	penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenW;
1538	penv->pfn_GetDouble = GetDoubleW;
1539	}
1540	else
1541	{
1542	if(((penv->curPos[`0`]&`0xFF`)==`0xEF`)&&((penv->curPos[`1`]&`0xFF`)==`0xBB`)&&((penv->curPos[`2`]&`0xFF`)==`0xBF`))
1543	{
1544	g_uCodePage = CP_UTF8;
1545	penv->curPos += `3`;
1546	if(assem->m_fReportProgress) printf("Source file is UTF-8\n\n");
1547	penv->pfn_nextchar = nextcharU;
1548	}
1549	else
1550	{
1551	if(assem->m_fReportProgress) printf("Source file is ANSI\n\n");
1552	penv->pfn_nextchar = nextcharA;
1553	}
1554	penv->pfn_Sym = SymAU;
1555	penv->pfn_NewStrFromToken = NewStrFromTokenAU;
1556	penv->pfn_NewStaticStrFromToken = NewStaticStrFromTokenAU;
1557	penv->pfn_GetDouble = GetDoubleAU;
1558	}
1559	return(penv->curPos);
1560	}
1561
1562	/******************************************************************************/
1563	BinStr* AsmParse::MakeSig(unsigned callConv, BinStr* retType, BinStr* args, int ntyargs)
1564	{
1565	_ASSERTE((ntyargs != `0`) == ((callConv & IMAGE_CEE_CS_CALLCONV_GENERIC) != `0`));
1566	BinStr* ret = new BinStr ();
1567	if(ret)
1568	{
1569	//if (retType != 0)
1570	ret->insertInt8(callConv);
1571	if (ntyargs != `0`)
1572	corEmitInt(ret, ntyargs);
1573	corEmitInt(ret, corCountArgs(args));
1574
1575	if (retType != `0`)
1576	{
1577	ret->append(retType);
1578	delete retType;
1579	}
1580	ret->append(args);
1581	}
1582	else
1583	error("\nOut of memory!\n");
1584
1585	delete args;
1586	return(ret);
1587	}
1588
1589	/******************************************************************************/
1590	BinStr* AsmParse::MakeTypeArray(CorElementType kind, BinStr* elemType, BinStr* bounds)
1591	{
1592	// 'bounds' is a binary buffer, that contains an array of 'struct Bounds'
1593	struct Bounds {
1594	int lowerBound;
1595	unsigned numElements;
1596	};
1597
1598	_ASSERTE(bounds->length() % sizeof(Bounds) == `0`);
1599	unsigned boundsLen = bounds->length() / sizeof(Bounds);
1600	_ASSERTE(boundsLen > `0`);
1601	Bounds* boundsArr = (Bounds*) bounds->ptr();
1602
1603	BinStr* ret = new BinStr ();
1604
1605	ret->appendInt8(kind);
1606	ret->append(elemType);
1607	corEmitInt(ret, boundsLen); // emit the rank
1608
1609	unsigned lowerBoundsDefined = `0`;
1610	unsigned numElementsDefined = `0`;
1611	unsigned i;
1612	for(i=`0`; i < boundsLen; i++)
1613	{
1614	if(boundsArr[i].lowerBound < `0x7FFFFFFF`) lowerBoundsDefined = i+`1`;
1615	else boundsArr[i].lowerBound = `0`;
1616
1617	if(boundsArr[i].numElements < `0x7FFFFFFF`) numElementsDefined = i+`1`;
1618	else boundsArr[i].numElements = `0`;
1619	}
1620
1621	corEmitInt(ret, numElementsDefined); // emit number of bounds
1622
1623	for(i=`0`; i < numElementsDefined; i++)
1624	{
1625	_ASSERTE (boundsArr[i].numElements >= `0`); // enforced at rule time
1626	corEmitInt(ret, boundsArr[i].numElements);
1627
1628	}
1629
1630	corEmitInt(ret, lowerBoundsDefined); // emit number of lower bounds
1631	for(i=`0`; i < lowerBoundsDefined; i++)
1632	{
1633	unsigned cnt = CorSigCompressSignedInt(boundsArr[i].lowerBound, ret->getBuff(`5`));
1634	ret->remove(`5` - cnt);
1635	}
1636	delete elemType;
1637	delete bounds;
1638	return(ret);
1639	}
1640
1641	/******************************************************************************/
1642	BinStr* AsmParse::MakeTypeClass(CorElementType kind, mdToken tk)
1643	{
1644
1645	BinStr* ret = new BinStr ();
1646	_ASSERTE(kind == ELEMENT_TYPE_CLASS \|\| kind == ELEMENT_TYPE_VALUETYPE \|\|
1647	kind == ELEMENT_TYPE_CMOD_REQD \|\| kind == ELEMENT_TYPE_CMOD_OPT);
1648	ret->appendInt8(kind);
1649	unsigned cnt = CorSigCompressToken(tk, ret->getBuff(`5`));
1650	ret->remove(`5` - cnt);
1651	return(ret);
1652	}
1653	/************************************************************************/
1654	void PrintANSILine(FILE* pF, __in __nullterminated char* sz)
1655	{
1656	WCHAR *wz = &wzUniBuf[`0`];
1657	if(g_uCodePage != CP_ACP)
1658	{
1659	memset(wz,`0`,dwUniBuf); // dwUniBuf/2 WCHARs = dwUniBuf bytes
1660	WszMultiByteToWideChar(g_uCodePage,`0`,sz,-`1`,wz,(dwUniBuf >> `1`)-`1`);
1661
1662	memset(sz,`0`,dwUniBuf);
1663	WszWideCharToMultiByte(g_uConsoleCP,`0`,wz,-`1`,sz,dwUniBuf-`1`,NULL,NULL);
1664	}
1665	fprintf(pF,"%s",sz);
1666	}
1667	/************************************************************************/
1668	void AsmParse::error(const char* fmt, ...)
1669	{
1670	char sz = (char**)(&wzUniBuf[(dwUniBuf >> `1`)]);
1671	char *psz=&sz[`0`];
1672	FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1673	success = false;
1674	va_list args;
1675	va_start(args, fmt);
1676
1677	if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> `1`), "%S(%d) : ", penv->in->namew(), penv->curLine);
1678	psz+=sprintf_s(psz, (dwUniBuf >> `1`), "error : ");
1679	_vsnprintf_s(psz, (dwUniBuf >> `1`),(dwUniBuf >> `1`)-strlen(sz)-`1`, fmt, args);
1680	PrintANSILine(pF,sz);
1681	}
1682
1683	/************************************************************************/
1684	void AsmParse::warn(const char* fmt, ...)
1685	{
1686	char sz = (char**)(&wzUniBuf[(dwUniBuf >> `1`)]);
1687	char *psz=&sz[`0`];
1688	FILE* pF = ((!assem->m_fReportProgress)&&(assem->OnErrGo)) ? stdout : stderr;
1689	va_list args;
1690	va_start(args, fmt);
1691
1692	if((penv) && (penv->in)) psz+=sprintf_s(psz, (dwUniBuf >> `1`), "%S(%d) : ", penv->in->namew(), penv->curLine);
1693	psz+=sprintf_s(psz, (dwUniBuf >> `1`), "warning : ");
1694	_vsnprintf_s(psz, (dwUniBuf >> `1`),(dwUniBuf >> `1`)-strlen(sz)-`1`, fmt, args);
1695	PrintANSILine(pF,sz);
1696	}
1697	/************************************************************************/
1698	void AsmParse::msg(const char* fmt, ...)
1699	{
1700	char sz = (char**)(&wzUniBuf[(dwUniBuf >> `1`)]);
1701	va_list args;
1702	va_start(args, fmt);
1703
1704	_vsnprintf_s(sz, (dwUniBuf >> `1`),(dwUniBuf >> `1`)-`1`, fmt, args);
1705	PrintANSILine(stdout,sz);
1706	}
1707
1708	#ifdef _MSC_VER
1709	#pragma warning(default : 4640)
1710	#endif
1711

Browse the source code of CoreCLR/ilasm/grammar_after.cpp