1// SPDX-License-Identifier: MIT OR MPL-2.0 OR LGPL-2.1-or-later OR GPL-2.0-or-later
2// Copyright 2010, SIL International, All rights reserved.
3
4// This class represents loaded graphite stack machine code. It performs
5// basic sanity checks, on the incoming code to prevent more obvious problems
6// from crashing graphite.
7// Author: Tim Eves
8
9#include <cassert>
10#include <cstddef>
11#include <cstdlib>
12#include <cstring>
13#include "graphite2/Segment.h"
14#include "inc/Code.h"
15#include "inc/Face.h"
16#include "inc/GlyphFace.h"
17#include "inc/GlyphCache.h"
18#include "inc/Machine.h"
19#include "inc/Rule.h"
20#include "inc/Silf.h"
21
22#include <cstdio>
23
24#ifdef NDEBUG
25#ifdef __GNUC__
26#pragma GCC diagnostic ignored "-Wunused-parameter"
27#endif
28#endif
29
30
31using namespace graphite2;
32using namespace vm;
33
34namespace {
35
36inline bool is_return(const instr i) {
37 const opcode_t * opmap = Machine::getOpcodeTable();
38 const instr pop_ret = *opmap[POP_RET].impl,
39 ret_zero = *opmap[RET_ZERO].impl,
40 ret_true = *opmap[RET_TRUE].impl;
41 return i == pop_ret || i == ret_zero || i == ret_true;
42}
43
44struct context
45{
46 context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false;}
47 struct {
48 uint8 changed:1,
49 referenced:1;
50 } flags;
51 uint8 codeRef;
52};
53
54} // end namespace
55
56
57class Machine::Code::decoder
58{
59public:
60 struct limits;
61 static const int NUMCONTEXTS = 256;
62
63 decoder(limits & lims, Code &code, enum passtype pt) throw();
64
65 bool load(const byte * bc_begin, const byte * bc_end);
66 void apply_analysis(instr * const code, instr * code_end);
67 byte max_ref() { return _max_ref; }
68 int out_index() const { return _out_index; }
69
70private:
71 void set_ref(int index) throw();
72 void set_noref(int index) throw();
73 void set_changed(int index) throw();
74 opcode fetch_opcode(const byte * bc);
75 void analyse_opcode(const opcode, const int8 * const dp) throw();
76 bool emit_opcode(opcode opc, const byte * & bc);
77 bool validate_opcode(const byte opc, const byte * const bc);
78 bool valid_upto(const uint16 limit, const uint16 x) const throw();
79 bool test_context() const throw();
80 bool test_ref(int8 index) const throw();
81 bool test_attr(attrCode attr) const throw();
82 void failure(const status_t s) const throw() { _code.failure(s); }
83
84 Code & _code;
85 int _out_index;
86 uint16 _out_length;
87 instr * _instr;
88 byte * _data;
89 limits & _max;
90 enum passtype _passtype;
91 int _stack_depth;
92 bool _in_ctxt_item;
93 int16 _slotref;
94 context _contexts[NUMCONTEXTS];
95 byte _max_ref;
96};
97
98
99struct Machine::Code::decoder::limits
100{
101 const byte * bytecode;
102 const uint8 pre_context;
103 const uint16 rule_length,
104 classes,
105 glyf_attrs,
106 features;
107 const byte attrid[gr_slatMax];
108};
109
110inline Machine::Code::decoder::decoder(limits & lims, Code &code, enum passtype pt) throw()
111: _code(code),
112 _out_index(code._constraint ? 0 : lims.pre_context),
113 _out_length(code._constraint ? 1 : lims.rule_length),
114 _instr(code._code), _data(code._data), _max(lims), _passtype(pt),
115 _stack_depth(0),
116 _in_ctxt_item(false),
117 _slotref(0),
118 _max_ref(0)
119{ }
120
121
122
123Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end,
124 uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face,
125 enum passtype pt, byte * * const _out)
126 : _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded),
127 _constraint(is_constraint), _modify(false), _delete(false), _own(_out==0)
128{
129#ifdef GRAPHITE2_TELEMETRY
130 telemetry::category _code_cat(face.tele.code);
131#endif
132 assert(bytecode_begin != 0);
133 if (bytecode_begin == bytecode_end)
134 {
135 // ::new (this) Code();
136 return;
137 }
138 assert(bytecode_end > bytecode_begin);
139 const opcode_t * op_to_fn = Machine::getOpcodeTable();
140
141 // Allocate code and data target buffers, these sizes are a worst case
142 // estimate. Once we know their real sizes the we'll shrink them.
143 if (_out) _code = reinterpret_cast<instr *>(*_out);
144 else _code = static_cast<instr *>(malloc(estimateCodeDataOut(bytecode_end-bytecode_begin, 1, is_constraint ? 0 : rule_length)));
145 _data = reinterpret_cast<byte *>(_code + (bytecode_end - bytecode_begin));
146
147 if (!_code || !_data) {
148 failure(alloc_failed);
149 return;
150 }
151
152 decoder::limits lims = {
153 bytecode_end,
154 pre_context,
155 rule_length,
156 silf.numClasses(),
157 face.glyphs().numAttrs(),
158 face.numFeatures(),
159 {1,1,1,1,1,1,1,1,
160 1,1,1,1,1,1,1,255,
161 1,1,1,1,1,1,1,1,
162 1,1,1,1,1,1,0,0,
163 0,0,0,0,0,0,0,0,
164 0,0,0,0,0,0,0,0,
165 0,0,0,0,0,0,0, silf.numUser()}
166 };
167
168 decoder dec(lims, *this, pt);
169 if(!dec.load(bytecode_begin, bytecode_end))
170 return;
171
172 // Is this an empty program?
173 if (_instr_count == 0)
174 {
175 release_buffers();
176 ::new (this) Code();
177 return;
178 }
179
180 // When we reach the end check we've terminated it correctly
181 if (!is_return(_code[_instr_count-1])) {
182 failure(missing_return);
183 return;
184 }
185
186 assert((_constraint && immutable()) || !_constraint);
187 dec.apply_analysis(_code, _code + _instr_count);
188 _max_ref = dec.max_ref();
189
190 // Now we know exactly how much code and data the program really needs
191 // realloc the buffers to exactly the right size so we don't waste any
192 // memory.
193 assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_instr_count));
194 assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_data_size));
195 memmove(_code + (_instr_count+1), _data, _data_size*sizeof(byte));
196 size_t const total_sz = ((_instr_count+1) + (_data_size + sizeof(instr)-1)/sizeof(instr))*sizeof(instr);
197 if (_out)
198 *_out += total_sz;
199 else
200 {
201 instr * const old_code = _code;
202 _code = static_cast<instr *>(realloc(_code, total_sz));
203 if (!_code) free(old_code);
204 }
205 _data = reinterpret_cast<byte *>(_code + (_instr_count+1));
206
207 if (!_code)
208 {
209 failure(alloc_failed);
210 return;
211 }
212
213 // Make this RET_ZERO, we should never reach this but just in case ...
214 _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint];
215
216#ifdef GRAPHITE2_TELEMETRY
217 telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr));
218#endif
219}
220
221Machine::Code::~Code() throw ()
222{
223 if (_own)
224 release_buffers();
225}
226
227
228bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end)
229{
230 _max.bytecode = bc_end;
231 while (bc < bc_end)
232 {
233 const opcode opc = fetch_opcode(bc++);
234 if (opc == vm::MAX_OPCODE)
235 return false;
236
237 analyse_opcode(opc, reinterpret_cast<const int8 *>(bc));
238
239 if (!emit_opcode(opc, bc))
240 return false;
241 }
242
243 return bool(_code);
244}
245
246// Validation check and fixups.
247//
248
249opcode Machine::Code::decoder::fetch_opcode(const byte * bc)
250{
251 const byte opc = *bc++;
252
253 // Do some basic sanity checks based on what we know about the opcode
254 if (!validate_opcode(opc, bc)) return MAX_OPCODE;
255
256 // And check its arguments as far as possible
257 switch (opcode(opc))
258 {
259 case NOP :
260 break;
261 case PUSH_BYTE :
262 case PUSH_BYTEU :
263 case PUSH_SHORT :
264 case PUSH_SHORTU :
265 case PUSH_LONG :
266 ++_stack_depth;
267 break;
268 case ADD :
269 case SUB :
270 case MUL :
271 case DIV :
272 case MIN_ :
273 case MAX_ :
274 case AND :
275 case OR :
276 case EQUAL :
277 case NOT_EQ :
278 case LESS :
279 case GTR :
280 case LESS_EQ :
281 case GTR_EQ :
282 case BITOR :
283 case BITAND :
284 if (--_stack_depth <= 0)
285 failure(underfull_stack);
286 break;
287 case NEG :
288 case TRUNC8 :
289 case TRUNC16 :
290 case NOT :
291 case BITNOT :
292 case BITSET :
293 if (_stack_depth <= 0)
294 failure(underfull_stack);
295 break;
296 case COND :
297 _stack_depth -= 2;
298 if (_stack_depth <= 0)
299 failure(underfull_stack);
300 break;
301 case NEXT_N : // runtime checked
302 break;
303 case NEXT :
304 case COPY_NEXT :
305 ++_out_index;
306 if (_out_index < -1 || _out_index > _out_length || _slotref > _max.rule_length)
307 failure(out_of_range_data);
308 break;
309 case PUT_GLYPH_8BIT_OBS :
310 valid_upto(_max.classes, bc[0]);
311 test_context();
312 break;
313 case PUT_SUBS_8BIT_OBS :
314 test_ref(int8(bc[0]));
315 valid_upto(_max.classes, bc[1]);
316 valid_upto(_max.classes, bc[2]);
317 test_context();
318 break;
319 case PUT_COPY :
320 test_ref(int8(bc[0]));
321 test_context();
322 break;
323 case INSERT :
324 if (_passtype >= PASS_TYPE_POSITIONING)
325 failure(invalid_opcode);
326 ++_out_length;
327 if (_out_index < 0) ++_out_index;
328 if (_out_index < -1 || _out_index >= _out_length)
329 failure(out_of_range_data);
330 break;
331 case DELETE :
332 if (_passtype >= PASS_TYPE_POSITIONING)
333 failure(invalid_opcode);
334 if (_out_index < _max.pre_context)
335 failure(out_of_range_data);
336 --_out_index;
337 --_out_length;
338 if (_out_index < -1 || _out_index > _out_length)
339 failure(out_of_range_data);
340 break;
341 case ASSOC :
342 if (bc[0] == 0)
343 failure(out_of_range_data);
344 for (uint8 num = bc[0]; num; --num)
345 test_ref(int8(bc[num]));
346 test_context();
347 break;
348 case CNTXT_ITEM :
349 valid_upto(_max.rule_length, _max.pre_context + int8(bc[0]));
350 if (bc + 2 + bc[1] >= _max.bytecode) failure(jump_past_end);
351 if (_in_ctxt_item) failure(nested_context_item);
352 break;
353 case ATTR_SET :
354 case ATTR_ADD :
355 case ATTR_SUB :
356 case ATTR_SET_SLOT :
357 if (--_stack_depth < 0)
358 failure(underfull_stack);
359 valid_upto(gr_slatMax, bc[0]);
360 if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes
361 failure(out_of_range_data);
362 test_attr(attrCode(bc[0]));
363 test_context();
364 break;
365 case IATTR_SET_SLOT :
366 if (--_stack_depth < 0)
367 failure(underfull_stack);
368 if (valid_upto(gr_slatMax, bc[0]))
369 valid_upto(_max.attrid[bc[0]], bc[1]);
370 test_attr(attrCode(bc[0]));
371 test_context();
372 break;
373 case PUSH_SLOT_ATTR :
374 ++_stack_depth;
375 valid_upto(gr_slatMax, bc[0]);
376 test_ref(int8(bc[1]));
377 if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes
378 failure(out_of_range_data);
379 test_attr(attrCode(bc[0]));
380 break;
381 case PUSH_GLYPH_ATTR_OBS :
382 case PUSH_ATT_TO_GATTR_OBS :
383 ++_stack_depth;
384 valid_upto(_max.glyf_attrs, bc[0]);
385 test_ref(int8(bc[1]));
386 break;
387 case PUSH_ATT_TO_GLYPH_METRIC :
388 case PUSH_GLYPH_METRIC :
389 ++_stack_depth;
390 valid_upto(kgmetDescent, bc[0]);
391 test_ref(int8(bc[1]));
392 // level: dp[2] no check necessary
393 break;
394 case PUSH_FEAT :
395 ++_stack_depth;
396 valid_upto(_max.features, bc[0]);
397 test_ref(int8(bc[1]));
398 break;
399 case PUSH_ISLOT_ATTR :
400 ++_stack_depth;
401 if (valid_upto(gr_slatMax, bc[0]))
402 {
403 test_ref(int8(bc[1]));
404 valid_upto(_max.attrid[bc[0]], bc[2]);
405 }
406 test_attr(attrCode(bc[0]));
407 break;
408 case PUSH_IGLYPH_ATTR :// not implemented
409 ++_stack_depth;
410 break;
411 case POP_RET :
412 if (--_stack_depth < 0)
413 failure(underfull_stack);
414 GR_FALLTHROUGH;
415 // no break
416 case RET_ZERO :
417 case RET_TRUE :
418 break;
419 case IATTR_SET :
420 case IATTR_ADD :
421 case IATTR_SUB :
422 if (--_stack_depth < 0)
423 failure(underfull_stack);
424 if (valid_upto(gr_slatMax, bc[0]))
425 valid_upto(_max.attrid[bc[0]], bc[1]);
426 test_attr(attrCode(bc[0]));
427 test_context();
428 break;
429 case PUSH_PROC_STATE : // dummy: dp[0] no check necessary
430 case PUSH_VERSION :
431 ++_stack_depth;
432 break;
433 case PUT_SUBS :
434 test_ref(int8(bc[0]));
435 valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]);
436 valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]);
437 test_context();
438 break;
439 case PUT_SUBS2 : // not implemented
440 case PUT_SUBS3 : // not implemented
441 break;
442 case PUT_GLYPH :
443 valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]);
444 test_context();
445 break;
446 case PUSH_GLYPH_ATTR :
447 case PUSH_ATT_TO_GLYPH_ATTR :
448 ++_stack_depth;
449 valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]);
450 test_ref(int8(bc[2]));
451 break;
452 case SET_FEAT :
453 valid_upto(_max.features, bc[0]);
454 test_ref(int8(bc[1]));
455 break;
456 default:
457 failure(invalid_opcode);
458 break;
459 }
460
461 return bool(_code) ? opcode(opc) : MAX_OPCODE;
462}
463
464
465void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw()
466{
467 switch (opc)
468 {
469 case DELETE :
470 _code._delete = true;
471 break;
472 case ASSOC :
473 set_changed(0);
474// for (uint8 num = arg[0]; num; --num)
475// _analysis.set_noref(num);
476 break;
477 case PUT_GLYPH_8BIT_OBS :
478 case PUT_GLYPH :
479 _code._modify = true;
480 set_changed(0);
481 break;
482 case ATTR_SET :
483 case ATTR_ADD :
484 case ATTR_SUB :
485 case ATTR_SET_SLOT :
486 case IATTR_SET_SLOT :
487 case IATTR_SET :
488 case IATTR_ADD :
489 case IATTR_SUB :
490 set_noref(0);
491 break;
492 case NEXT :
493 case COPY_NEXT :
494 ++_slotref;
495 _contexts[_slotref] = context(uint8(_code._instr_count+1));
496 // if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref;
497 break;
498 case INSERT :
499 if (_slotref >= 0) --_slotref;
500 _code._modify = true;
501 break;
502 case PUT_SUBS_8BIT_OBS : // slotref on 1st parameter
503 case PUT_SUBS :
504 _code._modify = true;
505 set_changed(0);
506 GR_FALLTHROUGH;
507 // no break
508 case PUT_COPY :
509 if (arg[0] != 0) { set_changed(0); _code._modify = true; }
510 set_ref(arg[0]);
511 break;
512 case PUSH_GLYPH_ATTR_OBS :
513 case PUSH_SLOT_ATTR :
514 case PUSH_GLYPH_METRIC :
515 case PUSH_ATT_TO_GATTR_OBS :
516 case PUSH_ATT_TO_GLYPH_METRIC :
517 case PUSH_ISLOT_ATTR :
518 case PUSH_FEAT :
519 case SET_FEAT :
520 set_ref(arg[1]);
521 break;
522 case PUSH_ATT_TO_GLYPH_ATTR :
523 case PUSH_GLYPH_ATTR :
524 set_ref(arg[2]);
525 break;
526 default:
527 break;
528 }
529}
530
531
532bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc)
533{
534 const opcode_t * op_to_fn = Machine::getOpcodeTable();
535 const opcode_t & op = op_to_fn[opc];
536 if (op.impl[_code._constraint] == 0)
537 {
538 failure(unimplemented_opcode_used);
539 return false;
540 }
541
542 const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz;
543
544 // Add this instruction
545 *_instr++ = op.impl[_code._constraint];
546 ++_code._instr_count;
547
548 // Grab the parameters
549 if (param_sz) {
550 memcpy(_data, bc, param_sz * sizeof(byte));
551 bc += param_sz;
552 _data += param_sz;
553 _code._data_size += param_sz;
554 }
555
556 // recursively decode a context item so we can split the skip into
557 // instruction and data portions.
558 if (opc == CNTXT_ITEM)
559 {
560 assert(_out_index == 0);
561 _in_ctxt_item = true;
562 _out_index = _max.pre_context + int8(_data[-2]);
563 _slotref = int8(_data[-2]);
564 _out_length = _max.rule_length;
565
566 const size_t ctxt_start = _code._instr_count;
567 byte & instr_skip = _data[-1];
568 byte & data_skip = *_data++;
569 ++_code._data_size;
570 const byte *curr_end = _max.bytecode;
571
572 if (load(bc, bc + instr_skip))
573 {
574 bc += instr_skip;
575 data_skip = instr_skip - byte(_code._instr_count - ctxt_start);
576 instr_skip = byte(_code._instr_count - ctxt_start);
577 _max.bytecode = curr_end;
578
579 _out_length = 1;
580 _out_index = 0;
581 _slotref = 0;
582 _in_ctxt_item = false;
583 }
584 else
585 {
586 _out_index = 0;
587 _slotref = 0;
588 return false;
589 }
590 }
591
592 return bool(_code);
593}
594
595
596void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end)
597{
598 // insert TEMP_COPY commands for slots that need them (that change and are referenced later)
599 int tempcount = 0;
600 if (_code._constraint) return;
601
602 const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0];
603 for (const context * c = _contexts, * const ce = c + _slotref; c < ce; ++c)
604 {
605 if (!c->flags.referenced || !c->flags.changed) continue;
606
607 instr * const tip = code + c->codeRef + tempcount;
608 memmove(tip+1, tip, (code_end - tip) * sizeof(instr));
609 *tip = temp_copy;
610 ++code_end;
611 ++tempcount;
612 _code._delete = true;
613 }
614
615 _code._instr_count = code_end - code;
616}
617
618
619inline
620bool Machine::Code::decoder::validate_opcode(const byte opc, const byte * const bc)
621{
622 if (opc >= MAX_OPCODE)
623 {
624 failure(invalid_opcode);
625 return false;
626 }
627 const opcode_t & op = Machine::getOpcodeTable()[opc];
628 if (op.impl[_code._constraint] == 0)
629 {
630 failure(unimplemented_opcode_used);
631 return false;
632 }
633 if (op.param_sz == VARARGS && bc >= _max.bytecode)
634 {
635 failure(arguments_exhausted);
636 return false;
637 }
638 const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz;
639 if (bc - 1 + param_sz >= _max.bytecode)
640 {
641 failure(arguments_exhausted);
642 return false;
643 }
644 return true;
645}
646
647
648bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw()
649{
650 const bool t = (limit != 0) && (x < limit);
651 if (!t) failure(out_of_range_data);
652 return t;
653}
654
655inline
656bool Machine::Code::decoder::test_ref(int8 index) const throw()
657{
658 if (_code._constraint && !_in_ctxt_item)
659 {
660 if (index > 0 || -index > _max.pre_context)
661 {
662 failure(out_of_range_data);
663 return false;
664 }
665 }
666 else
667 {
668 if (_max.rule_length == 0
669 || (_slotref + _max.pre_context + index >= _max.rule_length)
670 || (_slotref + _max.pre_context + index < 0))
671 {
672 failure(out_of_range_data);
673 return false;
674 }
675 }
676 return true;
677}
678
679bool Machine::Code::decoder::test_context() const throw()
680{
681 if (_out_index >= _out_length || _out_index < 0 || _slotref >= NUMCONTEXTS - 1)
682 {
683 failure(out_of_range_data);
684 return false;
685 }
686 return true;
687}
688
689bool Machine::Code::decoder::test_attr(attrCode) const throw()
690{
691#if 0 // This code is coming but causes backward compatibility problems.
692 if (_passtype < PASS_TYPE_POSITIONING)
693 {
694 if (attr != gr_slatBreak && attr != gr_slatDir && attr != gr_slatUserDefn
695 && attr != gr_slatCompRef)
696 {
697 failure(out_of_range_data);
698 return false;
699 }
700 }
701#endif
702 return true;
703}
704
705inline
706void Machine::Code::failure(const status_t s) throw() {
707 release_buffers();
708 _status = s;
709}
710
711
712inline
713void Machine::Code::decoder::set_ref(int index) throw() {
714 if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
715 _contexts[index + _slotref].flags.referenced = true;
716 if (index + _slotref > _max_ref) _max_ref = index + _slotref;
717}
718
719
720inline
721void Machine::Code::decoder::set_noref(int index) throw() {
722 if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
723 if (index + _slotref > _max_ref) _max_ref = index + _slotref;
724}
725
726
727inline
728void Machine::Code::decoder::set_changed(int index) throw() {
729 if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
730 _contexts[index + _slotref].flags.changed= true;
731 if (index + _slotref > _max_ref) _max_ref = index + _slotref;
732}
733
734
735void Machine::Code::release_buffers() throw()
736{
737 if (_own)
738 free(_code);
739 _code = 0;
740 _data = 0;
741 _own = false;
742}
743
744
745int32 Machine::Code::run(Machine & m, slotref * & map) const
746{
747// assert(_own);
748 assert(*this); // Check we are actually runnable
749
750 if (m.slotMap().size() <= size_t(_max_ref + m.slotMap().context())
751 || m.slotMap()[_max_ref + m.slotMap().context()] == 0)
752 {
753 m._status = Machine::slot_offset_out_bounds;
754 return 1;
755// return m.run(_code, _data, map);
756 }
757
758 return m.run(_code, _data, map);
759}
760