1 | // -*- mode: C++ -*- |
2 | |
3 | // Copyright (c) 2010, Google Inc. |
4 | // All rights reserved. |
5 | // |
6 | // Redistribution and use in source and binary forms, with or without |
7 | // modification, are permitted provided that the following conditions are |
8 | // met: |
9 | // |
10 | // * Redistributions of source code must retain the above copyright |
11 | // notice, this list of conditions and the following disclaimer. |
12 | // * Redistributions in binary form must reproduce the above |
13 | // copyright notice, this list of conditions and the following disclaimer |
14 | // in the documentation and/or other materials provided with the |
15 | // distribution. |
16 | // * Neither the name of Google Inc. nor the names of its |
17 | // contributors may be used to endorse or promote products derived from |
18 | // this software without specific prior written permission. |
19 | // |
20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 | |
32 | // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
33 | |
34 | // macho_reader.h: A class for parsing Mach-O files. |
35 | |
36 | #ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_ |
37 | #define BREAKPAD_COMMON_MAC_MACHO_READER_H_ |
38 | |
39 | #include <mach-o/loader.h> |
40 | #include <mach-o/fat.h> |
41 | #include <stdint.h> |
42 | #include <stdlib.h> |
43 | #include <unistd.h> |
44 | |
45 | #include <map> |
46 | #include <string> |
47 | #include <vector> |
48 | |
49 | #include "common/byte_cursor.h" |
50 | #include "common/mac/super_fat_arch.h" |
51 | |
52 | namespace google_breakpad { |
53 | namespace mach_o { |
54 | |
55 | using std::map; |
56 | using std::string; |
57 | using std::vector; |
58 | |
// The Mac headers give these groups of constants no dedicated types.
// Naming them here documents intent, and making each one as wide as the
// field it is read from keeps them convenient to use with ByteCursors.

// A fat binary or Mach-O magic number.
typedef uint32_t Magic;

// A Mach-O file type (mach_header[_64].filetype).
typedef uint32_t FileType;

// The flags found in a Mach-O header.
typedef uint32_t FileFlags;

// The type of a load command.
typedef uint32_t LoadCommandType;

// Flags describing a segment.
typedef uint32_t SegmentFlags;

// Flags describing a section's contents.
typedef uint32_t SectionFlags;
69 | |
70 | // A parser for fat binary files, used to store universal binaries. |
71 | // When applied to a (non-fat) Mach-O file, this behaves as if the |
72 | // file were a fat file containing a single object file. |
73 | class FatReader { |
74 | public: |
75 | |
76 | // A class for reporting errors found while parsing fat binary files. The |
77 | // default definitions of these methods print messages to stderr. |
78 | class Reporter { |
79 | public: |
80 | // Create a reporter that attributes problems to |filename|. |
81 | explicit Reporter(const string& filename) : filename_(filename) { } |
82 | |
83 | virtual ~Reporter() { } |
84 | |
85 | // The data does not begin with a fat binary or Mach-O magic number. |
86 | // This is a fatal error. |
87 | virtual void (); |
88 | |
89 | // The Mach-O fat binary file ends abruptly, without enough space |
90 | // to contain an object file it claims is present. |
91 | virtual void MisplacedObjectFile(); |
92 | |
93 | // The file ends abruptly: either it is not large enough to hold a |
94 | // complete header, or the header implies that contents are present |
95 | // beyond the actual end of the file. |
96 | virtual void TooShort(); |
97 | |
98 | private: |
99 | // The filename to which the reader should attribute problems. |
100 | string filename_; |
101 | }; |
102 | |
103 | // Create a fat binary file reader that uses |reporter| to report problems. |
104 | explicit FatReader(Reporter* reporter) : reporter_(reporter) { } |
105 | |
106 | // Read the |size| bytes at |buffer| as a fat binary file. On success, |
107 | // return true; on failure, report the problem to reporter_ and return |
108 | // false. |
109 | // |
110 | // If the data is a plain Mach-O file, rather than a fat binary file, |
111 | // then the reader behaves as if it had found a fat binary file whose |
112 | // single object file is the Mach-O file. |
113 | bool Read(const uint8_t* buffer, size_t size); |
114 | |
115 | // Return an array of 'SuperFatArch' structures describing the |
116 | // object files present in this fat binary file. Set |size| to the |
117 | // number of elements in the array. |
118 | // |
119 | // Assuming Read returned true, the entries are validated: it is safe to |
120 | // assume that the offsets and sizes in each SuperFatArch refer to subranges |
121 | // of the bytes passed to Read. |
122 | // |
123 | // If there are no object files in this fat binary, then this |
124 | // function can return NULL. |
125 | // |
126 | // The array is owned by this FatReader instance; it will be freed when |
127 | // this FatReader is destroyed. |
128 | // |
129 | // This function returns a C-style array instead of a vector to make it |
130 | // possible to use the result with OS X functions like NXFindBestFatArch, |
131 | // so that the symbol dumper will behave consistently with other OS X |
132 | // utilities that work with fat binaries. |
133 | const SuperFatArch* object_files(size_t* count) const { |
134 | *count = object_files_.size(); |
135 | if (object_files_.size() > 0) |
136 | return &object_files_[0]; |
137 | return NULL; |
138 | } |
139 | |
140 | private: |
141 | // We use this to report problems parsing the file's contents. (WEAK) |
142 | Reporter* reporter_; |
143 | |
144 | // The contents of the fat binary or Mach-O file we're parsing. We do not |
145 | // own the storage it refers to. |
146 | ByteBuffer buffer_; |
147 | |
148 | // The magic number of this binary, in host byte order. |
149 | Magic magic_; |
150 | |
151 | // The list of object files in this binary. |
152 | // object_files_.size() == fat_header.nfat_arch |
153 | vector<SuperFatArch> object_files_; |
154 | }; |
155 | |
156 | // A segment in a Mach-O file. All these fields have been byte-swapped as |
157 | // appropriate for use by the executing architecture. |
158 | struct Segment { |
159 | // The ByteBuffers below point into the bytes passed to the Reader that |
160 | // created this Segment. |
161 | |
162 | ByteBuffer section_list; // This segment's section list. |
163 | ByteBuffer contents; // This segment's contents. |
164 | |
165 | // This segment's name. |
166 | string name; |
167 | |
168 | // The address at which this segment should be loaded in memory. If |
169 | // bits_64 is false, only the bottom 32 bits of this value are valid. |
170 | uint64_t vmaddr; |
171 | |
172 | // The size of this segment when loaded into memory. This may be larger |
173 | // than contents.Size(), in which case the extra area will be |
174 | // initialized with zeros. If bits_64 is false, only the bottom 32 bits |
175 | // of this value are valid. |
176 | uint64_t vmsize; |
177 | |
178 | // The file offset and size of the segment in the Mach-O image. |
179 | uint64_t fileoff; |
180 | uint64_t filesize; |
181 | |
182 | // The maximum and initial VM protection of this segment's contents. |
183 | uint32_t maxprot; |
184 | uint32_t initprot; |
185 | |
186 | // The number of sections in section_list. |
187 | uint32_t nsects; |
188 | |
189 | // Flags describing this segment, from SegmentFlags. |
190 | uint32_t flags; |
191 | |
192 | // True if this is a 64-bit section; false if it is a 32-bit section. |
193 | bool bits_64; |
194 | }; |
195 | |
196 | // A section in a Mach-O file. All these fields have been byte-swapped as |
197 | // appropriate for use by the executing architecture. |
198 | struct Section { |
199 | // This section's contents. This points into the bytes passed to the |
200 | // Reader that created this Section. |
201 | ByteBuffer contents; |
202 | |
203 | // This section's name. |
204 | string section_name; // section[_64].sectname |
205 | // The name of the segment this section belongs to. |
206 | string segment_name; // section[_64].segname |
207 | |
208 | // The address at which this section's contents should be loaded in |
209 | // memory. If bits_64 is false, only the bottom 32 bits of this value |
210 | // are valid. |
211 | uint64_t address; |
212 | |
213 | // The contents of this section should be loaded into memory at an |
214 | // address which is a multiple of (two raised to this power). |
215 | uint32_t align; |
216 | |
217 | // Flags from SectionFlags describing the section's contents. |
218 | uint32_t flags; |
219 | |
220 | // We don't support reading relocations yet. |
221 | |
222 | // True if this is a 64-bit section; false if it is a 32-bit section. |
223 | bool bits_64; |
224 | }; |
225 | |
226 | // A map from section names to Sections. |
227 | typedef map<string, Section> SectionMap; |
228 | |
229 | // A reader for a Mach-O file. |
230 | // |
231 | // This does not handle fat binaries; see FatReader above. FatReader |
232 | // provides a friendly interface for parsing data that could be either a |
233 | // fat binary or a Mach-O file. |
234 | class Reader { |
235 | public: |
236 | |
237 | // A class for reporting errors found while parsing Mach-O files. The |
238 | // default definitions of these member functions print messages to |
239 | // stderr. |
240 | class Reporter { |
241 | public: |
242 | // Create a reporter that attributes problems to |filename|. |
243 | explicit Reporter(const string& filename) : filename_(filename) { } |
244 | virtual ~Reporter() { } |
245 | |
246 | // Reporter functions for fatal errors return void; the reader will |
247 | // definitely return an error to its caller after calling them |
248 | |
249 | // The data does not begin with a Mach-O magic number, or the magic |
250 | // number does not match the expected value for the cpu architecture. |
251 | // This is a fatal error. |
252 | virtual void (); |
253 | |
254 | // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|) |
255 | // does not match the expected CPU architecture |
256 | // (|expected_cpu_type|, |expected_cpu_subtype|). |
257 | virtual void CPUTypeMismatch(cpu_type_t cpu_type, |
258 | cpu_subtype_t cpu_subtype, |
259 | cpu_type_t expected_cpu_type, |
260 | cpu_subtype_t expected_cpu_subtype); |
261 | |
262 | // The file ends abruptly: either it is not large enough to hold a |
263 | // complete header, or the header implies that contents are present |
264 | // beyond the actual end of the file. |
265 | virtual void (); |
266 | |
267 | // The file's load command region, as given in the Mach-O header, is |
268 | // too large for the file. |
269 | virtual void LoadCommandRegionTruncated(); |
270 | |
271 | // The file's Mach-O header claims the file contains |claimed| load |
272 | // commands, but the I'th load command, of type |type|, extends beyond |
273 | // the end of the load command region, as given by the Mach-O header. |
274 | // If |type| is zero, the command's type was unreadable. |
275 | virtual void LoadCommandsOverrun(size_t claimed, size_t i, |
276 | LoadCommandType type); |
277 | |
278 | // The contents of the |i|'th load command, of type |type|, extend beyond |
279 | // the size given in the load command's header. |
280 | virtual void LoadCommandTooShort(size_t i, LoadCommandType type); |
281 | |
282 | // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named |
283 | // |name| is too short to hold the sections that its header says it does. |
284 | // (This more specific than LoadCommandTooShort.) |
285 | virtual void SectionsMissing(const string& name); |
286 | |
287 | // The segment named |name| claims that its contents lie beyond the end |
288 | // of the file. |
289 | virtual void MisplacedSegmentData(const string& name); |
290 | |
291 | // The section named |section| in the segment named |segment| claims that |
292 | // its contents do not lie entirely within the segment. |
293 | virtual void MisplacedSectionData(const string& section, |
294 | const string& segment); |
295 | |
296 | // The LC_SYMTAB command claims that symbol table contents are located |
297 | // beyond the end of the file. |
298 | virtual void MisplacedSymbolTable(); |
299 | |
300 | // An attempt was made to read a Mach-O file of the unsupported |
301 | // CPU architecture |cpu_type|. |
302 | virtual void UnsupportedCPUType(cpu_type_t cpu_type); |
303 | |
304 | private: |
305 | string filename_; |
306 | }; |
307 | |
308 | // A handler for sections parsed from a segment. The WalkSegmentSections |
309 | // member function accepts an instance of this class, and applies it to |
310 | // each section defined in a given segment. |
311 | class SectionHandler { |
312 | public: |
313 | virtual ~SectionHandler() { } |
314 | |
315 | // Called to report that the segment's section list contains |section|. |
316 | // This should return true if the iteration should continue, or false |
317 | // if it should stop. |
318 | virtual bool HandleSection(const Section& section) = 0; |
319 | }; |
320 | |
321 | // A handler for the load commands in a Mach-O file. |
322 | class LoadCommandHandler { |
323 | public: |
324 | LoadCommandHandler() { } |
325 | virtual ~LoadCommandHandler() { } |
326 | |
327 | // When called from WalkLoadCommands, the following handler functions |
328 | // should return true if they wish to continue iterating over the load |
329 | // command list, or false if they wish to stop iterating. |
330 | // |
331 | // When called from LoadCommandIterator::Handle or Reader::Handle, |
332 | // these functions' return values are simply passed through to Handle's |
333 | // caller. |
334 | // |
335 | // The definitions provided by this base class simply return true; the |
336 | // default is to silently ignore sections whose member functions the |
337 | // subclass doesn't override. |
338 | |
339 | // COMMAND is load command we don't recognize. We provide only the |
340 | // command type and a ByteBuffer enclosing the command's data (If we |
341 | // cannot parse the command type or its size, we call |
342 | // reporter_->IncompleteLoadCommand instead.) |
343 | virtual bool UnknownCommand(LoadCommandType type, |
344 | const ByteBuffer& contents) { |
345 | return true; |
346 | } |
347 | |
348 | // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment |
349 | // with the properties given in |segment|. |
350 | virtual bool SegmentCommand(const Segment& segment) { |
351 | return true; |
352 | } |
353 | |
354 | // The load command is LC_SYMTAB. |entries| holds the array of nlist |
355 | // entries, and |names| holds the strings the entries refer to. |
356 | virtual bool SymtabCommand(const ByteBuffer& entries, |
357 | const ByteBuffer& names) { |
358 | return true; |
359 | } |
360 | |
361 | // Add handler functions for more load commands here as needed. |
362 | }; |
363 | |
364 | // Create a Mach-O file reader that reports problems to |reporter|. |
365 | explicit Reader(Reporter* reporter) |
366 | : reporter_(reporter) { } |
367 | |
368 | // Read the given data as a Mach-O file. The reader retains pointers |
369 | // into the data passed, so the data should live as long as the reader |
370 | // does. On success, return true; on failure, return false. |
371 | // |
372 | // At most one of these functions should be invoked once on each Reader |
373 | // instance. |
374 | bool Read(const uint8_t* buffer, |
375 | size_t size, |
376 | cpu_type_t expected_cpu_type, |
377 | cpu_subtype_t expected_cpu_subtype); |
378 | bool Read(const ByteBuffer& buffer, |
379 | cpu_type_t expected_cpu_type, |
380 | cpu_subtype_t expected_cpu_subtype) { |
381 | return Read(buffer.start, |
382 | buffer.Size(), |
383 | expected_cpu_type, |
384 | expected_cpu_subtype); |
385 | } |
386 | |
387 | // Return this file's characteristics, as found in the Mach-O header. |
388 | cpu_type_t cpu_type() const { return cpu_type_; } |
389 | cpu_subtype_t cpu_subtype() const { return cpu_subtype_; } |
390 | FileType file_type() const { return file_type_; } |
391 | FileFlags flags() const { return flags_; } |
392 | |
393 | // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit |
394 | // Mach-O file. |
395 | bool bits_64() const { return bits_64_; } |
396 | |
397 | // Return true if this is a big-endian Mach-O file, false if it is |
398 | // little-endian. |
399 | bool big_endian() const { return big_endian_; } |
400 | |
401 | // Apply |handler| to each load command in this Mach-O file, stopping when |
402 | // a handler function returns false. If we encounter a malformed load |
403 | // command, report it via reporter_ and return false. Return true if all |
404 | // load commands were parseable and all handlers returned true. |
405 | bool WalkLoadCommands(LoadCommandHandler* handler) const; |
406 | |
407 | // Set |segment| to describe the segment named |name|, if present. If |
408 | // found, |segment|'s byte buffers refer to a subregion of the bytes |
409 | // passed to Read. If we find the section, return true; otherwise, |
410 | // return false. |
411 | bool FindSegment(const string& name, Segment* segment) const; |
412 | |
413 | // Apply |handler| to each section defined in |segment|. If |handler| returns |
414 | // false, stop iterating and return false. If all calls to |handler| return |
415 | // true and we reach the end of the section list, return true. |
416 | bool WalkSegmentSections(const Segment& segment, SectionHandler* handler) |
417 | const; |
418 | |
419 | // Clear |section_map| and then populate it with a map of the sections |
420 | // in |segment|, from section names to Section structures. |
421 | // Each Section's contents refer to bytes in |segment|'s contents. |
422 | // On success, return true; if a problem occurs, report it and return false. |
423 | bool MapSegmentSections(const Segment& segment, SectionMap* section_map) |
424 | const; |
425 | |
426 | private: |
427 | // Used internally. |
428 | class SegmentFinder; |
429 | class SectionMapper; |
430 | |
431 | // We use this to report problems parsing the file's contents. (WEAK) |
432 | Reporter* reporter_; |
433 | |
434 | // The contents of the Mach-O file we're parsing. We do not own the |
435 | // storage it refers to. |
436 | ByteBuffer buffer_; |
437 | |
438 | // True if this file is big-endian. |
439 | bool big_endian_; |
440 | |
441 | // True if this file is a 64-bit Mach-O file. |
442 | bool bits_64_; |
443 | |
444 | // This file's cpu type and subtype. |
445 | cpu_type_t cpu_type_; // mach_header[_64].cputype |
446 | cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype |
447 | |
448 | // This file's type. |
449 | FileType file_type_; // mach_header[_64].filetype |
450 | |
451 | // The region of buffer_ occupied by load commands. |
452 | ByteBuffer load_commands_; |
453 | |
454 | // The number of load commands in load_commands_. |
455 | uint32_t load_command_count_; // mach_header[_64].ncmds |
456 | |
457 | // This file's header flags. |
458 | FileFlags flags_; |
459 | }; |
460 | |
461 | } // namespace mach_o |
462 | } // namespace google_breakpad |
463 | |
464 | #endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_ |
465 | |