1// -*- mode: C++ -*-
2
3// Copyright (c) 2010, Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// macho_reader.h: A class for parsing Mach-O files.
35
36#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
37#define BREAKPAD_COMMON_MAC_MACHO_READER_H_
38
39#include <mach-o/loader.h>
40#include <mach-o/fat.h>
41#include <stdint.h>
42#include <stdlib.h>
43#include <unistd.h>
44
45#include <map>
46#include <string>
47#include <vector>
48
49#include "common/byte_cursor.h"
50#include "common/mac/super_fat_arch.h"
51
52namespace google_breakpad {
53namespace mach_o {
54
55using std::map;
56using std::string;
57using std::vector;
58
// Named aliases for the groups of constants found in the Mach-O headers.
// The Mac headers themselves do not give these groups distinct types;
// naming them here documents which kind of constant a value holds. Each
// alias has the same width as the header field it is read from, which
// makes it convenient to use with ByteCursors.
typedef uint32_t Magic;            // File magic numbers.
typedef uint32_t FileType;         // Mach-O header file type values.
typedef uint32_t FileFlags;        // Mach-O header flag bits.
typedef uint32_t LoadCommandType;  // Load command type codes.
typedef uint32_t SegmentFlags;     // Segment command flag bits.
typedef uint32_t SectionFlags;     // Section flag bits.
69
70// A parser for fat binary files, used to store universal binaries.
71// When applied to a (non-fat) Mach-O file, this behaves as if the
72// file were a fat file containing a single object file.
73class FatReader {
74 public:
75
76 // A class for reporting errors found while parsing fat binary files. The
77 // default definitions of these methods print messages to stderr.
78 class Reporter {
79 public:
80 // Create a reporter that attributes problems to |filename|.
81 explicit Reporter(const string& filename) : filename_(filename) { }
82
83 virtual ~Reporter() { }
84
85 // The data does not begin with a fat binary or Mach-O magic number.
86 // This is a fatal error.
87 virtual void BadHeader();
88
89 // The Mach-O fat binary file ends abruptly, without enough space
90 // to contain an object file it claims is present.
91 virtual void MisplacedObjectFile();
92
93 // The file ends abruptly: either it is not large enough to hold a
94 // complete header, or the header implies that contents are present
95 // beyond the actual end of the file.
96 virtual void TooShort();
97
98 private:
99 // The filename to which the reader should attribute problems.
100 string filename_;
101 };
102
103 // Create a fat binary file reader that uses |reporter| to report problems.
104 explicit FatReader(Reporter* reporter) : reporter_(reporter) { }
105
106 // Read the |size| bytes at |buffer| as a fat binary file. On success,
107 // return true; on failure, report the problem to reporter_ and return
108 // false.
109 //
110 // If the data is a plain Mach-O file, rather than a fat binary file,
111 // then the reader behaves as if it had found a fat binary file whose
112 // single object file is the Mach-O file.
113 bool Read(const uint8_t* buffer, size_t size);
114
115 // Return an array of 'SuperFatArch' structures describing the
116 // object files present in this fat binary file. Set |size| to the
117 // number of elements in the array.
118 //
119 // Assuming Read returned true, the entries are validated: it is safe to
120 // assume that the offsets and sizes in each SuperFatArch refer to subranges
121 // of the bytes passed to Read.
122 //
123 // If there are no object files in this fat binary, then this
124 // function can return NULL.
125 //
126 // The array is owned by this FatReader instance; it will be freed when
127 // this FatReader is destroyed.
128 //
129 // This function returns a C-style array instead of a vector to make it
130 // possible to use the result with OS X functions like NXFindBestFatArch,
131 // so that the symbol dumper will behave consistently with other OS X
132 // utilities that work with fat binaries.
133 const SuperFatArch* object_files(size_t* count) const {
134 *count = object_files_.size();
135 if (object_files_.size() > 0)
136 return &object_files_[0];
137 return NULL;
138 }
139
140 private:
141 // We use this to report problems parsing the file's contents. (WEAK)
142 Reporter* reporter_;
143
144 // The contents of the fat binary or Mach-O file we're parsing. We do not
145 // own the storage it refers to.
146 ByteBuffer buffer_;
147
148 // The magic number of this binary, in host byte order.
149 Magic magic_;
150
151 // The list of object files in this binary.
152 // object_files_.size() == fat_header.nfat_arch
153 vector<SuperFatArch> object_files_;
154};
155
156// A segment in a Mach-O file. All these fields have been byte-swapped as
157// appropriate for use by the executing architecture.
158struct Segment {
159 // The ByteBuffers below point into the bytes passed to the Reader that
160 // created this Segment.
161
162 ByteBuffer section_list; // This segment's section list.
163 ByteBuffer contents; // This segment's contents.
164
165 // This segment's name.
166 string name;
167
168 // The address at which this segment should be loaded in memory. If
169 // bits_64 is false, only the bottom 32 bits of this value are valid.
170 uint64_t vmaddr;
171
172 // The size of this segment when loaded into memory. This may be larger
173 // than contents.Size(), in which case the extra area will be
174 // initialized with zeros. If bits_64 is false, only the bottom 32 bits
175 // of this value are valid.
176 uint64_t vmsize;
177
178 // The file offset and size of the segment in the Mach-O image.
179 uint64_t fileoff;
180 uint64_t filesize;
181
182 // The maximum and initial VM protection of this segment's contents.
183 uint32_t maxprot;
184 uint32_t initprot;
185
186 // The number of sections in section_list.
187 uint32_t nsects;
188
189 // Flags describing this segment, from SegmentFlags.
190 uint32_t flags;
191
192 // True if this is a 64-bit section; false if it is a 32-bit section.
193 bool bits_64;
194};
195
196// A section in a Mach-O file. All these fields have been byte-swapped as
197// appropriate for use by the executing architecture.
198struct Section {
199 // This section's contents. This points into the bytes passed to the
200 // Reader that created this Section.
201 ByteBuffer contents;
202
203 // This section's name.
204 string section_name; // section[_64].sectname
205 // The name of the segment this section belongs to.
206 string segment_name; // section[_64].segname
207
208 // The address at which this section's contents should be loaded in
209 // memory. If bits_64 is false, only the bottom 32 bits of this value
210 // are valid.
211 uint64_t address;
212
213 // The contents of this section should be loaded into memory at an
214 // address which is a multiple of (two raised to this power).
215 uint32_t align;
216
217 // Flags from SectionFlags describing the section's contents.
218 uint32_t flags;
219
220 // We don't support reading relocations yet.
221
222 // True if this is a 64-bit section; false if it is a 32-bit section.
223 bool bits_64;
224};
225
226// A map from section names to Sections.
227typedef map<string, Section> SectionMap;
228
229// A reader for a Mach-O file.
230//
231// This does not handle fat binaries; see FatReader above. FatReader
232// provides a friendly interface for parsing data that could be either a
233// fat binary or a Mach-O file.
234class Reader {
235 public:
236
237 // A class for reporting errors found while parsing Mach-O files. The
238 // default definitions of these member functions print messages to
239 // stderr.
240 class Reporter {
241 public:
242 // Create a reporter that attributes problems to |filename|.
243 explicit Reporter(const string& filename) : filename_(filename) { }
244 virtual ~Reporter() { }
245
246 // Reporter functions for fatal errors return void; the reader will
247 // definitely return an error to its caller after calling them
248
249 // The data does not begin with a Mach-O magic number, or the magic
250 // number does not match the expected value for the cpu architecture.
251 // This is a fatal error.
252 virtual void BadHeader();
253
254 // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
255 // does not match the expected CPU architecture
256 // (|expected_cpu_type|, |expected_cpu_subtype|).
257 virtual void CPUTypeMismatch(cpu_type_t cpu_type,
258 cpu_subtype_t cpu_subtype,
259 cpu_type_t expected_cpu_type,
260 cpu_subtype_t expected_cpu_subtype);
261
262 // The file ends abruptly: either it is not large enough to hold a
263 // complete header, or the header implies that contents are present
264 // beyond the actual end of the file.
265 virtual void HeaderTruncated();
266
267 // The file's load command region, as given in the Mach-O header, is
268 // too large for the file.
269 virtual void LoadCommandRegionTruncated();
270
271 // The file's Mach-O header claims the file contains |claimed| load
272 // commands, but the I'th load command, of type |type|, extends beyond
273 // the end of the load command region, as given by the Mach-O header.
274 // If |type| is zero, the command's type was unreadable.
275 virtual void LoadCommandsOverrun(size_t claimed, size_t i,
276 LoadCommandType type);
277
278 // The contents of the |i|'th load command, of type |type|, extend beyond
279 // the size given in the load command's header.
280 virtual void LoadCommandTooShort(size_t i, LoadCommandType type);
281
282 // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
283 // |name| is too short to hold the sections that its header says it does.
284 // (This more specific than LoadCommandTooShort.)
285 virtual void SectionsMissing(const string& name);
286
287 // The segment named |name| claims that its contents lie beyond the end
288 // of the file.
289 virtual void MisplacedSegmentData(const string& name);
290
291 // The section named |section| in the segment named |segment| claims that
292 // its contents do not lie entirely within the segment.
293 virtual void MisplacedSectionData(const string& section,
294 const string& segment);
295
296 // The LC_SYMTAB command claims that symbol table contents are located
297 // beyond the end of the file.
298 virtual void MisplacedSymbolTable();
299
300 // An attempt was made to read a Mach-O file of the unsupported
301 // CPU architecture |cpu_type|.
302 virtual void UnsupportedCPUType(cpu_type_t cpu_type);
303
304 private:
305 string filename_;
306 };
307
308 // A handler for sections parsed from a segment. The WalkSegmentSections
309 // member function accepts an instance of this class, and applies it to
310 // each section defined in a given segment.
311 class SectionHandler {
312 public:
313 virtual ~SectionHandler() { }
314
315 // Called to report that the segment's section list contains |section|.
316 // This should return true if the iteration should continue, or false
317 // if it should stop.
318 virtual bool HandleSection(const Section& section) = 0;
319 };
320
321 // A handler for the load commands in a Mach-O file.
322 class LoadCommandHandler {
323 public:
324 LoadCommandHandler() { }
325 virtual ~LoadCommandHandler() { }
326
327 // When called from WalkLoadCommands, the following handler functions
328 // should return true if they wish to continue iterating over the load
329 // command list, or false if they wish to stop iterating.
330 //
331 // When called from LoadCommandIterator::Handle or Reader::Handle,
332 // these functions' return values are simply passed through to Handle's
333 // caller.
334 //
335 // The definitions provided by this base class simply return true; the
336 // default is to silently ignore sections whose member functions the
337 // subclass doesn't override.
338
339 // COMMAND is load command we don't recognize. We provide only the
340 // command type and a ByteBuffer enclosing the command's data (If we
341 // cannot parse the command type or its size, we call
342 // reporter_->IncompleteLoadCommand instead.)
343 virtual bool UnknownCommand(LoadCommandType type,
344 const ByteBuffer& contents) {
345 return true;
346 }
347
348 // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
349 // with the properties given in |segment|.
350 virtual bool SegmentCommand(const Segment& segment) {
351 return true;
352 }
353
354 // The load command is LC_SYMTAB. |entries| holds the array of nlist
355 // entries, and |names| holds the strings the entries refer to.
356 virtual bool SymtabCommand(const ByteBuffer& entries,
357 const ByteBuffer& names) {
358 return true;
359 }
360
361 // Add handler functions for more load commands here as needed.
362 };
363
364 // Create a Mach-O file reader that reports problems to |reporter|.
365 explicit Reader(Reporter* reporter)
366 : reporter_(reporter) { }
367
368 // Read the given data as a Mach-O file. The reader retains pointers
369 // into the data passed, so the data should live as long as the reader
370 // does. On success, return true; on failure, return false.
371 //
372 // At most one of these functions should be invoked once on each Reader
373 // instance.
374 bool Read(const uint8_t* buffer,
375 size_t size,
376 cpu_type_t expected_cpu_type,
377 cpu_subtype_t expected_cpu_subtype);
378 bool Read(const ByteBuffer& buffer,
379 cpu_type_t expected_cpu_type,
380 cpu_subtype_t expected_cpu_subtype) {
381 return Read(buffer.start,
382 buffer.Size(),
383 expected_cpu_type,
384 expected_cpu_subtype);
385 }
386
387 // Return this file's characteristics, as found in the Mach-O header.
388 cpu_type_t cpu_type() const { return cpu_type_; }
389 cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
390 FileType file_type() const { return file_type_; }
391 FileFlags flags() const { return flags_; }
392
393 // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
394 // Mach-O file.
395 bool bits_64() const { return bits_64_; }
396
397 // Return true if this is a big-endian Mach-O file, false if it is
398 // little-endian.
399 bool big_endian() const { return big_endian_; }
400
401 // Apply |handler| to each load command in this Mach-O file, stopping when
402 // a handler function returns false. If we encounter a malformed load
403 // command, report it via reporter_ and return false. Return true if all
404 // load commands were parseable and all handlers returned true.
405 bool WalkLoadCommands(LoadCommandHandler* handler) const;
406
407 // Set |segment| to describe the segment named |name|, if present. If
408 // found, |segment|'s byte buffers refer to a subregion of the bytes
409 // passed to Read. If we find the section, return true; otherwise,
410 // return false.
411 bool FindSegment(const string& name, Segment* segment) const;
412
413 // Apply |handler| to each section defined in |segment|. If |handler| returns
414 // false, stop iterating and return false. If all calls to |handler| return
415 // true and we reach the end of the section list, return true.
416 bool WalkSegmentSections(const Segment& segment, SectionHandler* handler)
417 const;
418
419 // Clear |section_map| and then populate it with a map of the sections
420 // in |segment|, from section names to Section structures.
421 // Each Section's contents refer to bytes in |segment|'s contents.
422 // On success, return true; if a problem occurs, report it and return false.
423 bool MapSegmentSections(const Segment& segment, SectionMap* section_map)
424 const;
425
426 private:
427 // Used internally.
428 class SegmentFinder;
429 class SectionMapper;
430
431 // We use this to report problems parsing the file's contents. (WEAK)
432 Reporter* reporter_;
433
434 // The contents of the Mach-O file we're parsing. We do not own the
435 // storage it refers to.
436 ByteBuffer buffer_;
437
438 // True if this file is big-endian.
439 bool big_endian_;
440
441 // True if this file is a 64-bit Mach-O file.
442 bool bits_64_;
443
444 // This file's cpu type and subtype.
445 cpu_type_t cpu_type_; // mach_header[_64].cputype
446 cpu_subtype_t cpu_subtype_; // mach_header[_64].cpusubtype
447
448 // This file's type.
449 FileType file_type_; // mach_header[_64].filetype
450
451 // The region of buffer_ occupied by load commands.
452 ByteBuffer load_commands_;
453
454 // The number of load commands in load_commands_.
455 uint32_t load_command_count_; // mach_header[_64].ncmds
456
457 // This file's header flags.
458 FileFlags flags_;
459};
460
461} // namespace mach_o
462} // namespace google_breakpad
463
464#endif // BREAKPAD_COMMON_MAC_MACHO_READER_H_
465