1// Copyright (c) 2010, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
33// google_breakpad::Mach_O::Reader. See macho_reader.h for details.
34
35#include "common/mac/macho_reader.h"
36
37#include <assert.h>
38#include <stdio.h>
39#include <stdlib.h>
40
41#include <limits>
42
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4, so supply a fallback
// value when the included headers do not provide one.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

// Likewise, supply a fallback for CPU_TYPE_ARM_64 when it is not defined.
#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 0x0100000C  // 16777228
#endif
51
52namespace google_breakpad {
53namespace mach_o {
54
// When NDEBUG is #defined, 'assert' does not evaluate its argument, so an
// expression that performs necessary work cannot be placed directly inside
// an assert. Storing the expression's result in a variable and asserting on
// that variable is no better: the variable then triggers unused-variable
// warnings in NDEBUG builds.
//
// ASSERT_ALWAYS_EVAL(x) evaluates x in every build, and additionally asserts
// that x is true when NDEBUG is not #defined.
#ifndef NDEBUG
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#else
#define ASSERT_ALWAYS_EVAL(x) (x)
#endif
68
69void FatReader::Reporter::BadHeader() {
70 fprintf(stderr, "%s: file is neither a fat binary file"
71 " nor a Mach-O object file\n", filename_.c_str());
72}
73
74void FatReader::Reporter::TooShort() {
75 fprintf(stderr, "%s: file too short for the data it claims to contain\n",
76 filename_.c_str());
77}
78
79void FatReader::Reporter::MisplacedObjectFile() {
80 fprintf(stderr, "%s: file too short for the object files it claims"
81 " to contain\n", filename_.c_str());
82}
83
84bool FatReader::Read(const uint8_t* buffer, size_t size) {
85 buffer_.start = buffer;
86 buffer_.end = buffer + size;
87 ByteCursor cursor(&buffer_);
88
89 // Fat binaries always use big-endian, so read the magic number in
90 // that endianness. To recognize Mach-O magic numbers, which can use
91 // either endianness, check for both the proper and reversed forms
92 // of the magic numbers.
93 cursor.set_big_endian(true);
94 if (cursor >> magic_) {
95 if (magic_ == FAT_MAGIC) {
96 // How many object files does this fat binary contain?
97 uint32_t object_files_count;
98 if (!(cursor >> object_files_count)) { // nfat_arch
99 reporter_->TooShort();
100 return false;
101 }
102
103 // Read the list of object files.
104 object_files_.resize(object_files_count);
105 for (size_t i = 0; i < object_files_count; i++) {
106 struct fat_arch objfile;
107
108 // Read this object file entry, byte-swapping as appropriate.
109 cursor >> objfile.cputype
110 >> objfile.cpusubtype
111 >> objfile.offset
112 >> objfile.size
113 >> objfile.align;
114
115 SuperFatArch super_fat_arch(objfile);
116 object_files_[i] = super_fat_arch;
117
118 if (!cursor) {
119 reporter_->TooShort();
120 return false;
121 }
122 // Does the file actually have the bytes this entry refers to?
123 size_t fat_size = buffer_.Size();
124 if (objfile.offset > fat_size ||
125 objfile.size > fat_size - objfile.offset) {
126 reporter_->MisplacedObjectFile();
127 return false;
128 }
129 }
130
131 return true;
132 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
133 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
134 // If this is a little-endian Mach-O file, fix the cursor's endianness.
135 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
136 cursor.set_big_endian(false);
137 // Record the entire file as a single entry in the object file list.
138 object_files_.resize(1);
139
140 // Get the cpu type and subtype from the Mach-O header.
141 if (!(cursor >> object_files_[0].cputype
142 >> object_files_[0].cpusubtype)) {
143 reporter_->TooShort();
144 return false;
145 }
146
147 object_files_[0].offset = 0;
148 object_files_[0].size = static_cast<uint64_t>(buffer_.Size());
149 // This alignment is correct for 32 and 64-bit x86 and ppc.
150 // See get_align in the lipo source for other architectures:
151 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
152 object_files_[0].align = 12; // 2^12 == 4096
153 return true;
154 }
155 }
156 reporter_->BadHeader();
157 return false;
158}
159
160void Reader::Reporter::BadHeader() {
161 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
162}
163
164void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
165 cpu_subtype_t cpu_subtype,
166 cpu_type_t expected_cpu_type,
167 cpu_subtype_t expected_cpu_subtype) {
168 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
169 " type %d, subtype %d\n",
170 filename_.c_str(), cpu_type, cpu_subtype,
171 expected_cpu_type, expected_cpu_subtype);
172}
173
174void Reader::Reporter::HeaderTruncated() {
175 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
176 filename_.c_str());
177}
178
179void Reader::Reporter::LoadCommandRegionTruncated() {
180 fprintf(stderr, "%s: file too short to hold load command region"
181 " given in Mach-O header\n", filename_.c_str());
182}
183
184void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
185 LoadCommandType type) {
186 fprintf(stderr, "%s: file's header claims there are %zu"
187 " load commands, but load command #%zu",
188 filename_.c_str(), claimed, i);
189 if (type) fprintf(stderr, ", of type %d,", type);
190 fprintf(stderr, " extends beyond the end of the load command region\n");
191}
192
193void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
194 fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
195 " extend beyond the size given in the load command's header\n",
196 filename_.c_str(), i, type);
197}
198
199void Reader::Reporter::SectionsMissing(const string& name) {
200 fprintf(stderr, "%s: the load command for segment '%s'"
201 " is too short to hold the section headers it claims to have\n",
202 filename_.c_str(), name.c_str());
203}
204
205void Reader::Reporter::MisplacedSegmentData(const string& name) {
206 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
207 " the end of the file\n", filename_.c_str(), name.c_str());
208}
209
210void Reader::Reporter::MisplacedSectionData(const string& section,
211 const string& segment) {
212 fprintf(stderr, "%s: the section '%s' in segment '%s'"
213 " claims its contents lie outside the segment's contents\n",
214 filename_.c_str(), section.c_str(), segment.c_str());
215}
216
217void Reader::Reporter::MisplacedSymbolTable() {
218 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
219 " table's contents are located beyond the end of the file\n",
220 filename_.c_str());
221}
222
223void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
224 fprintf(stderr, "%s: CPU type %d is not supported\n",
225 filename_.c_str(), cpu_type);
226}
227
228bool Reader::Read(const uint8_t* buffer,
229 size_t size,
230 cpu_type_t expected_cpu_type,
231 cpu_subtype_t expected_cpu_subtype) {
232 assert(!buffer_.start);
233 buffer_.start = buffer;
234 buffer_.end = buffer + size;
235 ByteCursor cursor(&buffer_, true);
236 uint32_t magic;
237 if (!(cursor >> magic)) {
238 reporter_->HeaderTruncated();
239 return false;
240 }
241
242 if (expected_cpu_type != CPU_TYPE_ANY) {
243 uint32_t expected_magic;
244 // validate that magic matches the expected cpu type
245 switch (expected_cpu_type) {
246 case CPU_TYPE_ARM:
247 case CPU_TYPE_I386:
248 expected_magic = MH_CIGAM;
249 break;
250 case CPU_TYPE_POWERPC:
251 expected_magic = MH_MAGIC;
252 break;
253 case CPU_TYPE_ARM_64:
254 case CPU_TYPE_X86_64:
255 expected_magic = MH_CIGAM_64;
256 break;
257 case CPU_TYPE_POWERPC64:
258 expected_magic = MH_MAGIC_64;
259 break;
260 default:
261 reporter_->UnsupportedCPUType(expected_cpu_type);
262 return false;
263 }
264
265 if (expected_magic != magic) {
266 reporter_->BadHeader();
267 return false;
268 }
269 }
270
271 // Since the byte cursor is in big-endian mode, a reversed magic number
272 // always indicates a little-endian file, regardless of our own endianness.
273 switch (magic) {
274 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
275 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
276 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
277 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
278 default:
279 reporter_->BadHeader();
280 return false;
281 }
282 cursor.set_big_endian(big_endian_);
283 uint32_t commands_size, reserved;
284 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
285 >> commands_size >> flags_;
286 if (bits_64_)
287 cursor >> reserved;
288 if (!cursor) {
289 reporter_->HeaderTruncated();
290 return false;
291 }
292
293 if (expected_cpu_type != CPU_TYPE_ANY &&
294 (expected_cpu_type != cpu_type_ ||
295 expected_cpu_subtype != cpu_subtype_)) {
296 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
297 expected_cpu_type, expected_cpu_subtype);
298 return false;
299 }
300
301 cursor
302 .PointTo(&load_commands_.start, commands_size)
303 .PointTo(&load_commands_.end, 0);
304 if (!cursor) {
305 reporter_->LoadCommandRegionTruncated();
306 return false;
307 }
308
309 return true;
310}
311
312bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
313 ByteCursor list_cursor(&load_commands_, big_endian_);
314
315 for (size_t index = 0; index < load_command_count_; ++index) {
316 // command refers to this load command alone, so that cursor will
317 // refuse to read past the load command's end. But since we haven't
318 // read the size yet, let command initially refer to the entire
319 // remainder of the load command series.
320 ByteBuffer command(list_cursor.here(), list_cursor.Available());
321 ByteCursor cursor(&command, big_endian_);
322
323 // Read the command type and size --- fields common to all commands.
324 uint32_t type, size;
325 if (!(cursor >> type)) {
326 reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
327 return false;
328 }
329 if (!(cursor >> size) || size > command.Size()) {
330 reporter_->LoadCommandsOverrun(load_command_count_, index, type);
331 return false;
332 }
333
334 // Now that we've read the length, restrict command's range to this
335 // load command only.
336 command.end = command.start + size;
337
338 switch (type) {
339 case LC_SEGMENT:
340 case LC_SEGMENT_64: {
341 Segment segment;
342 segment.bits_64 = (type == LC_SEGMENT_64);
343 size_t word_size = segment.bits_64 ? 8 : 4;
344 cursor.CString(&segment.name, 16);
345 cursor
346 .Read(word_size, false, &segment.vmaddr)
347 .Read(word_size, false, &segment.vmsize)
348 .Read(word_size, false, &segment.fileoff)
349 .Read(word_size, false, &segment.filesize);
350 cursor >> segment.maxprot
351 >> segment.initprot
352 >> segment.nsects
353 >> segment.flags;
354 if (!cursor) {
355 reporter_->LoadCommandTooShort(index, type);
356 return false;
357 }
358 if (segment.fileoff > buffer_.Size() ||
359 segment.filesize > buffer_.Size() - segment.fileoff) {
360 reporter_->MisplacedSegmentData(segment.name);
361 return false;
362 }
363 // Mach-O files in .dSYM bundles have the contents of the loaded
364 // segments removed, and their file offsets and file sizes zeroed
365 // out. To help us handle this special case properly, give such
366 // segments' contents NULL starting and ending pointers.
367 if (segment.fileoff == 0 && segment.filesize == 0) {
368 segment.contents.start = segment.contents.end = NULL;
369 } else {
370 segment.contents.start = buffer_.start + segment.fileoff;
371 segment.contents.end = segment.contents.start + segment.filesize;
372 }
373 // The section list occupies the remainder of this load command's space.
374 segment.section_list.start = cursor.here();
375 segment.section_list.end = command.end;
376
377 if (!handler->SegmentCommand(segment))
378 return false;
379 break;
380 }
381
382 case LC_SYMTAB: {
383 uint32_t symoff, nsyms, stroff, strsize;
384 cursor >> symoff >> nsyms >> stroff >> strsize;
385 if (!cursor) {
386 reporter_->LoadCommandTooShort(index, type);
387 return false;
388 }
389 // How big are the entries in the symbol table?
390 // sizeof(struct nlist_64) : sizeof(struct nlist),
391 // but be paranoid about alignment vs. target architecture.
392 size_t symbol_size = bits_64_ ? 16 : 12;
393 // How big is the entire symbol array?
394 size_t symbols_size = nsyms * symbol_size;
395 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
396 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
397 reporter_->MisplacedSymbolTable();
398 return false;
399 }
400 ByteBuffer entries(buffer_.start + symoff, symbols_size);
401 ByteBuffer names(buffer_.start + stroff, strsize);
402 if (!handler->SymtabCommand(entries, names))
403 return false;
404 break;
405 }
406
407 default: {
408 if (!handler->UnknownCommand(type, command))
409 return false;
410 break;
411 }
412 }
413
414 list_cursor.set_here(command.end);
415 }
416
417 return true;
418}
419
420// A load command handler that looks for a segment of a given name.
421class Reader::SegmentFinder : public LoadCommandHandler {
422 public:
423 // Create a load command handler that looks for a segment named NAME,
424 // and sets SEGMENT to describe it if found.
425 SegmentFinder(const string& name, Segment* segment)
426 : name_(name), segment_(segment), found_() { }
427
428 // Return true if the traversal found the segment, false otherwise.
429 bool found() const { return found_; }
430
431 bool SegmentCommand(const Segment& segment) {
432 if (segment.name == name_) {
433 *segment_ = segment;
434 found_ = true;
435 return false;
436 }
437 return true;
438 }
439
440 private:
441 // The name of the segment our creator is looking for.
442 const string& name_;
443
444 // Where we should store the segment if found. (WEAK)
445 Segment* segment_;
446
447 // True if we found the segment.
448 bool found_;
449};
450
451bool Reader::FindSegment(const string& name, Segment* segment) const {
452 SegmentFinder finder(name, segment);
453 WalkLoadCommands(&finder);
454 return finder.found();
455}
456
457bool Reader::WalkSegmentSections(const Segment& segment,
458 SectionHandler* handler) const {
459 size_t word_size = segment.bits_64 ? 8 : 4;
460 ByteCursor cursor(&segment.section_list, big_endian_);
461
462 for (size_t i = 0; i < segment.nsects; i++) {
463 Section section;
464 section.bits_64 = segment.bits_64;
465 uint64_t size, offset;
466 uint32_t dummy32;
467 cursor
468 .CString(&section.section_name, 16)
469 .CString(&section.segment_name, 16)
470 .Read(word_size, false, &section.address)
471 .Read(word_size, false, &size)
472 .Read(sizeof(uint32_t), false, &offset) // clears high bits of |offset|
473 >> section.align
474 >> dummy32
475 >> dummy32
476 >> section.flags
477 >> dummy32
478 >> dummy32;
479 if (section.bits_64)
480 cursor >> dummy32;
481 if (!cursor) {
482 reporter_->SectionsMissing(segment.name);
483 return false;
484 }
485
486 // Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
487 // 64-bit file offsets gracefully. Segment load commands do contain 64-bit
488 // file offsets, but sections within do not. Because segments load
489 // contiguously, recompute each section’s file offset on the basis of its
490 // containing segment’s file offset and the difference between the section’s
491 // and segment’s load addresses. If truncation is detected, honor the
492 // recomputed offset.
493 if (segment.bits_64 &&
494 segment.fileoff + segment.filesize >
495 std::numeric_limits<uint32_t>::max()) {
496 const uint64_t section_offset_recomputed =
497 segment.fileoff + section.address - segment.vmaddr;
498 if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
499 offset = section_offset_recomputed;
500 }
501 }
502
503 const uint32_t section_type = section.flags & SECTION_TYPE;
504 if (section_type == S_ZEROFILL || section_type == S_THREAD_LOCAL_ZEROFILL ||
505 section_type == S_GB_ZEROFILL) {
506 // Zero-fill sections have a size, but no contents.
507 section.contents.start = section.contents.end = NULL;
508 } else if (segment.contents.start == NULL &&
509 segment.contents.end == NULL) {
510 // Mach-O files in .dSYM bundles have the contents of the loaded
511 // segments removed, and their file offsets and file sizes zeroed
512 // out. However, the sections within those segments still have
513 // non-zero sizes. There's no reason to call MisplacedSectionData in
514 // this case; the caller may just need the section's load
515 // address. But do set the contents' limits to NULL, for safety.
516 section.contents.start = section.contents.end = NULL;
517 } else {
518 if (offset < size_t(segment.contents.start - buffer_.start) ||
519 offset > size_t(segment.contents.end - buffer_.start) ||
520 size > size_t(segment.contents.end - buffer_.start - offset)) {
521 if (offset > 0) {
522 reporter_->MisplacedSectionData(section.section_name,
523 section.segment_name);
524 return false;
525 } else {
526 // Mach-O files in .dSYM bundles have the contents of the loaded
527 // segments partially removed. The removed sections will have zero as
528 // their offset. MisplacedSectionData should not be called in this
529 // case.
530 section.contents.start = section.contents.end = NULL;
531 }
532 } else {
533 section.contents.start = buffer_.start + offset;
534 section.contents.end = section.contents.start + size;
535 }
536 }
537 if (!handler->HandleSection(section))
538 return false;
539 }
540 return true;
541}
542
543// A SectionHandler that builds a SectionMap for the sections within a
544// given segment.
545class Reader::SectionMapper: public SectionHandler {
546 public:
547 // Create a SectionHandler that populates MAP with an entry for
548 // each section it is given.
549 SectionMapper(SectionMap* map) : map_(map) { }
550 bool HandleSection(const Section& section) {
551 (*map_)[section.section_name] = section;
552 return true;
553 }
554 private:
555 // The map under construction. (WEAK)
556 SectionMap* map_;
557};
558
559bool Reader::MapSegmentSections(const Segment& segment,
560 SectionMap* section_map) const {
561 section_map->clear();
562 SectionMapper mapper(section_map);
563 return WalkSegmentSections(segment, &mapper);
564}
565
566} // namespace mach_o
567} // namespace google_breakpad
568