macho_reader.cc source code [breakpad/common/mac/macho_reader.cc]

1	// Copyright (c) 2010, Google Inc.
2	// All rights reserved.
3	//
4	// Redistribution and use in source and binary forms, with or without
5	// modification, are permitted provided that the following conditions are
6	// met:
7	//
8	//     * Redistributions of source code must retain the above copyright
9	// notice, this list of conditions and the following disclaimer.
10	//     * Redistributions in binary form must reproduce the above
11	// copyright notice, this list of conditions and the following disclaimer
12	// in the documentation and/or other materials provided with the
13	// distribution.
14	//     * Neither the name of Google Inc. nor the names of its
15	// contributors may be used to endorse or promote products derived from
16	// this software without specific prior written permission.
17	//
18	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30	// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32	// macho_reader.cc: Implementation of google_breakpad::Mach_O::FatReader and
33	// google_breakpad::Mach_O::Reader. See macho_reader.h for details.
34
35	#include "common/mac/macho_reader.h"
36
37	#include <assert.h>
38	#include <stdio.h>
39	#include <stdlib.h>
40
41	#include <limits>
42
// The 10.4 SDK does not define CPU_TYPE_ARM, so supply the value ourselves
// when the system headers have not already done so.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

// Likewise for the 64-bit ARM CPU type. 16777228 is 0x0100000C, i.e. the
// CPU_TYPE_ARM value with bit 24 set.
#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 16777228
#endif
51
52	namespace google_breakpad {
53	namespace mach_o {
54
// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
// arguments, so you can't place expressions that do necessary work in
// the argument of an assert. Nor can you assign the result of the
// expression to a variable and assert that the variable's value is
// true: you'll get unused variable warnings when NDEBUG is #defined.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
// the result is true if NDEBUG is not #defined.
//
// In the NDEBUG case the result is explicitly discarded with a cast to
// void, so the expansion does not trigger unused-value warnings and has
// the same (void) type as the assert-based expansion.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) ((void)(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
68
69	void FatReader::Reporter::BadHeader() {
70	fprintf(stderr, "%s: file is neither a fat binary file"
71	" nor a Mach-O object file\n", filename_.c_str());
72	}
73
74	void FatReader::Reporter::TooShort() {
75	fprintf(stderr, "%s: file too short for the data it claims to contain\n",
76	filename_.c_str());
77	}
78
79	void FatReader::Reporter::MisplacedObjectFile() {
80	fprintf(stderr, "%s: file too short for the object files it claims"
81	" to contain\n", filename_.c_str());
82	}
83
84	bool FatReader::Read(const uint8_t* buffer, size_t size) {
85	buffer_.start = buffer;
86	buffer_.end = buffer + size;
87	ByteCursor cursor(&buffer_);
88
89	// Fat binaries always use big-endian, so read the magic number in
90	// that endianness. To recognize Mach-O magic numbers, which can use
91	// either endianness, check for both the proper and reversed forms
92	// of the magic numbers.
93	cursor.set_big_endian(true);
94	if (cursor >> magic_) {
95	if (magic_ == FAT_MAGIC) {
96	// How many object files does this fat binary contain?
97	uint32_t object_files_count;
98	if (!(cursor >> object_files_count)) { // nfat_arch
99	reporter_->TooShort();
100	return false;
101	}
102
103	// Read the list of object files.
104	object_files_.resize(object_files_count);
105	for (size_t i = `0`; i < object_files_count; i++) {
106	struct fat_arch objfile;
107
108	// Read this object file entry, byte-swapping as appropriate.
109	cursor >> objfile.cputype
110	>> objfile.cpusubtype
111	>> objfile.offset
112	>> objfile.size
113	>> objfile.align;
114
115	SuperFatArch super_fat_arch(objfile);
116	object_files_[i] = super_fat_arch;
117
118	if (!cursor) {
119	reporter_->TooShort();
120	return false;
121	}
122	// Does the file actually have the bytes this entry refers to?
123	size_t fat_size = buffer_.Size();
124	if (objfile.offset > fat_size \|\|
125	objfile.size > fat_size - objfile.offset) {
126	reporter_->MisplacedObjectFile();
127	return false;
128	}
129	}
130
131	return true;
132	} else if (magic_ == MH_MAGIC \|\| magic_ == MH_MAGIC_64 \|\|
133	magic_ == MH_CIGAM \|\| magic_ == MH_CIGAM_64) {
134	// If this is a little-endian Mach-O file, fix the cursor's endianness.
135	if (magic_ == MH_CIGAM \|\| magic_ == MH_CIGAM_64)
136	cursor.set_big_endian(false);
137	// Record the entire file as a single entry in the object file list.
138	object_files_.resize(`1`);
139
140	// Get the cpu type and subtype from the Mach-O header.
141	if (!(cursor >> object_files_[`0`].cputype
142	>> object_files_[`0`].cpusubtype)) {
143	reporter_->TooShort();
144	return false;
145	}
146
147	object_files_[`0`].offset = `0`;
148	object_files_[`0`].size = static_cast<uint64_t>(buffer_.Size());
149	// This alignment is correct for 32 and 64-bit x86 and ppc.
150	// See get_align in the lipo source for other architectures:
151	// http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
152	object_files_[`0`].align = `12`; // 2^12 == 4096
153	return true;
154	}
155	}
156	reporter_->BadHeader();
157	return false;
158	}
159
160	void Reader::Reporter::BadHeader() {
161	fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
162	}
163
164	void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
165	cpu_subtype_t cpu_subtype,
166	cpu_type_t expected_cpu_type,
167	cpu_subtype_t expected_cpu_subtype) {
168	fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
169	" type %d, subtype %d\n",
170	filename_.c_str(), cpu_type, cpu_subtype,
171	expected_cpu_type, expected_cpu_subtype);
172	}
173
174	void Reader::Reporter::HeaderTruncated() {
175	fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
176	filename_.c_str());
177	}
178
179	void Reader::Reporter::LoadCommandRegionTruncated() {
180	fprintf(stderr, "%s: file too short to hold load command region"
181	" given in Mach-O header\n", filename_.c_str());
182	}
183
184	void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
185	LoadCommandType type) {
186	fprintf(stderr, "%s: file's header claims there are %zu"
187	" load commands, but load command #%zu",
188	filename_.c_str(), claimed, i);
189	if (type) fprintf(stderr, ", of type %d,", type);
190	fprintf(stderr, " extends beyond the end of the load command region\n");
191	}
192
193	void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
194	fprintf(stderr, "%s: the contents of load command #%zu, of type %d,"
195	" extend beyond the size given in the load command's header\n",
196	filename_.c_str(), i, type);
197	}
198
199	void Reader::Reporter::SectionsMissing(const string& name) {
200	fprintf(stderr, "%s: the load command for segment '%s'"
201	" is too short to hold the section headers it claims to have\n",
202	filename_.c_str(), name.c_str());
203	}
204
205	void Reader::Reporter::MisplacedSegmentData(const string& name) {
206	fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
207	" the end of the file\n", filename_.c_str(), name.c_str());
208	}
209
210	void Reader::Reporter::MisplacedSectionData(const string& section,
211	const string& segment) {
212	fprintf(stderr, "%s: the section '%s' in segment '%s'"
213	" claims its contents lie outside the segment's contents\n",
214	filename_.c_str(), section.c_str(), segment.c_str());
215	}
216
217	void Reader::Reporter::MisplacedSymbolTable() {
218	fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
219	" table's contents are located beyond the end of the file\n",
220	filename_.c_str());
221	}
222
223	void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
224	fprintf(stderr, "%s: CPU type %d is not supported\n",
225	filename_.c_str(), cpu_type);
226	}
227
228	bool Reader::Read(const uint8_t* buffer,
229	size_t size,
230	cpu_type_t expected_cpu_type,
231	cpu_subtype_t expected_cpu_subtype) {
232	assert(!buffer_.start);
233	buffer_.start = buffer;
234	buffer_.end = buffer + size;
235	ByteCursor cursor(&buffer_, true);
236	uint32_t magic;
237	if (!(cursor >> magic)) {
238	reporter_->HeaderTruncated();
239	return false;
240	}
241
242	if (expected_cpu_type != CPU_TYPE_ANY) {
243	uint32_t expected_magic;
244	// validate that magic matches the expected cpu type
245	switch (expected_cpu_type) {
246	case CPU_TYPE_ARM:
247	case CPU_TYPE_I386:
248	expected_magic = MH_CIGAM;
249	break;
250	case CPU_TYPE_POWERPC:
251	expected_magic = MH_MAGIC;
252	break;
253	case CPU_TYPE_ARM_64:
254	case CPU_TYPE_X86_64:
255	expected_magic = MH_CIGAM_64;
256	break;
257	case CPU_TYPE_POWERPC64:
258	expected_magic = MH_MAGIC_64;
259	break;
260	default:
261	reporter_->UnsupportedCPUType(expected_cpu_type);
262	return false;
263	}
264
265	if (expected_magic != magic) {
266	reporter_->BadHeader();
267	return false;
268	}
269	}
270
271	// Since the byte cursor is in big-endian mode, a reversed magic number
272	// always indicates a little-endian file, regardless of our own endianness.
273	switch (magic) {
274	case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
275	case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
276	case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
277	case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
278	default:
279	reporter_->BadHeader();
280	return false;
281	}
282	cursor.set_big_endian(big_endian_);
283	uint32_t commands_size, reserved;
284	cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
285	>> commands_size >> flags_;
286	if (bits_64_)
287	cursor >> reserved;
288	if (!cursor) {
289	reporter_->HeaderTruncated();
290	return false;
291	}
292
293	if (expected_cpu_type != CPU_TYPE_ANY &&
294	(expected_cpu_type != cpu_type_ \|\|
295	expected_cpu_subtype != cpu_subtype_)) {
296	reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
297	expected_cpu_type, expected_cpu_subtype);
298	return false;
299	}
300
301	cursor
302	.PointTo(&load_commands_.start, commands_size)
303	.PointTo(&load_commands_.end, `0`);
304	if (!cursor) {
305	reporter_->LoadCommandRegionTruncated();
306	return false;
307	}
308
309	return true;
310	}
311
312	bool Reader::WalkLoadCommands(Reader::LoadCommandHandler* handler) const {
313	ByteCursor list_cursor(&load_commands_, big_endian_);
314
315	for (size_t index = `0`; index < load_command_count_; ++index) {
316	// command refers to this load command alone, so that cursor will
317	// refuse to read past the load command's end. But since we haven't
318	// read the size yet, let command initially refer to the entire
319	// remainder of the load command series.
320	ByteBuffer command(list_cursor.here(), list_cursor.Available());
321	ByteCursor cursor(&command, big_endian_);
322
323	// Read the command type and size --- fields common to all commands.
324	uint32_t type, size;
325	if (!(cursor >> type)) {
326	reporter_->LoadCommandsOverrun(load_command_count_, index, `0`);
327	return false;
328	}
329	if (!(cursor >> size) \|\| size > command.Size()) {
330	reporter_->LoadCommandsOverrun(load_command_count_, index, type);
331	return false;
332	}
333
334	// Now that we've read the length, restrict command's range to this
335	// load command only.
336	command.end = command.start + size;
337
338	switch (type) {
339	case LC_SEGMENT:
340	case LC_SEGMENT_64: {
341	Segment segment;
342	segment.bits_64 = (type == LC_SEGMENT_64);
343	size_t word_size = segment.bits_64 ? `8` : `4`;
344	cursor.CString(&segment.name, `16`);
345	cursor
346	.Read(word_size, false, &segment.vmaddr)
347	.Read(word_size, false, &segment.vmsize)
348	.Read(word_size, false, &segment.fileoff)
349	.Read(word_size, false, &segment.filesize);
350	cursor >> segment.maxprot
351	>> segment.initprot
352	>> segment.nsects
353	>> segment.flags;
354	if (!cursor) {
355	reporter_->LoadCommandTooShort(index, type);
356	return false;
357	}
358	if (segment.fileoff > buffer_.Size() \|\|
359	segment.filesize > buffer_.Size() - segment.fileoff) {
360	reporter_->MisplacedSegmentData(segment.name);
361	return false;
362	}
363	// Mach-O files in .dSYM bundles have the contents of the loaded
364	// segments removed, and their file offsets and file sizes zeroed
365	// out. To help us handle this special case properly, give such
366	// segments' contents NULL starting and ending pointers.
367	if (segment.fileoff == `0` && segment.filesize == `0`) {
368	segment.contents.start = segment.contents.end = NULL;
369	} else {
370	segment.contents.start = buffer_.start + segment.fileoff;
371	segment.contents.end = segment.contents.start + segment.filesize;
372	}
373	// The section list occupies the remainder of this load command's space.
374	segment.section_list.start = cursor.here();
375	segment.section_list.end = command.end;
376
377	if (!handler->SegmentCommand(segment))
378	return false;
379	break;
380	}
381
382	case LC_SYMTAB: {
383	uint32_t symoff, nsyms, stroff, strsize;
384	cursor >> symoff >> nsyms >> stroff >> strsize;
385	if (!cursor) {
386	reporter_->LoadCommandTooShort(index, type);
387	return false;
388	}
389	// How big are the entries in the symbol table?
390	// sizeof(struct nlist_64) : sizeof(struct nlist),
391	// but be paranoid about alignment vs. target architecture.
392	size_t symbol_size = bits_64_ ? `16` : `12`;
393	// How big is the entire symbol array?
394	size_t symbols_size = nsyms * symbol_size;
395	if (symoff > buffer_.Size() \|\| symbols_size > buffer_.Size() - symoff \|\|
396	stroff > buffer_.Size() \|\| strsize > buffer_.Size() - stroff) {
397	reporter_->MisplacedSymbolTable();
398	return false;
399	}
400	ByteBuffer entries(buffer_.start + symoff, symbols_size);
401	ByteBuffer names(buffer_.start + stroff, strsize);
402	if (!handler->SymtabCommand(entries, names))
403	return false;
404	break;
405	}
406
407	default: {
408	if (!handler->UnknownCommand(type, command))
409	return false;
410	break;
411	}
412	}
413
414	list_cursor.set_here(command.end);
415	}
416
417	return true;
418	}
419
420	// A load command handler that looks for a segment of a given name.
421	class Reader::SegmentFinder : public LoadCommandHandler {
422	public:
423	// Create a load command handler that looks for a segment named NAME,
424	// and sets SEGMENT to describe it if found.
425	SegmentFinder(const string& name, Segment* segment)
426	: name_(name), segment_(segment), found_() { }
427
428	// Return true if the traversal found the segment, false otherwise.
429	bool found() const { return found_; }
430
431	bool SegmentCommand(const Segment& segment) {
432	if (segment.name == name_) {
433	*segment_ = segment;
434	found_ = true;
435	return false;
436	}
437	return true;
438	}
439
440	private:
441	// The name of the segment our creator is looking for.
442	const string& name_;
443
444	// Where we should store the segment if found. (WEAK)
445	Segment* segment_;
446
447	// True if we found the segment.
448	bool found_;
449	};
450
451	bool Reader::FindSegment(const string& name, Segment* segment) const {
452	SegmentFinder finder(name, segment);
453	WalkLoadCommands(&finder);
454	return finder.found();
455	}
456
457	bool Reader::WalkSegmentSections(const Segment& segment,
458	SectionHandler* handler) const {
459	size_t word_size = segment.bits_64 ? `8` : `4`;
460	ByteCursor cursor(&segment.section_list, big_endian_);
461
462	for (size_t i = `0`; i < segment.nsects; i++) {
463	Section section;
464	section.bits_64 = segment.bits_64;
465	uint64_t size, offset;
466	uint32_t dummy32;
467	cursor
468	.CString(&section.section_name, `16`)
469	.CString(&section.segment_name, `16`)
470	.Read(word_size, false, &section.address)
471	.Read(word_size, false, &size)
472	.Read(sizeof(uint32_t), false, &offset) // clears high bits of \|offset\|
473	>> section.align
474	>> dummy32
475	>> dummy32
476	>> section.flags
477	>> dummy32
478	>> dummy32;
479	if (section.bits_64)
480	cursor >> dummy32;
481	if (!cursor) {
482	reporter_->SectionsMissing(segment.name);
483	return false;
484	}
485
486	// Even 64-bit Mach-O isn’t a true 64-bit format in that it doesn’t handle
487	// 64-bit file offsets gracefully. Segment load commands do contain 64-bit
488	// file offsets, but sections within do not. Because segments load
489	// contiguously, recompute each section’s file offset on the basis of its
490	// containing segment’s file offset and the difference between the section’s
491	// and segment’s load addresses. If truncation is detected, honor the
492	// recomputed offset.
493	if (segment.bits_64 &&
494	segment.fileoff + segment.filesize >
495	std::numeric_limits<uint32_t>::max()) {
496	const uint64_t section_offset_recomputed =
497	segment.fileoff + section.address - segment.vmaddr;
498	if (offset == static_cast<uint32_t>(section_offset_recomputed)) {
499	offset = section_offset_recomputed;
500	}
501	}
502
503	const uint32_t section_type = section.flags & SECTION_TYPE;
504	if (section_type == S_ZEROFILL \|\| section_type == S_THREAD_LOCAL_ZEROFILL \|\|
505	section_type == S_GB_ZEROFILL) {
506	// Zero-fill sections have a size, but no contents.
507	section.contents.start = section.contents.end = NULL;
508	} else if (segment.contents.start == NULL &&
509	segment.contents.end == NULL) {
510	// Mach-O files in .dSYM bundles have the contents of the loaded
511	// segments removed, and their file offsets and file sizes zeroed
512	// out. However, the sections within those segments still have
513	// non-zero sizes. There's no reason to call MisplacedSectionData in
514	// this case; the caller may just need the section's load
515	// address. But do set the contents' limits to NULL, for safety.
516	section.contents.start = section.contents.end = NULL;
517	} else {
518	if (offset < size_t(segment.contents.start - buffer_.start) \|\|
519	offset > size_t(segment.contents.end - buffer_.start) \|\|
520	size > size_t(segment.contents.end - buffer_.start - offset)) {
521	if (offset > `0`) {
522	reporter_->MisplacedSectionData(section.section_name,
523	section.segment_name);
524	return false;
525	} else {
526	// Mach-O files in .dSYM bundles have the contents of the loaded
527	// segments partially removed. The removed sections will have zero as
528	// their offset. MisplacedSectionData should not be called in this
529	// case.
530	section.contents.start = section.contents.end = NULL;
531	}
532	} else {
533	section.contents.start = buffer_.start + offset;
534	section.contents.end = section.contents.start + size;
535	}
536	}
537	if (!handler->HandleSection(section))
538	return false;
539	}
540	return true;
541	}
542
543	// A SectionHandler that builds a SectionMap for the sections within a
544	// given segment.
545	class Reader::SectionMapper: public SectionHandler {
546	public:
547	// Create a SectionHandler that populates MAP with an entry for
548	// each section it is given.
549	SectionMapper(SectionMap* map) : map_(map) { }
550	bool HandleSection(const Section& section) {
551	(*map_)[section.section_name] = section;
552	return true;
553	}
554	private:
555	// The map under construction. (WEAK)
556	SectionMap* map_;
557	};
558
559	bool Reader::MapSegmentSections(const Segment& segment,
560	SectionMap* section_map) const {
561	section_map->clear();
562	SectionMapper mapper(section_map);
563	return WalkSegmentSections(segment, &mapper);
564	}
565
566	} // namespace mach_o
567	} // namespace google_breakpad
568

Browse the source code of breakpad/common/mac/macho_reader.cc