1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// file_id.cc: Return a unique identifier for a file
31//
32// See file_id.h for documentation
33//
34
35#include "common/linux/file_id.h"
36
37#include <arpa/inet.h>
38#include <assert.h>
39#include <string.h>
40
41#include <algorithm>
42#include <string>
43
44#include "common/linux/elf_gnu_compat.h"
45#include "common/linux/elfutils.h"
46#include "common/linux/linux_libc_support.h"
47#include "common/linux/memory_mapped_file.h"
48#include "common/using_std_string.h"
49#include "third_party/lss/linux_syscall_support.h"
50
51namespace google_breakpad {
52namespace elf {
53
54// Used in a few places for backwards-compatibility.
55const size_t kMDGUIDSize = sizeof(MDGUID);
56
57FileID::FileID(const char* path) : path_(path) {}
58
// ELF note name and desc are 32-bits word padded: this rounds |a| up to the
// next multiple of 4.  The argument is parenthesized so that a compound
// expression such as NOTE_PADDING(x | y) is padded as a whole.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
61
62// These functions are also used inside the crashed process, so be safe
63// and use the syscall/libc wrappers instead of direct syscalls or libc.
64
65static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length,
66 wasteful_vector<uint8_t>& identifier) {
67 static_assert(sizeof(ElfClass32::Nhdr) == sizeof(ElfClass64::Nhdr),
68 "Elf32_Nhdr and Elf64_Nhdr should be the same");
69 typedef typename ElfClass32::Nhdr Nhdr;
70
71 const void* section_end = reinterpret_cast<const char*>(section) + length;
72 const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
73 while (reinterpret_cast<const void*>(note_header) < section_end) {
74 if (note_header->n_type == NT_GNU_BUILD_ID)
75 break;
76 note_header = reinterpret_cast<const Nhdr*>(
77 reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
78 NOTE_PADDING(note_header->n_namesz) +
79 NOTE_PADDING(note_header->n_descsz));
80 }
81 if (reinterpret_cast<const void*>(note_header) >= section_end ||
82 note_header->n_descsz == 0) {
83 return false;
84 }
85
86 const uint8_t* build_id = reinterpret_cast<const uint8_t*>(note_header) +
87 sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
88 identifier.insert(identifier.end(),
89 build_id,
90 build_id + note_header->n_descsz);
91
92 return true;
93}
94
95// Attempt to locate a .note.gnu.build-id section in an ELF binary
96// and copy it into |identifier|.
97static bool FindElfBuildIDNote(const void* elf_mapped_base,
98 wasteful_vector<uint8_t>& identifier) {
99 PageAllocator allocator;
100 // lld normally creates 2 PT_NOTEs, gold normally creates 1.
101 auto_wasteful_vector<ElfSegment, 2> segs(&allocator);
102 if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
103 for (ElfSegment& seg : segs) {
104 if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
105 return true;
106 }
107 }
108 }
109
110 void* note_section;
111 size_t note_size;
112 if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
113 (const void**)&note_section, &note_size)) {
114 return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
115 }
116
117 return false;
118}
119
120// Attempt to locate the .text section of an ELF binary and generate
121// a simple hash by XORing the first page worth of bytes into |identifier|.
122static bool HashElfTextSection(const void* elf_mapped_base,
123 wasteful_vector<uint8_t>& identifier) {
124 identifier.resize(kMDGUIDSize);
125
126 void* text_section;
127 size_t text_size;
128 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
129 (const void**)&text_section, &text_size) ||
130 text_size == 0) {
131 return false;
132 }
133
134 // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
135 // function backwards-compatible.
136 my_memset(&identifier[0], 0, kMDGUIDSize);
137 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
138 const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
139 while (ptr < ptr_end) {
140 for (unsigned i = 0; i < kMDGUIDSize; i++)
141 identifier[i] ^= ptr[i];
142 ptr += kMDGUIDSize;
143 }
144 return true;
145}
146
147// static
148bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
149 wasteful_vector<uint8_t>& identifier) {
150 // Look for a build id note first.
151 if (FindElfBuildIDNote(base, identifier))
152 return true;
153
154 // Fall back on hashing the first page of the text section.
155 return HashElfTextSection(base, identifier);
156}
157
158bool FileID::ElfFileIdentifier(wasteful_vector<uint8_t>& identifier) {
159 MemoryMappedFile mapped_file(path_.c_str(), 0);
160 if (!mapped_file.data()) // Should probably check if size >= ElfW(Ehdr)?
161 return false;
162
163 return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
164}
165
// These three functions are never called in an unsafe context, so it's OK
// to allocate memory and use libc.
// Returns the |count| bytes starting at |bytes| as an upper-case hexadecimal
// string, two digits per byte and without separators.  An empty string is
// returned when |count| is 0; |bytes| is not dereferenced in that case.
static string bytes_to_hex_string(const uint8_t* bytes, size_t count) {
  static const char kHexDigits[] = "0123456789ABCDEF";

  string result;
  result.reserve(count * 2);  // Exactly two output characters per byte.
  for (size_t idx = 0; idx < count; ++idx) {
    result.push_back(kHexDigits[bytes[idx] >> 4]);    // High nibble.
    result.push_back(kHexDigits[bytes[idx] & 0x0F]);  // Low nibble.
  }
  return result;
}
177
178// static
179string FileID::ConvertIdentifierToUUIDString(
180 const wasteful_vector<uint8_t>& identifier) {
181 uint8_t identifier_swapped[kMDGUIDSize] = { 0 };
182
183 // Endian-ness swap to match dump processor expectation.
184 memcpy(identifier_swapped, &identifier[0],
185 std::min(kMDGUIDSize, identifier.size()));
186 uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
187 *data1 = htonl(*data1);
188 uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
189 *data2 = htons(*data2);
190 uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
191 *data3 = htons(*data3);
192
193 return bytes_to_hex_string(identifier_swapped, kMDGUIDSize);
194}
195
196// static
197string FileID::ConvertIdentifierToString(
198 const wasteful_vector<uint8_t>& identifier) {
199 return bytes_to_hex_string(&identifier[0], identifier.size());
200}
201
202} // elf
203} // namespace google_breakpad
204