1 | //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This header defines interfaces to read LLVM bitcode files/streams. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_BITCODE_BITCODEREADER_H |
15 | #define LLVM_BITCODE_BITCODEREADER_H |
16 | |
17 | #include "llvm/ADT/ArrayRef.h" |
18 | #include "llvm/ADT/StringRef.h" |
19 | #include "llvm/Bitcode/BitCodes.h" |
20 | #include "llvm/IR/ModuleSummaryIndex.h" |
21 | #include "llvm/Support/Endian.h" |
22 | #include "llvm/Support/Error.h" |
23 | #include "llvm/Support/ErrorOr.h" |
24 | #include "llvm/Support/MemoryBuffer.h" |
25 | #include <cstdint> |
26 | #include <memory> |
27 | #include <string> |
28 | #include <system_error> |
29 | #include <vector> |
30 | namespace llvm { |
31 | |
32 | class LLVMContext; |
33 | class Module; |
34 | |
35 | // These functions are for converting Expected/Error values to |
36 | // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: |
37 | // Remove these functions once no longer needed by the C and libLTO APIs. |
38 | |
39 | std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); |
40 | |
41 | template <typename T> |
42 | ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { |
43 | if (!Val) |
44 | return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); |
45 | return std::move(*Val); |
46 | } |
47 | |
48 | struct BitcodeFileContents; |
49 | |
/// Basic information extracted from a bitcode module to be used for LTO.
struct BitcodeLTOInfo {
  /// Whether the module is to be compiled with ThinLTO.
  bool IsThinLTO;

  /// Whether the module has a summary.
  bool HasSummary;

  /// NOTE(review): presumably reflects whether the module was built with
  /// split LTO units enabled -- confirm against the reader implementation.
  bool EnableSplitLTOUnit;
};
56 | |
57 | /// Represents a module in a bitcode file. |
58 | class BitcodeModule { |
59 | // This covers the identification (if present) and module blocks. |
60 | ArrayRef<uint8_t> Buffer; |
61 | StringRef ModuleIdentifier; |
62 | |
63 | // The string table used to interpret this module. |
64 | StringRef Strtab; |
65 | |
66 | // The bitstream location of the IDENTIFICATION_BLOCK. |
67 | uint64_t IdentificationBit; |
68 | |
69 | // The bitstream location of this module's MODULE_BLOCK. |
70 | uint64_t ModuleBit; |
71 | |
72 | BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, |
73 | uint64_t IdentificationBit, uint64_t ModuleBit) |
74 | : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), |
75 | IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} |
76 | |
77 | // Calls the ctor. |
78 | friend Expected<BitcodeFileContents> |
79 | getBitcodeFileContents(MemoryBufferRef Buffer); |
80 | |
81 | Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context, |
82 | bool MaterializeAll, |
83 | bool ShouldLazyLoadMetadata, |
84 | bool IsImporting); |
85 | |
86 | public: |
87 | StringRef getBuffer() const { |
88 | return StringRef((const char *)Buffer.begin(), Buffer.size()); |
89 | } |
90 | |
91 | StringRef getStrtab() const { return Strtab; } |
92 | |
93 | StringRef getModuleIdentifier() const { return ModuleIdentifier; } |
94 | |
95 | /// Read the bitcode module and prepare for lazy deserialization of function |
96 | /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. |
97 | /// If IsImporting is true, this module is being parsed for ThinLTO |
98 | /// importing into another module. |
99 | Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context, |
100 | bool ShouldLazyLoadMetadata, |
101 | bool IsImporting); |
102 | |
103 | /// Read the entire bitcode module and return it. |
104 | Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context); |
105 | |
106 | /// Returns information about the module to be used for LTO: whether to |
107 | /// compile with ThinLTO, and whether it has a summary. |
108 | Expected<BitcodeLTOInfo> getLTOInfo(); |
109 | |
110 | /// Parse the specified bitcode buffer, returning the module summary index. |
111 | Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); |
112 | |
113 | /// Parse the specified bitcode buffer and merge its module summary index |
114 | /// into CombinedIndex. |
115 | Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, |
116 | uint64_t ModuleId); |
117 | }; |
118 | |
119 | struct BitcodeFileContents { |
120 | std::vector<BitcodeModule> Mods; |
121 | StringRef Symtab, StrtabForSymtab; |
122 | }; |
123 | |
124 | /// Returns the contents of a bitcode file. This includes the raw contents of |
125 | /// the symbol table embedded in the bitcode file. Clients which require a |
126 | /// symbol table should prefer to use irsymtab::read instead of this function |
127 | /// because it creates a reader for the irsymtab and handles upgrading bitcode |
128 | /// files without a symbol table or with an old symbol table. |
129 | Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); |
130 | |
131 | /// Returns a list of modules in the specified bitcode buffer. |
132 | Expected<std::vector<BitcodeModule>> |
133 | getBitcodeModuleList(MemoryBufferRef Buffer); |
134 | |
135 | /// Read the header of the specified bitcode buffer and prepare for lazy |
136 | /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, |
137 | /// lazily load metadata as well. If IsImporting is true, this module is |
138 | /// being parsed for ThinLTO importing into another module. |
139 | Expected<std::unique_ptr<Module>> |
140 | getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, |
141 | bool ShouldLazyLoadMetadata = false, |
142 | bool IsImporting = false); |
143 | |
144 | /// Like getLazyBitcodeModule, except that the module takes ownership of |
145 | /// the memory buffer if successful. If successful, this moves Buffer. On |
146 | /// error, this *does not* move Buffer. If IsImporting is true, this module is |
147 | /// being parsed for ThinLTO importing into another module. |
148 | Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( |
149 | std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, |
150 | bool ShouldLazyLoadMetadata = false, bool IsImporting = false); |
151 | |
152 | /// Read the header of the specified bitcode buffer and extract just the |
153 | /// triple information. If successful, this returns a string. On error, this |
154 | /// returns "". |
155 | Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); |
156 | |
157 | /// Return true if \p Buffer contains a bitcode file with ObjC code (category |
158 | /// or class) in it. |
159 | Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); |
160 | |
161 | /// Read the header of the specified bitcode buffer and extract just the |
162 | /// producer string information. If successful, this returns a string. On |
163 | /// error, this returns "". |
164 | Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); |
165 | |
166 | /// Read the specified bitcode file, returning the module. |
167 | Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer, |
168 | LLVMContext &Context); |
169 | |
170 | /// Returns LTO information for the specified bitcode file. |
171 | Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); |
172 | |
173 | /// Parse the specified bitcode buffer, returning the module summary index. |
174 | Expected<std::unique_ptr<ModuleSummaryIndex>> |
175 | getModuleSummaryIndex(MemoryBufferRef Buffer); |
176 | |
177 | /// Parse the specified bitcode buffer and merge the index into CombinedIndex. |
178 | Error readModuleSummaryIndex(MemoryBufferRef Buffer, |
179 | ModuleSummaryIndex &CombinedIndex, |
180 | uint64_t ModuleId); |
181 | |
182 | /// Parse the module summary index out of an IR file and return the module |
183 | /// summary index object if found, or an empty summary if not. If Path refers |
184 | /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then |
185 | /// this function will return nullptr. |
186 | Expected<std::unique_ptr<ModuleSummaryIndex>> |
187 | getModuleSummaryIndexForFile(StringRef Path, |
188 | bool IgnoreEmptyThinLTOIndexFile = false); |
189 | |
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \p BufPtr and \p BufEnd delimit the readable bytes. A range shorter than
/// the four magic bytes is rejected without reading any of it.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes must lie inside [BufPtr, BufEnd). Checking only for
  // a non-empty range would let the comparisons below read past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
202 | |
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \p BufPtr and \p BufEnd delimit the readable bytes. A range shorter than
/// the four magic bytes is rejected without reading any of it.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes must lie inside [BufPtr, BufEnd). Checking only for
  // a non-empty range would let the comparisons below read past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
215 | |
216 | /// isBitcode - Return true if the given bytes are the magic bytes for |
217 | /// LLVM IR bitcode, either with or without a wrapper. |
218 | inline bool isBitcode(const unsigned char *BufPtr, |
219 | const unsigned char *BufEnd) { |
220 | return isBitcodeWrapper(BufPtr, BufEnd) || |
221 | isRawBitcode(BufPtr, BufEnd); |
222 | } |
223 | |
224 | /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special |
225 | /// header for padding or other reasons. The format of this header is: |
226 | /// |
227 | /// struct bc_header { |
228 | /// uint32_t Magic; // 0x0B17C0DE |
229 | /// uint32_t Version; // Version, currently always 0. |
230 | /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. |
231 | /// uint32_t BitcodeSize; // Size of traditional bitcode file. |
232 | /// ... potentially other gunk ... |
233 | /// }; |
234 | /// |
235 | /// This function is called when we find a file with a matching magic number. |
236 | /// In this case, skip down to the subsection of the file that is actually a |
237 | /// BC file. |
238 | /// If 'VerifyBufferSize' is true, check that the buffer is large enough to |
239 | /// contain the whole bitcode file. |
240 | inline bool (const unsigned char *&BufPtr, |
241 | const unsigned char *&BufEnd, |
242 | bool VerifyBufferSize) { |
243 | // Must contain the offset and size field! |
244 | if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) |
245 | return true; |
246 | |
247 | unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); |
248 | unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); |
249 | uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; |
250 | |
251 | // Verify that Offset+Size fits in the file. |
252 | if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) |
253 | return true; |
254 | BufPtr += Offset; |
255 | BufEnd = BufPtr+Size; |
256 | return false; |
257 | } |
258 | |
/// Returns the std::error_category that make_error_code(BitcodeError)
/// attaches to the error codes it builds. Defined out of line.
const std::error_category &BitcodeErrorCategory();
/// Error values that make_error_code() turns into std::error_code objects in
/// the BitcodeErrorCategory() category.
enum class BitcodeError {
  CorruptedBitcode = 1
};
261 | inline std::error_code make_error_code(BitcodeError E) { |
262 | return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); |
263 | } |
264 | |
265 | } // end namespace llvm |
266 | |
267 | namespace std { |
268 | |
269 | template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; |
270 | |
271 | } // end namespace std |
272 | |
273 | #endif // LLVM_BITCODE_BITCODEREADER_H |
274 | |