1//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This header defines interfaces to read LLVM bitcode files/streams.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BITCODE_BITCODEREADER_H
15#define LLVM_BITCODE_BITCODEREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/Bitcode/BitCodes.h"
20#include "llvm/IR/ModuleSummaryIndex.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/ErrorOr.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include <cstdint>
26#include <memory>
27#include <string>
28#include <system_error>
29#include <vector>
30namespace llvm {
31
32class LLVMContext;
33class Module;
34
35 // These functions are for converting Expected/Error values to
36 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
37 // Remove these functions once no longer needed by the C and libLTO APIs.
38
39 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
40
41 template <typename T>
42 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
43 if (!Val)
44 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
45 return std::move(*Val);
46 }
47
48 struct BitcodeFileContents;
49
/// Basic information extracted from a bitcode module to be used for LTO.
struct BitcodeLTOInfo {
  /// Whether the module is to be compiled with ThinLTO.
  bool IsThinLTO;
  /// Whether the module has a summary.
  bool HasSummary;
  /// NOTE(review): presumably mirrors the module's "split LTO unit" setting;
  /// confirm against the reader implementation.
  bool EnableSplitLTOUnit;
};
56
57 /// Represents a module in a bitcode file.
58 class BitcodeModule {
59 // This covers the identification (if present) and module blocks.
60 ArrayRef<uint8_t> Buffer;
61 StringRef ModuleIdentifier;
62
63 // The string table used to interpret this module.
64 StringRef Strtab;
65
66 // The bitstream location of the IDENTIFICATION_BLOCK.
67 uint64_t IdentificationBit;
68
69 // The bitstream location of this module's MODULE_BLOCK.
70 uint64_t ModuleBit;
71
72 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
73 uint64_t IdentificationBit, uint64_t ModuleBit)
74 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
75 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
76
77 // Calls the ctor.
78 friend Expected<BitcodeFileContents>
79 getBitcodeFileContents(MemoryBufferRef Buffer);
80
81 Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
82 bool MaterializeAll,
83 bool ShouldLazyLoadMetadata,
84 bool IsImporting);
85
86 public:
87 StringRef getBuffer() const {
88 return StringRef((const char *)Buffer.begin(), Buffer.size());
89 }
90
91 StringRef getStrtab() const { return Strtab; }
92
93 StringRef getModuleIdentifier() const { return ModuleIdentifier; }
94
95 /// Read the bitcode module and prepare for lazy deserialization of function
96 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
97 /// If IsImporting is true, this module is being parsed for ThinLTO
98 /// importing into another module.
99 Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
100 bool ShouldLazyLoadMetadata,
101 bool IsImporting);
102
103 /// Read the entire bitcode module and return it.
104 Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
105
106 /// Returns information about the module to be used for LTO: whether to
107 /// compile with ThinLTO, and whether it has a summary.
108 Expected<BitcodeLTOInfo> getLTOInfo();
109
110 /// Parse the specified bitcode buffer, returning the module summary index.
111 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
112
113 /// Parse the specified bitcode buffer and merge its module summary index
114 /// into CombinedIndex.
115 Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
116 uint64_t ModuleId);
117 };
118
119 struct BitcodeFileContents {
120 std::vector<BitcodeModule> Mods;
121 StringRef Symtab, StrtabForSymtab;
122 };
123
124 /// Returns the contents of a bitcode file. This includes the raw contents of
125 /// the symbol table embedded in the bitcode file. Clients which require a
126 /// symbol table should prefer to use irsymtab::read instead of this function
127 /// because it creates a reader for the irsymtab and handles upgrading bitcode
128 /// files without a symbol table or with an old symbol table.
129 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
130
131 /// Returns a list of modules in the specified bitcode buffer.
132 Expected<std::vector<BitcodeModule>>
133 getBitcodeModuleList(MemoryBufferRef Buffer);
134
135 /// Read the header of the specified bitcode buffer and prepare for lazy
136 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
137 /// lazily load metadata as well. If IsImporting is true, this module is
138 /// being parsed for ThinLTO importing into another module.
139 Expected<std::unique_ptr<Module>>
140 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
141 bool ShouldLazyLoadMetadata = false,
142 bool IsImporting = false);
143
144 /// Like getLazyBitcodeModule, except that the module takes ownership of
145 /// the memory buffer if successful. If successful, this moves Buffer. On
146 /// error, this *does not* move Buffer. If IsImporting is true, this module is
147 /// being parsed for ThinLTO importing into another module.
148 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
149 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
150 bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
151
152 /// Read the header of the specified bitcode buffer and extract just the
153 /// triple information. If successful, this returns a string. On error, this
154 /// returns "".
155 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
156
157 /// Return true if \p Buffer contains a bitcode file with ObjC code (category
158 /// or class) in it.
159 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
160
161 /// Read the header of the specified bitcode buffer and extract just the
162 /// producer string information. If successful, this returns a string. On
163 /// error, this returns "".
164 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
165
166 /// Read the specified bitcode file, returning the module.
167 Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
168 LLVMContext &Context);
169
170 /// Returns LTO information for the specified bitcode file.
171 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
172
173 /// Parse the specified bitcode buffer, returning the module summary index.
174 Expected<std::unique_ptr<ModuleSummaryIndex>>
175 getModuleSummaryIndex(MemoryBufferRef Buffer);
176
177 /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
178 Error readModuleSummaryIndex(MemoryBufferRef Buffer,
179 ModuleSummaryIndex &CombinedIndex,
180 uint64_t ModuleId);
181
182 /// Parse the module summary index out of an IR file and return the module
183 /// summary index object if found, or an empty summary if not. If Path refers
184 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
185 /// this function will return nullptr.
186 Expected<std::unique_ptr<ModuleSummaryIndex>>
187 getModuleSummaryIndexForFile(StringRef Path,
188 bool IgnoreEmptyThinLTOIndexFile = false);
189
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr Start of the byte range to inspect.
/// \param BufEnd One past the last byte of the range.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 0xDE 0xC0 0x17 0x0B; false otherwise.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // Up to four bytes are read below, so a merely non-empty range is not
  // enough: require four readable bytes so we never look past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
202
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr Start of the byte range to inspect.
/// \param BufEnd One past the last byte of the range.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 'B' 'C' 0xC0 0xDE; false otherwise.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // Up to four bytes are read below, so a merely non-empty range is not
  // enough: require four readable bytes so we never look past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
215
216 /// isBitcode - Return true if the given bytes are the magic bytes for
217 /// LLVM IR bitcode, either with or without a wrapper.
218 inline bool isBitcode(const unsigned char *BufPtr,
219 const unsigned char *BufEnd) {
220 return isBitcodeWrapper(BufPtr, BufEnd) ||
221 isRawBitcode(BufPtr, BufEnd);
222 }
223
224 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
225 /// header for padding or other reasons. The format of this header is:
226 ///
227 /// struct bc_header {
228 /// uint32_t Magic; // 0x0B17C0DE
229 /// uint32_t Version; // Version, currently always 0.
230 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
231 /// uint32_t BitcodeSize; // Size of traditional bitcode file.
232 /// ... potentially other gunk ...
233 /// };
234 ///
235 /// This function is called when we find a file with a matching magic number.
236 /// In this case, skip down to the subsection of the file that is actually a
237 /// BC file.
238 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
239 /// contain the whole bitcode file.
240 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
241 const unsigned char *&BufEnd,
242 bool VerifyBufferSize) {
243 // Must contain the offset and size field!
244 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
245 return true;
246
247 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
248 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
249 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
250
251 // Verify that Offset+Size fits in the file.
252 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
253 return true;
254 BufPtr += Offset;
255 BufEnd = BufPtr+Size;
256 return false;
257 }
258
/// Returns the std::error_category that BitcodeError values belong to.
/// Declared here; the definition lives out of line.
const std::error_category &BitcodeErrorCategory();

/// Error values reported by the bitcode reader.
enum class BitcodeError { CorruptedBitcode = 1 };

/// Builds a std::error_code from \p E: the enumerator's integer value paired
/// with BitcodeErrorCategory().
inline std::error_code make_error_code(BitcodeError E) {
  const int RawValue = static_cast<int>(E);
  return std::error_code(RawValue, BitcodeErrorCategory());
}
264
265} // end namespace llvm
266
267namespace std {
268
269template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
270
271} // end namespace std
272
273#endif // LLVM_BITCODE_BITCODEREADER_H
274