1#include "dyn_loader.h"
2
3#include <asm/prctl.h>
4#include <elf.h>
5#include <fcntl.h>
6#include <sys/auxv.h>
7#include <sys/mman.h>
8#include <sys/prctl.h>
9#include <sys/stat.h>
10
11#include <cstring>
12#include <filesystem>
13#include <fstream>
14#include <map>
15#include <memory>
16#include <optional>
17
18#include "libc_mapping.h"
19#include "utils.h"
20
// Thread-local buffer defined in another translation unit. Execute() uses it
// as the TLS block into which the TLS images of the loaded binaries are
// copied, and TLSSymOffset() computes libc TLS offsets relative to it.
extern thread_local unsigned long sloader_dummy_to_secure_tls_space[];
// Byte offset into the buffer above. Execute() copies each binary's TLS image
// directly below this offset and then decreases it by the image size.
extern unsigned long sloader_tls_offset;
// Defined elsewhere; called once by InitializeDynLoader() before the loader is
// created. NOTE(review): presumably touches the buffer so that it really gets
// allocated -- confirm against its definition.
void write_sloader_dummy_to_secure_tls_space();
24
25namespace {
26
// Worker behind read_ldsoconf_dfs(): parses one ld.so.conf-style file and
// appends the directories it lists to `res`.
//
// `visited` holds the names of the files that have already been opened during
// this traversal so that a file is parsed at most once; this also breaks
// `include` cycles (a file that includes itself, directly or indirectly).
void read_ldsoconf_visit(std::vector<std::filesystem::path>& res, const std::string& filename,
                         std::vector<std::string>& visited) {
    // TODO: Workaround not to load i386 libs.
    if (filename.find("i386") != std::string::npos || filename.find("lib32") != std::string::npos) {
        return;
    }
    for (const auto& seen : visited) {
        if (seen == filename) {
            return;
        }
    }

    std::ifstream f(filename);
    if (!f) {
        return;
    }
    visited.push_back(filename);

    // Relative include patterns are resolved against the directory of the file
    // that contains them (as ldconfig does), not against the current directory.
    const std::filesystem::path conf_dir = std::filesystem::path(filename).parent_path();

    std::string head;
    while (f >> head) {
        if (head[0] == '#') {
            // Comment: discard the rest of the line.
            std::string comment;
            std::getline(f, comment);
        } else if (head == "include") {
            std::string pattern;
            if (!(f >> pattern)) {
                break;
            }
            if (pattern[0] != '/' && !conf_dir.empty()) {
                pattern = (conf_dir / pattern).string();
            }

            // Zero-initialised so that globfree() is safe even when glob() fails.
            glob_t globbuf{};
            if (glob(pattern.c_str(), 0, nullptr, &globbuf) == 0) {
                for (size_t i = 0; i < globbuf.gl_pathc; i++) {
                    read_ldsoconf_visit(res, globbuf.gl_pathv[i], visited);
                }
            }
            globfree(&globbuf);
        } else {
            res.push_back(head);
        }
    }
}

// Recursively reads the ld.so.conf-style file `filename` and appends every
// directory it lists (including those from `include`d files, in file order) to
// `res`.
//
// Tokens are whitespace separated; a token starting with '#' comments out the
// rest of its line. Files that cannot be opened, and files whose name contains
// "i386" or "lib32", are silently skipped.
void read_ldsoconf_dfs(std::vector<std::filesystem::path>& res, const std::string& filename) {
    std::vector<std::string> visited;
    read_ldsoconf_visit(res, filename, visited);
}
55
56} // namespace
57
58std::vector<std::filesystem::path> read_ldsoconf() {
59 std::vector<std::filesystem::path> res;
60 read_ldsoconf_dfs(res, "/etc/ld.so.conf");
61
62 return res;
63}
64
65ELFBinary::ELFBinary(const std::filesystem::path path) : path_(path) {
66 int fd = open(path_.c_str(), O_RDONLY);
67 LOG(INFO) << LOG_KEY(path_) << LOG_KEY(fd);
68 CHECK(fd >= 0);
69
70 size_t size = lseek(fd, 0, SEEK_END);
71 CHECK_GT(size, 8UL + 16UL);
72
73 size_t mapped_size = (size + 0xfff) & ~0xfff;
74
75 file_base_addr_ = (char*)mmap(NULL, mapped_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
76 CHECK(file_base_addr_ != MAP_FAILED);
77
78 ehdr_ = *reinterpret_cast<Elf64_Ehdr*>(file_base_addr_);
79 for (uint16_t i = 0; i < ehdr_.e_phnum; i++) {
80 Elf64_Phdr ph = *reinterpret_cast<Elf64_Phdr*>(file_base_addr_ + ehdr_.e_phoff + i * ehdr_.e_phentsize);
81 file_phdrs_.emplace_back(ph);
82
83 if (ph.p_type == PT_DYNAMIC) {
84 LOG(INFO) << "Found PT_DYNAMIC";
85 file_dynamic_ = ph;
86 } else if (ph.p_type == PT_TLS) {
87 LOG(INFO) << "Found PT_TLS";
88 has_tls_ = true;
89 file_tls_ = ph;
90 }
91 }
92}
93
94Elf64_Addr ELFBinary::Load(Elf64_Addr base_addr_arg, std::shared_ptr<std::ofstream> map_file) {
95 LOG(INFO) << LOG_BITS(base_addr_arg);
96 base_addr_ = (ehdr().e_type == ET_DYN) ? base_addr_arg : 0;
97 end_addr_ = base_addr_;
98
99 LOG(INFO) << "Load start " << path_;
100
101 for (auto ph : file_phdrs_) {
102 if (ph.p_type != PT_LOAD) {
103 continue;
104 }
105 LOG(INFO) << LOG_BITS(reinterpret_cast<void*>(ph.p_vaddr)) << LOG_BITS(ph.p_memsz);
106 void* mmap_start = reinterpret_cast<void*>(((ph.p_vaddr + base_addr()) & (~(0xfff))));
107 void* mmap_end = reinterpret_cast<void*>((((ph.p_vaddr + ph.p_memsz + base_addr()) + 0xfff) & (~(0xfff))));
108 end_addr_ = reinterpret_cast<Elf64_Addr>(mmap_end);
109 size_t mmap_size = reinterpret_cast<size_t>(mmap_end) - reinterpret_cast<size_t>(mmap_start);
110 int flags = 0;
111 std::string flags_str = "";
112 if (ph.p_flags & PF_R) {
113 flags |= PROT_READ;
114 flags_str += "r";
115 } else {
116 flags_str += "_";
117 }
118 if ((ph.p_flags & PF_W) || true) { // TODO: We need to write contents after mmap.
119 flags |= PROT_WRITE;
120 flags_str += "w";
121 } else {
122 flags_str += "_";
123 }
124 if (ph.p_flags & PF_X) {
125 flags |= PROT_EXEC;
126 flags_str += "x";
127 } else {
128 flags_str += "_";
129 }
130
131 char* p = reinterpret_cast<char*>(mmap(mmap_start, mmap_size, flags, MAP_SHARED | MAP_ANONYMOUS, -1, 0));
132 LOG(INFO) << "mmap: " << LOG_KEY(path_) << LOG_BITS(p) << LOG_BITS(mmap_start) << LOG_BITS(ph.p_vaddr)
133 << "errno = " << std::strerror(errno);
134 CHECK_EQ(mmap_start, reinterpret_cast<void*>(p));
135 CHECK_LE(reinterpret_cast<Elf64_Addr>(mmap_start), ph.p_vaddr + base_addr());
136 CHECK_LE(ph.p_vaddr + base_addr() + ph.p_memsz, reinterpret_cast<Elf64_Addr>(mmap_end));
137 LOG(INFO) << LOG_BITS(mmap_start) << LOG_BITS(reinterpret_cast<size_t>(file_base_addr_ + ph.p_offset)) << LOG_BITS(ph.p_filesz);
138 *map_file << path().string() << " " << HexString(ph.p_offset, 16) << "-" << HexString(ph.p_offset + ph.p_filesz, 16) << " "
139 << flags_str << " " << HexString(ph.p_filesz, 16) << " => " << HexString(mmap_start, 16) << "-" << HexString(mmap_end, 16)
140 << std::endl;
141 memcpy(reinterpret_cast<void*>(ph.p_vaddr + base_addr()), file_base_addr_ + ph.p_offset, ph.p_filesz);
142 }
143 LOG(INFO) << "Load end";
144
145 LOG(INFO) << "ParseDynamic start";
146 ParseDynamic();
147 LOG(INFO) << "ParseDynamic end";
148
149 return (end_addr() + (0x400000 - 1)) / 0x400000 * 0x400000;
150}
151
152void ELFBinary::ParseDynamic() {
153 // Must mmap PT_LOADs before call ParseDynamic.
154 CHECK(base_addr_ != 0UL || ehdr_.e_type == ET_EXEC);
155
156 const size_t dyn_size = sizeof(Elf64_Dyn);
157 CHECK_EQ(file_dynamic_.p_filesz % dyn_size, 0U);
158
159 // Search DT_STRTAB at first.
160 for (size_t i = 0; i < file_dynamic_.p_filesz / dyn_size; ++i) {
161 Elf64_Dyn* dyn = reinterpret_cast<Elf64_Dyn*>(base_addr_ + file_dynamic_.p_vaddr + dyn_size * i);
162 LOG(INFO) << LOG_KEY(dyn);
163 if (dyn->d_tag == DT_STRTAB) {
164 LOG(INFO) << "Found DT_STRTAB";
165 strtab_ = reinterpret_cast<char*>(dyn->d_un.d_ptr + base_addr_);
166 } else if (dyn->d_tag == DT_STRSZ) {
167 LOG(INFO) << "Found DT_STRSZ";
168 strsz_ = dyn->d_un.d_val;
169 }
170 }
171
172 CHECK(strtab_ != nullptr || ehdr_.e_type == ET_EXEC);
173
174 for (size_t i = 0; i < file_dynamic_.p_filesz / dyn_size; ++i) {
175 Elf64_Dyn* dyn = reinterpret_cast<Elf64_Dyn*>(base_addr_ + file_dynamic_.p_vaddr + dyn_size * i);
176 if (dyn->d_tag == DT_NEEDED) {
177 std::string needed = strtab_ + dyn->d_un.d_val;
178 neededs_.emplace_back(needed);
179 LOG(INFO) << LOG_KEY(needed);
180 } else if (dyn->d_tag == DT_RUNPATH) {
181 // TODO: Handle relative path
182 runpath_ = strtab_ + dyn->d_un.d_val;
183 } else if (dyn->d_tag == DT_RPATH) {
184 // TODO: Handle relative path
185 rpath_ = strtab_ + dyn->d_un.d_val;
186 } else if (dyn->d_tag == DT_RELA) {
187 LOG(INFO) << "Found DT_RELA";
188 rela_ = reinterpret_cast<Elf64_Rela*>(base_addr_ + dyn->d_un.d_val);
189 } else if (dyn->d_tag == DT_RELASZ) {
190 relasz_ = dyn->d_un.d_val;
191 } else if (dyn->d_tag == DT_RELAENT) {
192 relaent_ = dyn->d_un.d_val;
193 } else if (dyn->d_tag == DT_RELACOUNT) {
194 relacount_ = dyn->d_un.d_val;
195 } else if (dyn->d_tag == DT_JMPREL) {
196 jmprel_ = reinterpret_cast<Elf64_Rela*>(base_addr_ + dyn->d_un.d_val);
197 } else if (dyn->d_tag == DT_PLTRELSZ) {
198 pltrelsz_ = dyn->d_un.d_val;
199 } else if (dyn->d_tag == DT_PLTREL) {
200 pltrel_ = dyn->d_un.d_val;
201 CHECK(pltrel_ == DT_RELA || pltrel_ == DT_REL);
202 pltrelent_ = (pltrel_ == DT_RELA) ? sizeof(Elf64_Rela) : sizeof(Elf64_Rel);
203 } else if (dyn->d_tag == DT_SYMTAB) {
204 symtab_ = reinterpret_cast<Elf64_Sym*>(base_addr_ + dyn->d_un.d_val);
205 } else if (dyn->d_tag == DT_SYMENT) {
206 syment_ = dyn->d_un.d_val;
207 CHECK_EQ(syment_, sizeof(Elf64_Sym));
208 } else if (dyn->d_tag == DT_INIT) {
209 init_ = dyn->d_un.d_val;
210 } else if (dyn->d_tag == DT_FINI) {
211 fini_ = dyn->d_un.d_val;
212 } else if (dyn->d_tag == DT_INIT_ARRAY) {
213 init_array_ = dyn->d_un.d_val;
214 } else if (dyn->d_tag == DT_INIT_ARRAYSZ) {
215 init_arraysz_ = dyn->d_un.d_val;
216 } else if (dyn->d_tag == DT_FINI_ARRAY) {
217 fini_array_ = dyn->d_un.d_val;
218 } else if (dyn->d_tag == DT_FINI_ARRAYSZ) {
219 fini_arraysz_ = dyn->d_un.d_val;
220 }
221 }
222
223 LOG(INFO) << LOG_KEY(relasz_) << LOG_KEY(relaent_) << LOG_KEY(relacount_);
224 if (rela_ != nullptr) {
225 CHECK_EQ(relasz_ % relaent_, 0UL);
226 Elf64_Rela* r = rela_;
227 for (size_t i = 0; i < relasz_ / relaent_; i++, r++) {
228 relas_.emplace_back(*r);
229 LOG(INFO) << ShowRela(relas_.back());
230 }
231 }
232
233 LOG(INFO) << LOG_KEY(pltrelsz_) << LOG_KEY(pltrelent_);
234 if (jmprel_ != nullptr) {
235 CHECK_EQ(pltrelsz_ % pltrelent_, 0UL);
236 CHECK_EQ(pltrel_, static_cast<unsigned long>(DT_RELA));
237 Elf64_Rela* r = jmprel_;
238 for (size_t i = 0; i < pltrelsz_ / pltrelent_; i++, r++) {
239 pltrelas_.emplace_back(*r);
240 LOG(INFO) << ShowRela(pltrelas_.back());
241 }
242 }
243
244 if (symtab_ != nullptr) {
245 Elf64_Sym* s = symtab_;
246 symtabs_.emplace_back(*s);
247 s++;
248
249 // TODO: This is a hack. Listing up all symbols is always difficult.
250 while (0 <= s->st_name && s->st_name < strsz_) {
251 symtabs_.emplace_back(*s);
252 s++;
253 }
254
255 for (const auto& s : symtabs_) {
256 LOG(INFO) << LOG_KEY(s.st_name);
257 LOG(INFO) << ShowSym(s, strtab_);
258 }
259 }
260}
261
262const Elf64_Addr ELFBinary::GetSymbolAddr(const size_t symbol_index) {
263 CHECK_LT(symbol_index, symtabs().size());
264 return symtabs()[symbol_index].st_value + base_addr();
265}
266
267std::filesystem::path FindLibrary(std::string library_name, std::optional<std::filesystem::path> runpath,
268 std::optional<std::filesystem::path> rpath) {
269 {
270 std::filesystem::path library_path(library_name);
271 if (library_path.is_absolute() && std::filesystem::exists(library_path)) {
272 return library_path;
273 }
274 }
275
276 std::vector<std::filesystem::path> library_directory;
277
278 std::string sloader_library_path(std::getenv("SLOADER_LIBRARY_PATH") == nullptr ? "" : std::getenv("SLOADER_LIBRARY_PATH"));
279 if (!sloader_library_path.empty()) {
280 library_directory.emplace_back(sloader_library_path);
281 }
282
283 if (runpath) {
284 library_directory.emplace_back(runpath.value());
285 }
286 if (rpath) {
287 library_directory.emplace_back(rpath.value());
288 }
289 const auto ldsoconfs = read_ldsoconf();
290 library_directory.insert(library_directory.end(), ldsoconfs.begin(), ldsoconfs.end());
291 library_directory.emplace_back("/lib");
292 library_directory.emplace_back("/usr/lib");
293 library_directory.emplace_back("/usr/lib64");
294 library_directory.emplace_back("/usr/lib/x86_64-linux-gnu");
295 library_directory.emplace_back(".");
296
297 for (const auto& d : library_directory) {
298 if(!std::filesystem::is_directory(d)) {
299 continue;
300 }
301
302 std::string searching_filename = std::filesystem::path(library_name).filename();
303 for (const auto& entry : std::filesystem::directory_iterator(d)) {
304 LOG(INFO) << LOG_KEY(entry.path().filename().string()) << LOG_KEY(searching_filename);
305 if (entry.path().filename().string().starts_with(searching_filename)) {
306 LOG(INFO) << LOG_KEY(entry.path());
307 return entry.path();
308 }
309 }
310 }
311 LOG(FATAL) << "Cannot find" << LOG_KEY(library_name);
312 std::abort();
313}
314
315void DynLoader::LoadDependingLibs(const std::filesystem::path& root_path) {
316 binaries_.emplace_back(ELFBinary(root_path));
317 next_base_addr_ = binaries_.back().Load(next_base_addr_, map_file_);
318 loaded_.insert(root_path.filename());
319
320 std::queue<std::tuple<std::string, std::optional<std::filesystem::path>, std::optional<std::filesystem::path>>> queue;
321
322 for (const auto& n : binaries_.back().neededs()) {
323 queue.push(std::make_tuple(n, binaries_.back().runpath(), binaries_.back().rpath()));
324 }
325
326 // Search depending sos.
327 while (!queue.empty()) {
328 const auto [library_name, runpath, rpath] = queue.front();
329 queue.pop();
330
331 if (loaded_.count(library_name) != 0) continue;
332 loaded_.insert(library_name);
333
334 // Skip dynamic loader and libc.so
335 if (library_name.find("ld-linux") != std::string::npos || library_name.find("libc.so") != std::string::npos) {
336 LOG(INFO) << "Skip " << library_name;
337 continue;
338 }
339
340 const auto library_path = FindLibrary(library_name, runpath, rpath);
341 binaries_.emplace_back(ELFBinary(library_path));
342 next_base_addr_ = binaries_.back().Load(next_base_addr_, map_file_);
343 for (const auto& n : binaries_.back().neededs()) {
344 queue.push(std::make_tuple(n, binaries_.back().runpath(), binaries_.back().rpath()));
345 }
346 }
347}
348
349DynLoader::DynLoader(const std::filesystem::path& main_path, const std::vector<std::string>& args, const std::vector<std::string>& envs)
350 : main_path_(main_path), args_(args), envs_(envs), next_base_addr_(0x140'0000) {
351 map_file_ =
352 std::make_shared<std::ofstream>(std::getenv("SLOADER_MAP_FILE") == nullptr ? "/tmp/sloader_map" : std::getenv("SLOADER_MAP_FILE"));
353}
354
355void DynLoader::Run() {
356 LoadDependingLibs(main_path_);
357 Relocate();
358 Execute(args_, envs_);
359}
360
// Pushes `stack_num` 64-bit words from `stack` onto the machine stack and then
// jumps to `entry`.
//
// Words are pushed in index order, so stack[stack_num - 1] is pushed last and
// is what the stack pointer refers to when control reaches `entry`.
//
// The author relies on the calling convention to have stack, stack_num and
// entry in registers (%rdi, %rsi and %rdx according to the original note). For
// details, see A.2.1 Calling Conventions in
// https://refspecs.linuxfoundation.org/elf/x86_64-abi-0.99.pdf. Of course, the
// compiler must not inline this function.
// NOTE(review): if ExecuteCore is a non-static member, `this` would take the
// first argument register -- confirm against the declaration.
void __attribute__((noinline)) DynLoader::ExecuteCore(uint64_t* stack, size_t stack_num, uint64_t entry) {
    // One pushq per word, read directly from memory.
    for (size_t i = 0; i < stack_num; i++) {
        asm volatile("pushq %0" ::"m"(*(stack + i)));
    }

    // Indirect jump (not a call): no return address is pushed.
    asm volatile("jmp *%0" ::"r"(entry));
}
372
373// Copied from glibc
374// Type of a constructor function, in DT_INIT, DT_INIT_ARRAY, DT_PREINIT_ARRAY.
375// argc, argv, env
376typedef void (*dl_init_t)(int, char**, char**);
377
378void DynLoader::Execute(std::vector<std::string> args, std::vector<std::string> envs) {
379 // TODO: Pass arguments
380 char* argv[] = {const_cast<char*>(main_path_.c_str())};
381 char** env = reinterpret_cast<char**>(malloc(sizeof(const char*) * envs.size()));
382 for (size_t i = 0; i < envs.size(); i++) {
383 env[i] = const_cast<char*>(envs[i].c_str());
384 }
385
386 for (int i = binaries_.size() - 1; 0 <= i; i--) {
387 if (binaries_[i].init() != 0) {
388 reinterpret_cast<dl_init_t>(binaries_[i].init() + binaries_[i].base_addr())(1, argv, env);
389 }
390 if (binaries_[i].init_arraysz() != 0) {
391 CHECK_EQ(binaries_[i].init_arraysz() % 8, 0UL); // Assume 64bits
392 LOG(INFO) << LOG_BITS(i) << LOG_BITS(binaries_[i].init_arraysz());
393 Elf64_Addr* init_array_funs =
394 reinterpret_cast<Elf64_Addr*>((reinterpret_cast<char**>(binaries_[i].init_array() + binaries_[i].base_addr())));
395
396 for (long unsigned int j = 0; j < binaries_[i].init_arraysz() / 8; j++) {
397 LOG(INFO) << LOG_KEY(binaries_[i].filename()) << LOG_BITS(binaries_[i].init_array()) << LOG_BITS(init_array_funs[j])
398 << LOG_BITS(binaries_[i].base_addr()) << LOG_BITS(init_array_funs[j] + binaries_[i].base_addr());
399 if (reinterpret_cast<dl_init_t>(init_array_funs[j]) == nullptr) {
400 LOG(FATAL) << LOG_BITS(init_array_funs[j]);
401 break;
402 }
403 reinterpret_cast<dl_init_t>(init_array_funs[j])(1, argv, env);
404 }
405 }
406 }
407
408 unsigned long at_random = getauxval(AT_RANDOM);
409 unsigned long at_pagesz = getauxval(AT_PAGESZ);
410 CHECK_NE(at_random, 0UL);
411 LOG(INFO) << LOG_BITS(at_random) << LOG_BITS(at_pagesz);
412
413 // Some commented out auxiliary values because they are not appropriate
414 // as loading programs. These values are for sloader itself.
415 std::vector<unsigned long> aux_types{AT_IGNORE,
416 // AT_EXECFD,
417 // AT_PHDR,
418 AT_PHENT,
419 // AT_PHNUM,
420 AT_PAGESZ,
421 // AT_BASE,
422 AT_FLAGS,
423 // AT_ENTRY,
424 AT_NOTELF, AT_UID, AT_EUID, AT_GID, AT_EGID, AT_CLKTCK, AT_PLATFORM, AT_HWCAP, AT_FPUCW,
425 AT_DCACHEBSIZE, AT_ICACHEBSIZE, AT_UCACHEBSIZE, AT_IGNOREPPC, AT_SECURE, AT_BASE_PLATFORM,
426 AT_RANDOM, AT_HWCAP2,
427 // AT_EXECFN,
428 AT_SYSINFO, AT_SYSINFO_EHDR, AT_L1I_CACHESHAPE, AT_L1D_CACHESHAPE, AT_L2_CACHESHAPE,
429 AT_L3_CACHESHAPE, AT_L1I_CACHESIZE, AT_L1I_CACHEGEOMETRY, AT_L1D_CACHESIZE, AT_L1D_CACHEGEOMETRY,
430 AT_L2_CACHESIZE, AT_L2_CACHEGEOMETRY, AT_L3_CACHESIZE, AT_L3_CACHEGEOMETRY, AT_MINSIGSTKSZ};
431
432 std::vector<std::pair<unsigned long, unsigned long>> aux_tvs;
433 for (size_t i = 0; i < aux_types.size(); i++) {
434 unsigned long v = getauxval(aux_types[i]);
435 if (v != 0) {
436 aux_tvs.emplace_back(std::make_pair(aux_types[i], v));
437 LOG(INFO) << LOG_BITS(aux_types[i]) << LOG_BITS(v);
438 }
439 }
440
441 // See http://articles.manugarg.com/aboutelfauxiliaryvectors.html for
442 // the stack layout padding.
443 //
444 // 4 words padding
445 // 0
446 // AT_NULL
447 // auxs
448 // NULL
449 // envs
450 // argv[argc] (must be null)
451 // argv[0] = filename
452 // argc
453 size_t stack_index = 0;
454 size_t stack_num = 4 + 2 + 2 * aux_tvs.size() + 1 + envs.size() + 2 + args.size();
455 size_t stack_size = sizeof(uint64_t) * stack_num;
456 unsigned long* stack = reinterpret_cast<uint64_t*>(malloc(stack_size));
457 memset(stack, 0, stack_size);
458
459 // 4 words padding
460 stack_index += 4;
461
462 // First two elements are 0 and AT_NULL.
463 stack_index += 2;
464
465 // auxs
466 for (size_t i = 0; i < aux_tvs.size(); i++) {
467 *(stack + stack_index) = aux_tvs[i].second;
468 stack_index++;
469 *(stack + stack_index) = aux_tvs[i].first;
470 stack_index++;
471 }
472
473 // End of environment variables
474 stack_index++;
475
476 // Environment variables
477 for (size_t i = 0; i < envs.size(); i++) {
478 *(stack + stack_index) = reinterpret_cast<uint64_t>(envs[envs.size() - 1 - i].c_str());
479 stack_index++;
480 }
481
482 // argv[argc]
483 stack_index++;
484
485 for (size_t i = 0; i < args.size(); i++) {
486 LOG(INFO) << (args[i]);
487 *(stack + stack_index) = reinterpret_cast<uint64_t>(args[args.size() - 1 - i].c_str());
488 stack_index++;
489 }
490
491 // argc
492 *(stack + stack_index) = args.size();
493 stack_index++;
494
495 CHECK_EQ(stack_index, stack_num);
496
497 LOG(INFO) << LOG_BITS(binaries_[0].ehdr().e_entry + binaries_[0].base_addr()) << std::endl;
498
499 // TLS initialization
500 // TODO: We support only static TLS i.e. don't support dlopen.
501 //
502 // =========== address ==========>
503 //
504 // tls_block (= sloader_dummy_to_secure_tls_space) tls_block + TLS_SPACE_FOR_LOADEE
505 // | |
506 // v v
507 // [.tdata of binaries_[n]] [.tbss of binaries_[n]] ... [.tdata of binaries_[0]] [.tbss of binaries_[0]]
508
509 {
510 size_t tls_block_size = 0;
511 for (const ELFBinary& b : binaries_) {
512 if (b.has_tls()) {
513 tls_block_size += b.file_tls().p_memsz;
514 }
515 }
516 CHECK_LE(tls_block_size, 4096UL);
517 }
518
519 void* tls_block = sloader_dummy_to_secure_tls_space;
520
521 // Copy .tdata and .tbss of each binary
522 for (const ELFBinary& b : binaries_) {
523 if (b.has_tls()) {
524 LOG(INFO) << LOG_BITS(reinterpret_cast<uint64_t>(tls_block)) << LOG_BITS(reinterpret_cast<uint64_t>(b.file_tls().p_memsz));
525 LOG(INFO) << LOG_BITS(reinterpret_cast<uint64_t>(b.file_tls().p_memsz))
526 << LOG_BITS(reinterpret_cast<uint64_t>(b.file_tls().p_filesz)) << LOG_KEY(b.path());
527
528 // Set .tdata
529 memcpy(reinterpret_cast<char*>(tls_block) + sloader_tls_offset - b.file_tls().p_memsz,
530 reinterpret_cast<const void*>(b.base_addr() + b.file_tls().p_vaddr), b.file_tls().p_memsz);
531 // Set .tbss
532 memset(reinterpret_cast<char*>(tls_block) + sloader_tls_offset - (b.file_tls().p_memsz - b.file_tls().p_filesz), 0x0,
533 b.file_tls().p_memsz - b.file_tls().p_filesz);
534
535 *reinterpret_cast<void**>(reinterpret_cast<char*>(tls_block) + sloader_tls_offset) =
536 reinterpret_cast<char*>(tls_block) + sloader_tls_offset;
537 sloader_tls_offset -= b.file_tls().p_memsz;
538 }
539 }
540
541 ExecuteCore(stack, stack_num, binaries_[0].ehdr().e_entry + binaries_[0].base_addr());
542
543 free(stack);
544 LOG(INFO) << "Execute end";
545}
546
547// Search the first defined symbol
548// Return pair of the index of ELFBinary and the index of the Elf64_Sym
549// TODO: Consider version information
550// TODO: Return ELFBinary and Elf64_Sym theirselves
551std::optional<std::pair<size_t, size_t>> DynLoader::SearchSym(const std::string& name, bool skip_main = false) {
552 LOG(INFO) << "========== SearchSym " << name << " ==========";
553 // binaries_[0] is the executable itself. We should skip it.
554 // TODO: Add reference here.
555 for (size_t i = skip_main ? 1 : 0; i < binaries_.size(); i++) {
556 for (size_t j = 0; j < binaries_[i].symtabs().size(); j++) {
557 const Elf64_Sym& s = binaries_[i].symtabs()[j];
558 std::string_view n(s.st_name + binaries_[i].strtab());
559 if (n == name && s.st_shndx != SHN_UNDEF) {
560 LOG(INFO) << "Found " << name << " at index " << j << " of " << binaries_[i].path();
561 return std::make_optional(std::make_pair(i, j));
562 }
563 }
564 }
565 for (size_t i = skip_main ? 1 : 0; i < binaries_.size(); i++) {
566 for (size_t j = 0; j < binaries_[i].symtabs().size(); j++) {
567 const Elf64_Sym& s = binaries_[i].symtabs()[j];
568 std::string_view n(s.st_name + binaries_[i].strtab());
569 if (n == name && s.st_shndx == SHN_UNDEF && ELF64_ST_BIND(s.st_info) == STB_WEAK) {
570 LOG(WARNING) << "Found " << name << " at index as an weak symbol " << j << " of " << binaries_[i].path();
571 return std::make_optional(std::make_pair(i, j));
572 }
573 }
574 }
575 return std::nullopt;
576}
577
578Elf64_Addr DynLoader::TLSSymOffset(const std::string& name) {
579 // Intentional use of underflow
580 Elf64_Addr offset = 0x0;
581 for (size_t i = 0; i < binaries_.size(); i++) {
582 for (size_t j = 0; j < binaries_[i].symtabs().size(); j++) {
583 Elf64_Sym s = binaries_[i].symtabs()[j];
584 std::string n = s.st_name + binaries_[i].strtab();
585 if (n == name && s.st_shndx != SHN_UNDEF && ELF64_ST_TYPE(s.st_info) == STT_TLS) {
586 Elf64_Addr o = offset - binaries_[i].file_tls().p_memsz + s.st_value;
587 LOG(INFO) << "Found " << name << " at index " << j << " of " << binaries_[i].path() << LOG_BITS(o);
588 return offset - binaries_[i].file_tls().p_memsz + s.st_value;
589 }
590 }
591 if (binaries_[i].has_tls()) {
592 offset -= binaries_[i].file_tls().p_memsz;
593 }
594 }
595
596 // Workaround for TLS variable in libc.so such as errno
597 if (libc_mapping::sloader_libc_tls_variables.find(name) != libc_mapping::sloader_libc_tls_variables.end()) {
598 const char* addr = libc_mapping::sloader_libc_tls_variables[name];
599 return (reinterpret_cast<const char*>(sloader_dummy_to_secure_tls_space) + 4096 - addr);
600 }
601 LOG(FATAL) << "Cannot find " << name;
602 std::abort();
603}
604
605void DynLoader::Relocate() {
606 for (const auto& bin : binaries_) {
607 LOG(INFO) << bin.path();
608 if (relocated_[bin.path()]) continue;
609 relocated_[bin.path()] = true;
610
611 std::vector<Elf64_Rela> relas = bin.pltrelas();
612 // TODO: Use std::copy?
613 for (const auto r : bin.relas()) {
614 relas.emplace_back(r);
615 }
616
617 for (const auto& r : relas) {
618 CHECK_LT(ELF64_R_SYM(r.r_info), bin.symtabs().size());
619 Elf64_Sym s = bin.symtabs()[ELF64_R_SYM(r.r_info)];
620 std::string name = s.st_name + bin.strtab();
621 LOG(INFO) << ShowRela(r) << LOG_KEY(name);
622
623 switch (ELF64_R_TYPE(r.r_info)) {
624 case R_X86_64_GLOB_DAT:
625 case R_X86_64_JUMP_SLOT: {
626 LOG(INFO) << ShowRelocationType(ELF64_R_TYPE(r.r_info));
627 const auto opt = SearchSym(name);
628 Elf64_Addr sym_addr;
629
630 if (libc_mapping::sloader_libc_map.find(name) != libc_mapping::sloader_libc_map.end()) {
631 sym_addr = libc_mapping::sloader_libc_map[name];
632 } else if (opt) {
633 const auto [bin_index, sym_index] = opt.value();
634 sym_addr = binaries_[bin_index].GetSymbolAddr(sym_index);
635 } else {
636 LOG(WARNING) << "Cannot find " << name << LOG_KEY(bin.path());
637 break;
638 }
639
640 Elf64_Addr* reloc_addr = reinterpret_cast<Elf64_Addr*>(bin.base_addr() + r.r_offset);
641 LOG(INFO) << LOG_KEY(reloc_addr) << LOG_BITS(*reloc_addr) << LOG_BITS(sym_addr);
642 // TODO: Although glibc add sym_addr to the original value
643 // here
644 // https://github.com/akawashiro/glibc/blob/008003dc6e83439c5e04a744b7fd8197df19096e/sysdeps/x86_64/dl-machine.h#L561,
645 // We just assign it.
646 *reloc_addr = sym_addr;
647 break;
648 }
649 // TODO: Is is correct?
650 case R_X86_64_IRELATIVE:
651 case R_X86_64_RELATIVE: {
652 Elf64_Addr* reloc_addr = reinterpret_cast<Elf64_Addr*>(bin.base_addr() + r.r_offset);
653 *reloc_addr = reinterpret_cast<Elf64_Addr>(bin.base_addr() + r.r_addend);
654 break;
655 }
656 case R_X86_64_64: {
657 const auto opt = SearchSym(name);
658 Elf64_Addr sym_addr;
659
660 if (libc_mapping::sloader_libc_map.find(name) != libc_mapping::sloader_libc_map.end()) {
661 sym_addr = libc_mapping::sloader_libc_map[name];
662 } else if (opt) {
663 const auto [bin_index, sym_index] = opt.value();
664 sym_addr = binaries_[bin_index].GetSymbolAddr(sym_index);
665 } else {
666 LOG(WARNING) << "Cannot find " << name << LOG_KEY(bin.path());
667 break;
668 }
669
670 Elf64_Addr* reloc_addr = reinterpret_cast<Elf64_Addr*>(bin.base_addr() + r.r_offset);
671
672 // TODO: This is wrong, maybe. What is symbol value?
673 // Elf64_Sym sym = binaries_[bin_index].symtabs()[sym_index];
674 // *reloc_addr = bin.base_addr() + sym.st_value + r.r_addend;
675 *reloc_addr = sym_addr + r.r_addend;
676 break;
677 }
678 case R_X86_64_TPOFF64: {
679 Elf64_Addr* reloc_addr = reinterpret_cast<Elf64_Addr*>(bin.base_addr() + r.r_offset);
680 Elf64_Addr offset = TLSSymOffset(name);
681 *reloc_addr = offset;
682 break;
683 }
684 case R_X86_64_DTPMOD64: {
685 Elf64_Addr* reloc_addr = reinterpret_cast<Elf64_Addr*>(bin.base_addr() + r.r_offset);
686 // TODO: Need reference.
687 *reloc_addr = 0x1;
688 break;
689 }
690 case R_X86_64_DTPOFF64: {
691 break;
692 }
693 case R_X86_64_COPY: {
694 const auto opt = SearchSym(name, true);
695 void* src;
696 Elf64_Xword size;
697
698 if (libc_mapping::sloader_libc_map.find(name) != libc_mapping::sloader_libc_map.end()) {
699 src = reinterpret_cast<void*>(libc_mapping::sloader_libc_map[name]);
700 size = 8;
701 } else if (opt) {
702 const auto [bin_index, sym_index] = opt.value();
703 Elf64_Sym sym = binaries_[bin_index].symtabs()[sym_index];
704 src = reinterpret_cast<void*>(binaries_[bin_index].base_addr() + sym.st_value);
705 size = sym.st_size;
706 } else {
707 LOG(FATAL) << "Cannot find " << name;
708 std::abort();
709 break;
710 }
711 void* dest = reinterpret_cast<void*>(bin.base_addr() + r.r_offset);
712 LOG(INFO) << LOG_BITS(src) << LOG_BITS(dest) << LOG_BITS(*reinterpret_cast<const unsigned long*>(src))
713 << LOG_BITS(size);
714 std::memcpy(dest, src, size);
715 // std::abort();
716 break;
717 }
718 default: {
719 LOG(FATAL) << "Unsupported! " << ShowRela(r) << std::endl;
720 std::abort();
721 break;
722 }
723 }
724 }
725 }
726}
727
namespace {
// The single loader instance of this translation unit. Empty until
// InitializeDynLoader() has run; read through GetDynLoader().
std::optional<std::shared_ptr<DynLoader>> dynloader = std::nullopt;
}
731
732void InitializeDynLoader(const std::filesystem::path& main_path, const std::vector<std::string>& envs,
733 const std::vector<std::string>& args) {
734 // TODO: Remove this call
735 CHECK(dynloader == std::nullopt);
736 write_sloader_dummy_to_secure_tls_space();
737 dynloader = std::make_shared<DynLoader>(main_path, args, envs);
738}
739
740std::shared_ptr<DynLoader> GetDynLoader() {
741 CHECK(dynloader);
742 return *dynloader;
743}
744