From 2b4d6124d939116c0fffbb17b7282256cc51c579 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Sat, 8 Jul 2023 20:57:13 +0000 Subject: [PATCH] WIP --- third_party/mold/elf/arch-alpha.cc | 331 ---------- third_party/mold/elf/arch-arm32.cc | 737 --------------------- third_party/mold/elf/arch-arm64.cc | 595 ----------------- third_party/mold/elf/arch-i386.cc | 565 ---------------- third_party/mold/elf/arch-m68k.cc | 326 ---------- third_party/mold/elf/arch-ppc32.cc | 452 ------------- third_party/mold/elf/arch-ppc64v1.cc | 687 -------------------- third_party/mold/elf/arch-ppc64v2.cc | 555 ---------------- third_party/mold/elf/arch-riscv.cc | 938 --------------------------- third_party/mold/elf/arch-s390x.cc | 491 -------------- third_party/mold/elf/arch-sh4.cc | 355 ---------- third_party/mold/elf/arch-sparc64.cc | 622 ------------------ third_party/mold/elf/cmdline.cc | 3 +- third_party/mold/elf/elf.h | 2 +- third_party/mold/elf/main.cc | 7 +- third_party/mold/elf/mold.h | 8 +- third_party/mold/fake_tbb.h | 40 ++ third_party/mold/git-hash.cc | 5 + third_party/mold/hyperloglog.cc | 3 +- third_party/mold/mold.mk | 7 +- 20 files changed, 63 insertions(+), 6666 deletions(-) delete mode 100644 third_party/mold/elf/arch-alpha.cc delete mode 100644 third_party/mold/elf/arch-arm32.cc delete mode 100644 third_party/mold/elf/arch-arm64.cc delete mode 100644 third_party/mold/elf/arch-i386.cc delete mode 100644 third_party/mold/elf/arch-m68k.cc delete mode 100644 third_party/mold/elf/arch-ppc32.cc delete mode 100644 third_party/mold/elf/arch-ppc64v1.cc delete mode 100644 third_party/mold/elf/arch-ppc64v2.cc delete mode 100644 third_party/mold/elf/arch-riscv.cc delete mode 100644 third_party/mold/elf/arch-s390x.cc delete mode 100644 third_party/mold/elf/arch-sh4.cc delete mode 100644 third_party/mold/elf/arch-sparc64.cc create mode 100644 third_party/mold/git-hash.cc diff --git a/third_party/mold/elf/arch-alpha.cc b/third_party/mold/elf/arch-alpha.cc deleted file mode 100644 index e0b332d7c8f..00000000000 --- a/third_party/mold/elf/arch-alpha.cc +++ /dev/null @@ -1,331 +0,0 @@ -// clang-format off -// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment -// Corporation) in the early '90s. It aimed to be an ISA that would last -// 25 years. DEC expected Alpha would become 1000x faster during that time -// span. Since the ISA was developed from scratch for future machines, -// it's 64-bit from the beginning. There's no 32-bit variant. -// -// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows -// NT to it. But it wasn't a huge commercial success. -// -// DEC was acquired by Compaq in 1997. In the late '90s, Intel and -// Hewlett-Packard were advertising that their upcoming Itanium processor -// would achieve significantly better performance than RISC processors, so -// Compaq decided to discontinue the Alpha processor line to switch to -// Itanium. Itanium resulted in a miserable failure, but it still suceeded -// to wipe out several RISC processors just by promising overly optimistic -// perf numbers. Alpha as an ISA would probably have been fine after 25 -// years since its introduction (which is 1992 + 25 = 2017), but the -// company and its market didn't last that long. -// -// From the linker's point of view, there are a few peculiarities in its -// psABI as shown below: -// -// - Alpha lacks PC-relative memory load/store instructions, so it uses -// register-relative load/store instructions in position-independent -// code. 
Specifically, GP (which is an alias for $r29) is always -// maintained to refer to .got+0x8000, and global variables' addresses -// are loaded in a GP-relative manner. -// -// - It looks like even function addresses are first loaded to register -// in a GP-relative manner before calling it. We can relax it to -// convert the instruction sequence with a direct branch instruction, -// but by default, object files don't use a direct branch to call a -// function. Therefore, by default, we don't need to create a PLT. -// Any function call is made by first reading its address from GOT and -// jump to the address. - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ALPHA; - -// A 32-bit immediate can be materialized in a register with a "load high" -// and a "load low" instruction sequence. The first instruction sets the -// upper 16 bits in a register, and the second one set the lower 16 -// bits. When doing so, they sign-extend an immediate. Therefore, if the -// 15th bit of an immediate happens to be 1, setting a "low half" value -// negates the upper 16 bit values that has already been set in a -// register. To compensate that, we need to add 0x8000 when setting the -// upper 16 bits. -static u32 hi(u32 val) { - return bits(val + 0x8000, 31, 16); -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) {} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 GP = ctx.got->shdr.sh_addr + 0x8000; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_ALPHA_GPREL32: - *(ul32 *)loc = S + A - GP; - break; - case R_ALPHA_LITERAL: - if (A) - *(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP; - else - *(ul16 *)loc = GOT + G - GP; - break; - case R_ALPHA_BRSGP: - *(ul32 *)loc |= bits(S + A - P - 4, 22, 0); - break; - case R_ALPHA_GPDISP: - *(ul16 *)loc = hi(GP - P); - *(ul16 *)(loc + A) = GP - P; - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = S + A - P; - break; - case R_ALPHA_GPRELHIGH: - *(ul16 *)loc = hi(S + A - GP); - break; - case R_ALPHA_GPRELLOW: - *(ul16 *)loc = S + A - GP; - break; - case R_ALPHA_TLSGD: - *(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP; - break; - case R_ALPHA_TLSLDM: - *(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP; - break; - case R_ALPHA_DTPRELHI: - *(ul16 *)loc = hi(S + A - ctx.dtp_addr); - break; - case R_ALPHA_DTPRELLO: - *(ul16 *)loc = S + A - ctx.dtp_addr; - break; - 
case R_ALPHA_GOTTPREL: - *(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP; - break; - case R_ALPHA_TPRELHI: - *(ul16 *)loc = hi(S + A - ctx.tp_addr); - break; - case R_ALPHA_TPRELLO: - *(ul16 *)loc = S + A - ctx.tp_addr; - break; - case R_ALPHA_LITUSE: - case R_ALPHA_HINT: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_ALPHA_REFLONG: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_ALPHA_REFQUAD: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha"; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_ALPHA_LITERAL: - if (rel.r_addend) - ctx.extra.got->add_symbol(sym, rel.r_addend); - else - sym.flags |= NEEDS_GOT; - break; - case R_ALPHA_SREL32: - scan_pcrel(ctx, sym, rel); - break; - case R_ALPHA_BRSGP: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_ALPHA_TLSGD: - sym.flags |= NEEDS_TLSGD; - break; - case R_ALPHA_TLSLDM: - ctx.needs_tlsld = true; - break; - case R_ALPHA_GOTTPREL: - sym.flags |= NEEDS_GOTTP; - break; - case R_ALPHA_TPRELHI: - case R_ALPHA_TPRELLO: - check_tlsle(ctx, sym, rel); - break; - case R_ALPHA_GPREL32: - case R_ALPHA_LITUSE: - case R_ALPHA_GPDISP: - case R_ALPHA_HINT: - case R_ALPHA_GPRELHIGH: - case R_ALPHA_GPRELLOW: - case R_ALPHA_DTPRELHI: - case R_ALPHA_DTPRELLO: - break; - default: - Fatal(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -// An R_ALPHA_LITERAL relocation may request the linker to create a GOT -// entry for an external symbol with a non-zero addend. This is an unusual -// request which is not found in any other targets. -// -// Referring an external symbol with a non-zero addend is a bad practice -// because we need to create as many dynamic relocations as the number of -// distinctive addends for the same symbol. -// -// We don't want to mess up the implementation of the common GOT section -// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT -// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created -// not in .got but in .alpha_got. -// -// Since .alpha_got entries are accessed relative to GP, .alpha_got -// needs to be close enough to .got. It's actually placed next to .got. 
-void AlphaGotSection::add_symbol(Symbol &sym, i64 addend) { - assert(addend); - std::scoped_lock lock(mu); - entries.push_back({&sym, addend}); -} - -bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) { - return std::tuple(a.sym->file->priority, a.sym->sym_idx, a.addend) < - std::tuple(b.sym->file->priority, b.sym->sym_idx, b.addend); -}; - -u64 AlphaGotSection::get_addr(Symbol &sym, i64 addend) { - auto it = std::lower_bound(entries.begin(), entries.end(), Entry{&sym, addend}); - assert(it != entries.end()); - return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word); -} - -i64 AlphaGotSection::get_reldyn_size(Context &ctx) const { - i64 n = 0; - for (const Entry &e : entries) - if (e.sym->is_imported || (ctx.arg.pic && !e.sym->is_absolute())) - n++; - return n; -} - -void AlphaGotSection::finalize() { - sort(entries); - remove_duplicates(entries); - shdr.sh_size = entries.size() * sizeof(Word); -} - -void AlphaGotSection::copy_buf(Context &ctx) { - ElfRel *dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - reldyn_offset); - - for (i64 i = 0; i < entries.size(); i++) { - Entry &e = entries[i]; - u64 P = this->shdr.sh_addr + sizeof(Word) * i; - ul64 *buf = (ul64 *)(ctx.buf + this->shdr.sh_offset + sizeof(Word) * i); - - if (e.sym->is_imported) { - *buf = ctx.arg.apply_dynamic_relocs ? e.addend : 0; - *dynrel++ = ElfRel(P, E::R_ABS, e.sym->get_dynsym_idx(ctx), e.addend); - } else { - *buf = e.sym->get_addr(ctx) + e.addend; - if (ctx.arg.pic && !e.sym->is_absolute()) - *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, *buf); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm32.cc b/third_party/mold/elf/arch-arm32.cc deleted file mode 100644 index 5ac207b0aec..00000000000 --- a/third_party/mold/elf/arch-arm32.cc +++ /dev/null @@ -1,737 +0,0 @@ -// clang-format off -// ARM32 is a bit special from the linker's viewpoint because ARM -// processors support two different instruction encodings: Thumb and -// ARM (in a narrower sense). Thumb instructions are either 16 bits or -// 32 bits, while ARM instructions are all 32 bits. Feature-wise, -// thumb is a subset of ARM, so not all ARM instructions are -// representable in Thumb. -// -// ARM processors originally supported only ARM instructions. Thumb -// instructions were later added to increase code density. -// -// ARM processors runs in either ARM mode or Thumb mode. The mode can -// be switched using BX (branch and mode exchange)-family instructions. -// We need to use that instructions to, for example, call a function -// encoded in Thumb from a function encoded in ARM. Sometimes, the -// linker even has to emit an interworking thunk code to switch mode. -// -// ARM instructions are aligned to 4 byte boundaries. Thumb are to 2 -// byte boundaries. -// -// You can distinguish Thumb functions from ARM functions by looking -// at the least significant bit (LSB) of its "address". If LSB is 0, -// it's ARM; otherwise, Thumb. -// -// For example, if a symbol `foo` is of type STT_FUNC and has value -// 0x2001, `foo` is a function using Thumb instructions whose address -// is 0x2000 (not 0x2001, as Thumb instructions are always 2-byte -// aligned). Likewise, if a function pointer has value 0x2001, it -// refers a Thumb function at 0x2000. 
-// -// https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ARM32; - -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_REL32: - case R_ARM_TARGET1: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TLS_GD32: - case R_ARM_TLS_LDM32: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_IE32: - case R_ARM_TLS_LE32: - case R_ARM_TLS_GOTDESC: - case R_ARM_TARGET2: - return *(il32 *)loc; - case R_ARM_THM_JUMP11: - return sign_extend(*(ul16 *)loc, 10) << 1; - case R_ARM_THM_CALL: - case R_ARM_THM_JUMP24: - case R_ARM_THM_TLS_CALL: { - u32 S = bit(*(ul16 *)loc, 10); - u32 J1 = bit(*(ul16 *)(loc + 2), 13); - u32 J2 = bit(*(ul16 *)(loc + 2), 11); - u32 I1 = !(J1 ^ S); - u32 I2 = !(J2 ^ S); - u32 imm10 = bits(*(ul16 *)loc, 9, 0); - u32 imm11 = bits(*(ul16 *)(loc + 2), 10, 0); - u32 val = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1); - return sign_extend(val, 24); - } - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - case R_ARM_TLS_CALL: - return sign_extend(*(ul32 *)loc, 23) << 2; - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_MOVT_PREL: - case R_ARM_MOVT_ABS: { - u32 imm12 = bits(*(ul32 *)loc, 11, 0); - u32 imm4 = bits(*(ul32 *)loc, 19, 16); - return sign_extend((imm4 << 12) | imm12, 15); - } - case R_ARM_PREL31: - return sign_extend(*(ul32 *)loc, 30); - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_THM_MOVT_PREL: - case R_ARM_THM_MOVT_ABS: { - u32 imm4 = bits(*(ul16 *)loc, 3, 0); - u32 i = bit(*(ul16 *)loc, 10); - u32 imm3 = bits(*(ul16 *)(loc + 2), 14, 12); - u32 imm8 = bits(*(ul16 *)(loc + 2), 7, 0); - u32 val = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; - return sign_extend(val, 15); - } - default: - return 0; - } -} - -static void write_mov_imm(u8 *loc, u32 val) { - u32 imm12 = bits(val, 11, 0); - u32 imm4 = bits(val, 15, 12); - *(ul32 *)loc = (*(ul32 *)loc & 0xfff0f000) | (imm4 << 16) | imm12; -} - -static void write_thm_b_imm(u8 *loc, u32 val) { - // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/BL--BLX--immediate- - u32 sign = bit(val, 24); - u32 I1 = bit(val, 23); - u32 I2 = bit(val, 22); - u32 J1 = !I1 ^ sign; - u32 J2 = !I2 ^ sign; - u32 imm10 = bits(val, 21, 12); - u32 imm11 = bits(val, 11, 1); - - ul16 *buf = (ul16 *)loc; - buf[0] = (buf[0] & 0b1111'1000'0000'0000) | (sign << 10) | imm10; - buf[1] = (buf[1] & 0b1101'0000'0000'0000) | (J1 << 13) | (J2 << 11) | imm11; -} - -static void write_thm_mov_imm(u8 *loc, u32 val) { - // https://developer.arm.com/documentation/ddi0406/cb/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/MOVT - u32 imm4 = bits(val, 15, 12); - u32 i = bit(val, 11); - u32 imm3 = bits(val, 10, 8); - u32 imm8 = bits(val, 7, 0); - - ul16 *buf = (ul16 *)loc; - buf[0] = (buf[0] & 0b1111'1011'1111'0000) | (i << 10) | imm4; - buf[1] = (buf[1] & 0b1000'1111'0000'0000) | (imm3 << 12) | imm8; -} - -template <> -void write_addend(u8 *loc, i64 val, const ElfRel &rel) { - switch (rel.r_type) { - case R_ARM_NONE: - break; - case R_ARM_ABS32: - case R_ARM_REL32: - case R_ARM_TARGET1: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TLS_GD32: - case R_ARM_TLS_LDM32: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_IE32: - case 
R_ARM_TLS_LE32: - case R_ARM_TLS_GOTDESC: - case R_ARM_TARGET2: - *(ul32 *)loc = val; - break; - case R_ARM_THM_JUMP11: - *(ul16 *)loc = (*(ul16 *)loc & 0xf800) | bits(val, 11, 1); - break; - case R_ARM_THM_CALL: - case R_ARM_THM_JUMP24: - case R_ARM_THM_TLS_CALL: - write_thm_b_imm(loc, val); - break; - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - break; - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_MOVT_PREL: - case R_ARM_MOVT_ABS: - write_mov_imm(loc, val); - break; - case R_ARM_PREL31: - *(ul32 *)loc = (*(ul32 *)loc & 0x8000'0000) | (val & 0x7fff'ffff); - break; - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_THM_MOVT_PREL: - case R_ARM_THM_MOVT_ABS: - write_thm_mov_imm(loc, val); - break; - default: - unreachable(); - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - 0xe52d'e004, // push {lr} - 0xe59f'e004, // ldr lr, 2f - 0xe08f'e00e, // 1: add lr, pc, lr - 0xe5be'f008, // ldr pc, [lr, #8]! - 0x0000'0000, // 2: .word .got.plt - 1b - 8 - 0xe320'f000, // nop - 0xe320'f000, // nop - 0xe320'f000, // nop - }; - - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16; -} - -static const ul32 plt_entry[] = { - 0xe59f'c004, // 1: ldr ip, 2f - 0xe08c'c00f, // add ip, ip, pc - 0xe59c'f000, // ldr pc, [ip] - 0x0000'0000, // 2: .word sym@GOT - 1b -}; - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - memcpy(buf, plt_entry, sizeof(plt_entry)); - *(ul32 *)(buf + 12) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 12; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - memcpy(buf, plt_entry, sizeof(plt_entry)); - *(ul32 *)(buf + 12) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 12; -} - -// ARM does not use .eh_frame for exception handling. Instead, it uses -// .ARM.exidx and .ARM.extab. So this function is empty. -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) {} - -// ARM and Thumb branch instructions can jump within ±16 MiB. 
-static bool is_jump_reachable(i64 val) { - return sign_extend(val, 24) == val; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable { - for (; i < output_section->thunks.size(); i++) { - i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset - - addr; - if (is_jump_reachable(disp)) - return disp; - } - unreachable(); - }; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || rel.r_type == R_ARM_V4BX) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(*this, rel); - u64 P = get_addr() + rel.r_offset; - u64 T = S & 1; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - auto get_thumb_thunk_addr = [&] { return get_thunk_addr(i); }; - auto get_arm_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; - - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_TARGET1: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_ARM_REL32: - *(ul32 *)loc = S + A - P; - break; - case R_ARM_THM_CALL: { - if (sym.is_remaining_undef_weak()) { - // On ARM, calling an weak undefined symbol jumps to the - // next instruction. - *(ul32 *)loc = 0x8000'f3af; // NOP.W - break; - } - - // THM_CALL relocation refers either BL or BLX instruction. - // They are different in only one bit. We need to use BL if - // the jump target is Thumb. Otherwise, use BLX. - i64 val = S + A - P; - if (is_jump_reachable(val)) { - if (T) { - write_thm_b_imm(loc, val); - *(ul16 *)(loc + 2) |= 0x1000; // rewrite to BL - } else { - write_thm_b_imm(loc, align_to(val, 4)); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX - } - } else { - write_thm_b_imm(loc, align_to(get_arm_thunk_addr() + A - P, 4)); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite to BLX - } - break; - } - case R_ARM_BASE_PREL: - *(ul32 *)loc = GOT + A - P; - break; - case R_ARM_GOTOFF32: - *(ul32 *)loc = ((S + A) | T) - GOT; - break; - case R_ARM_GOT_PREL: - case R_ARM_TARGET2: - *(ul32 *)loc = GOT + G + A - P; - break; - case R_ARM_GOT_BREL: - *(ul32 *)loc = G + A; - break; - case R_ARM_CALL: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - break; - } - - // Just like THM_CALL, ARM_CALL relocation refers either BL or - // BLX instruction. We may need to rewrite BL → BLX or BLX → BL. 
- bool is_bl = ((*(ul32 *)loc & 0xff00'0000) == 0xeb00'0000); - bool is_blx = ((*(ul32 *)loc & 0xfe00'0000) == 0xfa00'0000); - if (!is_bl && !is_blx) - Fatal(ctx) << *this << ": R_ARM_CALL refers neither BL nor BLX"; - - u64 val = S + A - P; - if (is_jump_reachable(val)) { - if (T) { - *(ul32 *)loc = 0xfa00'0000; // BLX - *(ul32 *)loc |= (bit(val, 1) << 24) | bits(val, 25, 2); - } else { - *(ul32 *)loc = 0xeb00'0000; // BL - *(ul32 *)loc |= bits(val, 25, 2); - } - } else { - *(ul32 *)loc = 0xeb00'0000; // BL - *(ul32 *)loc |= bits(get_arm_thunk_addr() + A - P, 25, 2); - } - break; - } - case R_ARM_JUMP24: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - break; - } - - // These relocs refers a B (unconditional branch) instruction. - // Unlike BL or BLX, we can't rewrite B to BX in place when the - // processor mode switch is required because BX doesn't takes an - // immediate; it takes only a register. So if mode switch is - // required, we jump to a linker-synthesized thunk which does the - // job with a longer code sequence. - u64 val = S + A - P; - if (!is_jump_reachable(val) || T) - val = get_arm_thunk_addr() + A - P; - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - break; - } - case R_ARM_PLT32: - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0xe320'f000; // NOP - } else { - u64 val = (T ? get_arm_thunk_addr() : S) + A - P; - *(ul32 *)loc = (*(ul32 *)loc & 0xff00'0000) | bits(val, 25, 2); - } - break; - case R_ARM_THM_JUMP11: - assert(T); - check(S + A - P, -(1 << 11), 1 << 11); - *(ul16 *)loc &= 0xf800; - *(ul16 *)loc |= bits(S + A - P, 11, 1); - break; - case R_ARM_THM_JUMP19: { - i64 val = S + A - P; - check(val, -(1 << 19), 1 << 19); - - // sign:J2:J1:imm6:imm11:'0' - u32 sign = bit(val, 20); - u32 J2 = bit(val, 19); - u32 J1 = bit(val, 18); - u32 imm6 = bits(val, 17, 12); - u32 imm11 = bits(val, 11, 1); - - *(ul16 *)loc &= 0b1111'1011'1100'0000; - *(ul16 *)loc |= (sign << 10) | imm6; - - *(ul16 *)(loc + 2) &= 0b1101'0000'0000'0000; - *(ul16 *)(loc + 2) |= (J2 << 13) | (J1 << 11) | imm11; - break; - } - case R_ARM_THM_JUMP24: { - if (sym.is_remaining_undef_weak()) { - *(ul32 *)loc = 0x8000'f3af; // NOP - break; - } - - // Just like R_ARM_JUMP24, we need to jump to a thunk if we need to - // switch processor mode. 
- u64 val = S + A - P; - if (!is_jump_reachable(val) || !T) - val = get_thumb_thunk_addr() + A - P; - write_thm_b_imm(loc, val); - break; - } - case R_ARM_MOVW_PREL_NC: - write_mov_imm(loc, ((S + A) | T) - P); - break; - case R_ARM_MOVW_ABS_NC: - write_mov_imm(loc, (S + A) | T); - break; - case R_ARM_THM_MOVW_PREL_NC: - write_thm_mov_imm(loc, ((S + A) | T) - P); - break; - case R_ARM_PREL31: - check(S + A - P, -(1LL << 30), 1LL << 30); - *(ul32 *)loc &= 0x8000'0000; - *(ul32 *)loc |= (S + A - P) & 0x7fff'ffff; - break; - case R_ARM_THM_MOVW_ABS_NC: - write_thm_mov_imm(loc, (S + A) | T); - break; - case R_ARM_MOVT_PREL: - write_mov_imm(loc, (S + A - P) >> 16); - break; - case R_ARM_THM_MOVT_PREL: - write_thm_mov_imm(loc, (S + A - P) >> 16); - break; - case R_ARM_MOVT_ABS: - write_mov_imm(loc, (S + A) >> 16); - break; - case R_ARM_THM_MOVT_ABS: - write_thm_mov_imm(loc, (S + A) >> 16); - break; - case R_ARM_TLS_GD32: - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - P; - break; - case R_ARM_TLS_LDM32: - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - P; - break; - case R_ARM_TLS_LDO32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_ARM_TLS_IE32: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - P; - break; - case R_ARM_TLS_LE32: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - case R_ARM_TLS_GOTDESC: - if (sym.has_tlsdesc(ctx)) { - // A is odd if the corresponding TLS_CALL is Thumb. - if (A & 1) - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 6; - else - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - 4; - } else { - *(ul32 *)loc = S - ctx.tp_addr; - } - break; - case R_ARM_TLS_CALL: - if (sym.has_tlsdesc(ctx)) { - // BL - *(ul32 *)loc = 0xeb00'0000 | bits(get_tls_trampoline_addr(P + 8), 25, 2); - } else { - // BL -> NOP - *(ul32 *)loc = 0xe320'f000; - } - break; - case R_ARM_THM_TLS_CALL: - if (sym.has_tlsdesc(ctx)) { - u64 val = align_to(get_tls_trampoline_addr(P + 4), 4); - write_thm_b_imm(loc, val); - *(ul16 *)(loc + 2) &= ~0x1000; // rewrite BL with BLX - } else { - // BL -> NOP.W - *(ul32 *)loc = 0x8000'f3af; - } - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(*this, rel); - - switch (rel.r_type) { - case R_ARM_ABS32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_ARM_TLS_LDO32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_MOVT_ABS: - case R_ARM_THM_MOVT_ABS: - case R_ARM_TARGET1: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_ARM_THM_CALL: - case R_ARM_CALL: - case R_ARM_JUMP24: - case R_ARM_PLT32: - case R_ARM_THM_JUMP24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_ARM_GOT_PREL: - case R_ARM_GOT_BREL: - case R_ARM_TARGET2: - sym.flags |= NEEDS_GOT; - break; - case R_ARM_MOVT_PREL: - case R_ARM_THM_MOVT_PREL: - case R_ARM_PREL31: - scan_pcrel(ctx, sym, rel); - break; - case R_ARM_TLS_GD32: - sym.flags |= NEEDS_TLSGD; - break; - case R_ARM_TLS_LDM32: - ctx.needs_tlsld = true; - break; - case R_ARM_TLS_IE32: - sym.flags |= NEEDS_GOTTP; - break; - case R_ARM_TLS_GOTDESC: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_ARM_TLS_LE32: - check_tlsle(ctx, sym, rel); - break; - case R_ARM_REL32: - case R_ARM_BASE_PREL: - case R_ARM_GOTOFF32: - case R_ARM_THM_JUMP11: - case R_ARM_THM_JUMP19: - case R_ARM_MOVW_PREL_NC: - case R_ARM_MOVW_ABS_NC: - case R_ARM_THM_MOVW_PREL_NC: - case R_ARM_THM_MOVW_ABS_NC: - case R_ARM_TLS_LDO32: - case R_ARM_TLS_CALL: - case R_ARM_THM_TLS_CALL: - case R_ARM_V4BX: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // TLS trampoline code. ARM32's TLSDESC is designed so that this - // common piece of code is factored out from object files to reduce - // output size. Since no one provide, the linker has to synthesize it. - static ul32 hdr[] = { - 0xe08e'0000, // add r0, lr, r0 - 0xe590'1004, // ldr r1, [r0, #4] - 0xe12f'ff11, // bx r1 - }; - - // This is a range extension and mode switch thunk. - // It has two entry points: +0 for Thumb and +4 for ARM. 
- const u8 entry[] = { - // .thumb - 0xfc, 0x46, // mov ip, pc - 0x60, 0x47, // bx ip # jumps to the following `ldr` insn - // .arm - 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, 2f - 0x0f, 0xc0, 0x8c, 0xe0, // 1: add ip, ip, pc - 0x1c, 0xff, 0x2f, 0xe1, // bx ip - 0x00, 0x00, 0x00, 0x00, // 2: .word sym - 1b - }; - - static_assert(E::thunk_hdr_size == sizeof(hdr)); - static_assert(E::thunk_size == sizeof(entry)); - - memcpy(buf, hdr, sizeof(hdr)); - - for (i64 i = 0; i < symbols.size(); i++) { - u8 *loc = buf + sizeof(hdr) + i * sizeof(entry); - memcpy(loc, entry, sizeof(entry)); - - u64 S = symbols[i]->get_addr(ctx); - u64 P = output_section.shdr.sh_addr + offset + sizeof(hdr) + i * sizeof(entry); - *(ul32 *)(loc + 16) = S - P - 16; - } -} - -// ARM executables use an .ARM.exidx section to look up an exception -// handling record for the current instruction pointer. The table needs -// to be sorted by their addresses. -// -// Other target uses .eh_frame_hdr instead for the same purpose. -// I don't know why only ARM uses the different mechanism, but it's -// likely that it's due to some historical reason. -// -// This function sorts .ARM.exidx records. -void fixup_arm_exidx_section(Context &ctx) { - Timer t(ctx, "fixup_arm_exidx_section"); - - OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX); - if (!osec) - return; - - // .ARM.exidx records consists of a signed 31-bit relative address - // and a 32-bit value. The relative address indicates the start - // address of a function that the record covers. The value is one of - // the followings: - // - // 1. CANTUNWIND indicating that there's no unwinding info for the function, - // 2. a compact unwinding record encoded into a 32-bit value, or - // 3. a 31-bit relative address which points to a larger record in - // the .ARM.extab section. - // - // CANTUNWIND is value 1. The most significant bit is set in (2) but - // not in (3). So we can distinguished them just by looking at a value. - const u32 EXIDX_CANTUNWIND = 1; - - struct Entry { - ul32 addr; - ul32 val; - }; - - if (osec->shdr.sh_size % sizeof(Entry)) - Fatal(ctx) << "invalid .ARM.exidx section size"; - - Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset); - i64 num_entries = osec->shdr.sh_size / sizeof(Entry); - - // Entry's addresses are relative to themselves. In order to sort - // records by addresses, we first translate them so that the addresses - // are relative to the beginning of the section. - auto is_relative = [](u32 val) { - return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000); - }; - - tbb::parallel_for((i64)0, num_entries, [&](i64 i) { - i64 offset = sizeof(Entry) * i; - ent[i].addr = sign_extend(ent[i].addr, 30) + offset; - if (is_relative(ent[i].val)) - ent[i].val = 0x7fff'ffff & (ent[i].val + offset); - }); - - tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { - return a.addr < b.addr; - }); - - // Make addresses relative to themselves. - tbb::parallel_for((i64)0, num_entries, [&](i64 i) { - i64 offset = sizeof(Entry) * i; - ent[i].addr = 0x7fff'ffff & (ent[i].addr - offset); - if (is_relative(ent[i].val)) - ent[i].val = 0x7fff'ffff & (ent[i].val - offset); - }); - - // .ARM.exidx's sh_link should be set to the .text section index. - // Runtime doesn't care about it, but the binutils's strip command does. 
- if (ctx.shdr) { - if (Chunk *text = find_section(ctx, ".text")) { - osec->shdr.sh_link = text->shndx; - ctx.shdr->copy_buf(ctx); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-arm64.cc b/third_party/mold/elf/arch-arm64.cc deleted file mode 100644 index ee1f5c09832..00000000000 --- a/third_party/mold/elf/arch-arm64.cc +++ /dev/null @@ -1,595 +0,0 @@ -// clang-format off -// This file contains ARM64-specific code. Being new, the ARM64's ELF -// psABI doesn't have anything peculiar. ARM64 is a clean RISC -// instruction set that supports PC-relative load/store instructions. -// -// Unlike ARM32, instructions length doesn't vary. All ARM64 -// instructions are 4 bytes long. -// -// Branch instructions used for function call can jump within ±128 MiB. -// We need to create range extension thunks to support binaries whose -// .text is larger than that. -// -// Unlike most other targets, the TLSDESC access model is used by default -// for -fPIC to access thread-local variables instead of the less -// efficient GD model. You can still enable GD but it needs the -// -mtls-dialect=trad flag. Since GD is used rarely, we don't need to -// implement GD → LE relaxation. -// -// https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = ARM64; - -static void write_adrp(u8 *buf, u64 val) { - *(ul32 *)buf |= (bits(val, 13, 12) << 29) | (bits(val, 32, 14) << 5); -} - -static void write_adr(u8 *buf, u64 val) { - *(ul32 *)buf |= (bits(val, 1, 0) << 29) | (bits(val, 20, 2) << 5); -} - -static void write_movn_movz(u8 *buf, i64 val) { - *(ul32 *)buf &= 0b0000'0000'0110'0000'0000'0000'0001'1111; - - if (val >= 0) - *(ul32 *)buf |= 0xd280'0000 | (bits(val, 15, 0) << 5); // rewrite to movz - else - *(ul32 *)buf |= 0x9280'0000 | (bits(~val, 15, 0) << 5); // rewrite to movn -} - -static u64 page(u64 val) { - return val & 0xffff'ffff'ffff'f000; -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - 0xa9bf'7bf0, // stp x16, x30, [sp,#-16]! 
- 0x9000'0010, // adrp x16, .got.plt[2] - 0xf940'0211, // ldr x17, [x16, .got.plt[2]] - 0x9100'0210, // add x16, x16, .got.plt[2] - 0xd61f'0220, // br x17 - 0xd503'201f, // nop - 0xd503'201f, // nop - 0xd503'201f, // nop - }; - - u64 gotplt = ctx.gotplt->shdr.sh_addr + 16; - u64 plt = ctx.plt->shdr.sh_addr; - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf + 4, page(gotplt) - page(plt + 4)); - *(ul32 *)(buf + 8) |= bits(gotplt, 11, 3) << 10; - *(ul32 *)(buf + 12) |= (gotplt & 0xfff) << 10; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { - 0x9000'0010, // adrp x16, .got.plt[n] - 0xf940'0211, // ldr x17, [x16, .got.plt[n]] - 0x9100'0210, // add x16, x16, .got.plt[n] - 0xd61f'0220, // br x17 - }; - - u64 gotplt = sym.get_gotplt_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf, page(gotplt) - page(plt)); - *(ul32 *)(buf + 4) |= bits(gotplt, 11, 3) << 10; - *(ul32 *)(buf + 8) |= (gotplt & 0xfff) << 10; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { - 0x9000'0010, // adrp x16, GOT[n] - 0xf940'0211, // ldr x17, [x16, GOT[n]] - 0xd61f'0220, // br x17 - 0xd503'201f, // nop - }; - - u64 got = sym.get_got_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - write_adrp(buf, page(got) - page(plt)); - *(ul32 *)(buf + 4) |= bits(got, 11, 3) << 10; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_AARCH64_ABS64: - *(ul64 *)loc = val; - break; - case R_AARCH64_PREL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_AARCH64_PREL64: - *(ul64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static bool is_adrp(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADRP--Form-PC-relative-address-to-4KB-page- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 24) & 0b1001'1111) == 0b1001'0000; -} - -static bool is_ldr(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDR--immediate---Load-Register--immediate-- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1111'1001'0100; -} - -static bool is_add(u8 *loc) { - // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--immediate---Add--immediate-- - u32 insn = *(ul32 *)loc; - return (bits(insn, 31, 20) & 0b1111'1111'1100) == 0b1001'0001'0000; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * 
sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_ADD_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 0) << 10; - break; - case R_AARCH64_LDST16_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 1) << 10; - break; - case R_AARCH64_LDST32_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 2) << 10; - break; - case R_AARCH64_LDST64_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 3) << 10; - break; - case R_AARCH64_LDST128_ABS_LO12_NC: - *(ul32 *)loc |= bits(S + A, 11, 4) << 10; - break; - case R_AARCH64_MOVW_UABS_G0: - check(S + A, 0, 1 << 16); - *(ul32 *)loc |= bits(S + A, 15, 0) << 5; - break; - case R_AARCH64_MOVW_UABS_G0_NC: - *(ul32 *)loc |= bits(S + A, 15, 0) << 5; - break; - case R_AARCH64_MOVW_UABS_G1: - check(S + A, 0, 1LL << 32); - *(ul32 *)loc |= bits(S + A, 31, 16) << 5; - break; - case R_AARCH64_MOVW_UABS_G1_NC: - *(ul32 *)loc |= bits(S + A, 31, 16) << 5; - break; - case R_AARCH64_MOVW_UABS_G2: - check(S + A, 0, 1LL << 48); - *(ul32 *)loc |= bits(S + A, 47, 32) << 5; - break; - case R_AARCH64_MOVW_UABS_G2_NC: - *(ul32 *)loc |= bits(S + A, 47, 32) << 5; - break; - case R_AARCH64_MOVW_UABS_G3: - *(ul32 *)loc |= bits(S + A, 63, 48) << 5; - break; - case R_AARCH64_ADR_GOT_PAGE: - if (sym.has_got(ctx)) { - i64 val = page(G + GOT + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - } else { - // Relax GOT-loading ADRP+LDR to an immediate ADRP+ADD - i64 val = page(S + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - - u32 reg = bits(*(ul32 *)loc, 4, 0); - *(ul32 *)(loc + 4) = 0x9100'0000 | (reg << 5) | reg; // ADD - *(ul32 *)(loc + 4) |= bits(S + A, 11, 0) << 10; - i++; - } - break; - case R_AARCH64_ADR_PREL_PG_HI21: { - // The ARM64 psABI defines that an `ADRP x0, foo` and `ADD x0, x0, - // :lo12: foo` instruction pair to materialize a PC-relative address - // in a register can be relaxed to `NOP` followed by `ADR x0, foo` - // if foo is in PC ± 1 MiB. - if (ctx.arg.relax && i + 1 < rels.size() && - sign_extend(S + A - P - 4, 20) == S + A - P - 4) { - const ElfRel &rel2 = rels[i + 1]; - if (rel2.r_type == R_AARCH64_ADD_ABS_LO12_NC && - rel2.r_sym == rel.r_sym && - rel2.r_offset == rel.r_offset + 4 && - rel2.r_addend == rel.r_addend && - is_adrp(loc) && - is_add(loc + 4)) { - u32 reg1 = bits(*(ul32 *)loc, 4, 0); - u32 reg2 = bits(*(ul32 *)(loc + 4), 4, 0); - if (reg1 == reg2) { - *(ul32 *)loc = 0xd503'201f; // nop - *(ul32 *)(loc + 4) = 0x1000'0000 | reg1; // adr - write_adr(loc + 4, S + A - P - 4); - i++; - break; - } - } - } - - i64 val = page(S + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_ADR_PREL_LO21: - check(S + A - P, -(1LL << 20), 1LL << 20); - write_adr(loc, S + A - P); - break; - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: { - if (sym.is_remaining_undef_weak()) { - // On ARM, calling an weak undefined symbol jumps to the - // next instruction. 
- *(ul32 *)loc = 0xd503'201f; // nop - break; - } - - i64 val = S + A - P; - if (val < -(1 << 27) || (1 << 27) <= val) - val = get_thunk_addr(i) + A - P; - *(ul32 *)loc |= bits(val, 27, 2); - break; - } - case R_AARCH64_PLT32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ul32 *)loc = S + A - P; - break; - case R_AARCH64_CONDBR19: - case R_AARCH64_LD_PREL_LO19: - check(S + A - P, -(1LL << 20), 1LL << 20); - *(ul32 *)loc |= bits(S + A - P, 20, 2) << 5; - break; - case R_AARCH64_PREL16: - check(S + A - P, -(1LL << 15), 1LL << 15); - *(ul16 *)loc = S + A - P; - break; - case R_AARCH64_PREL32: - check(S + A - P, -(1LL << 31), 1LL << 32); - *(ul32 *)loc = S + A - P; - break; - case R_AARCH64_PREL64: - *(ul64 *)loc = S + A - P; - break; - case R_AARCH64_LD64_GOT_LO12_NC: - *(ul32 *)loc |= bits(G + GOT + A, 11, 3) << 10; - break; - case R_AARCH64_LD64_GOTPAGE_LO15: { - i64 val = G + GOT + A - page(GOT); - check(val, 0, 1 << 15); - *(ul32 *)loc |= bits(val, 14, 3) << 10; - break; - } - case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: { - i64 val = page(sym.get_gottp_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - *(ul32 *)loc |= bits(sym.get_gottp_addr(ctx) + A, 11, 3) << 10; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G0: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1 << 15), 1 << 15); - write_movn_movz(loc, val); - break; - } - case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 15, 0) << 5; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G1: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1LL << 31), 1LL << 31); - write_movn_movz(loc, val >> 16); - break; - } - case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 31, 16) << 5; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G2: { - i64 val = S + A - ctx.tp_addr; - check(val, -(1LL << 47), 1LL << 47); - write_movn_movz(loc, val >> 32); - break; - } - case R_AARCH64_TLSLE_ADD_TPREL_HI12: { - i64 val = S + A - ctx.tp_addr; - check(val, 0, 1LL << 24); - *(ul32 *)loc |= bits(val, 23, 12) << 10; - break; - } - case R_AARCH64_TLSLE_ADD_TPREL_LO12: - check(S + A - ctx.tp_addr, 0, 1 << 12); - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; - break; - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - *(ul32 *)loc |= bits(S + A - ctx.tp_addr, 11, 0) << 10; - break; - case R_AARCH64_TLSGD_ADR_PAGE21: { - i64 val = page(sym.get_tlsgd_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - break; - } - case R_AARCH64_TLSGD_ADD_LO12_NC: - *(ul32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A, 11, 0) << 10; - break; - case R_AARCH64_TLSDESC_ADR_PAGE21: - if (sym.has_tlsdesc(ctx)) { - i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P); - check(val, -(1LL << 32), 1LL << 32); - write_adrp(loc, val); - } else { - // adrp x0, 0 -> movz x0, #tls_ofset_hi, lsl #16 - i64 val = (S + A - ctx.tp_addr); - check(val, -(1LL << 32), 1LL << 32); - *(ul32 *)loc = 0xd2a0'0000 | (bits(val, 32, 16) << 5); - } - break; - case R_AARCH64_TLSDESC_LD64_LO12: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 3) << 10; - } else { - // ldr x2, [x0] -> movk x0, #tls_ofset_lo - u32 offset_lo = (S + A - ctx.tp_addr) & 0xffff; - *(ul32 *)loc = 0xf280'0000 | (offset_lo << 5); - } - break; - case R_AARCH64_TLSDESC_ADD_LO12: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc |= bits(sym.get_tlsdesc_addr(ctx) + A, 11, 0) << 10; - } else { - // add x0, x0, #0 -> nop - *(ul32 
*)loc = 0xd503'201f; - } - break; - case R_AARCH64_TLSDESC_CALL: - if (!sym.has_tlsdesc(ctx)) { - // blr x2 -> nop - *(ul32 *)loc = 0xd503'201f; - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - case R_AARCH64_ABS32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ul32 *)loc = val; - break; - } - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = (u8 *)(contents.data() + rel.r_offset); - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_AARCH64_ABS64: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_AARCH64_ADR_GOT_PAGE: - // An ADR_GOT_PAGE and GOT_LO12_NC relocation pair is used to load a - // symbol's address from GOT. If the GOT value is a link-time - // constant, we may be able to rewrite the ADRP+LDR instruction pair - // with an ADRP+ADD, eliminating a GOT memory load. - if (ctx.arg.relax && sym.is_relative() && !sym.is_imported && - !sym.is_ifunc() && i + 1 < rels.size()) { - // ADRP+LDR must be consecutive and use the same register to relax. 
- const ElfRel &rel2 = rels[i + 1]; - if (rel2.r_type == R_AARCH64_LD64_GOT_LO12_NC && - rel2.r_offset == rel.r_offset + 4 && - rel2.r_sym == rel.r_sym && - rel.r_addend == 0 && - rel2.r_addend == 0 && - is_adrp(loc) && - is_ldr(loc + 4)) { - u32 rd = bits(*(ul32 *)loc, 4, 0); - u32 rn = bits(*(ul32 *)(loc + 4), 9, 5); - u32 rt = bits(*(ul32 *)(loc + 4), 4, 0); - if (rd == rn && rn == rt) { - i++; - break; - } - } - } - sym.flags |= NEEDS_GOT; - break; - case R_AARCH64_LD64_GOT_LO12_NC: - case R_AARCH64_LD64_GOTPAGE_LO15: - sym.flags |= NEEDS_GOT; - break; - case R_AARCH64_CALL26: - case R_AARCH64_JUMP26: - case R_AARCH64_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: - case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: - sym.flags |= NEEDS_GOTTP; - break; - case R_AARCH64_ADR_PREL_PG_HI21: - scan_pcrel(ctx, sym, rel); - break; - case R_AARCH64_TLSGD_ADR_PAGE21: - sym.flags |= NEEDS_TLSGD; - break; - case R_AARCH64_TLSDESC_ADR_PAGE21: - case R_AARCH64_TLSDESC_LD64_LO12: - case R_AARCH64_TLSDESC_ADD_LO12: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_AARCH64_TLSLE_MOVW_TPREL_G0: - case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: - case R_AARCH64_TLSLE_MOVW_TPREL_G1: - case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC: - case R_AARCH64_TLSLE_MOVW_TPREL_G2: - case R_AARCH64_TLSLE_ADD_TPREL_HI12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12: - case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: - check_tlsle(ctx, sym, rel); - break; - case R_AARCH64_ADD_ABS_LO12_NC: - case R_AARCH64_ADR_PREL_LO21: - case R_AARCH64_CONDBR19: - case R_AARCH64_LD_PREL_LO19: - case R_AARCH64_LDST16_ABS_LO12_NC: - case R_AARCH64_LDST32_ABS_LO12_NC: - case R_AARCH64_LDST64_ABS_LO12_NC: - case R_AARCH64_LDST128_ABS_LO12_NC: - case R_AARCH64_LDST8_ABS_LO12_NC: - case R_AARCH64_MOVW_UABS_G0: - case R_AARCH64_MOVW_UABS_G0_NC: - case R_AARCH64_MOVW_UABS_G1: - case R_AARCH64_MOVW_UABS_G1_NC: - case R_AARCH64_MOVW_UABS_G2: - case R_AARCH64_MOVW_UABS_G2_NC: - case R_AARCH64_MOVW_UABS_G3: - case R_AARCH64_PREL16: - case R_AARCH64_PREL32: - case R_AARCH64_PREL64: - case R_AARCH64_TLSGD_ADD_LO12_NC: - case R_AARCH64_TLSDESC_CALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - static const ul32 data[] = { - 0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21 - 0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC - 0xd61f'0200, // br x16 - }; - - static_assert(E::thunk_size == sizeof(data)); - - for (i64 i = 0; i < symbols.size(); i++) { - u64 S = symbols[i]->get_addr(ctx); - u64 P = output_section.shdr.sh_addr + offset + i * E::thunk_size; - - u8 *loc = buf + i * E::thunk_size; - memcpy(loc , data, sizeof(data)); - write_adrp(loc, page(S) - page(P)); - *(ul32 *)(loc + 4) |= bits(S, 11, 0) << 10; - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-i386.cc b/third_party/mold/elf/arch-i386.cc deleted file mode 100644 index 2692cacf1c2..00000000000 --- a/third_party/mold/elf/arch-i386.cc +++ /dev/null @@ -1,565 +0,0 @@ -// clang-format off -// i386 is similar to x86-64 but lacks PC-relative memory access -// instructions. So it's not straightforward to support position- -// independent code (PIC) on that target. 
-// -// If an object file is compiled with -fPIC, a function that needs to load -// a value from memory first obtains its own address with the following -// code -// -// call __x86.get_pc_thunk.bx -// -// where __x86.get_pc_thunk.bx is defined as -// -// __x86.get_pc_thunk.bx: -// mov (%esp), %ebx # move the return address to %ebx -// ret -// -// . With the function's own address (or, more precisely, the address -// immediately after the call instruction), the function can compute an -// absolute address of a variable with its address + link-time constant. -// -// Executing call-mov-ret isn't very cheap, and allocating one register to -// store PC isn't cheap too, especially given that i386 has only 8 -// general-purpose registers. But that's the cost of PIC on i386. You need -// to pay it when creating a .so and a position-independent executable. -// -// When a position-independent function calls another function, it sets -// %ebx to the address of .got. Position-independent PLT entries use that -// register to load values from .got.plt/.got. -// -// If we are creating a position-dependent executable (PDE), we can't -// assume that %ebx is set to .got. For PDE, we need to create position- -// dependent PLT entries which don't use %ebx. -// -// https://github.com/rui314/psabi/blob/main/i386.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = I386; - -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_386_8: - case R_386_PC8: - return *loc; - case R_386_16: - case R_386_PC16: - return *(ul16 *)loc; - case R_386_32: - case R_386_PC32: - case R_386_GOT32: - case R_386_GOT32X: - case R_386_PLT32: - case R_386_GOTOFF: - case R_386_GOTPC: - case R_386_TLS_LDM: - case R_386_TLS_GOTIE: - case R_386_TLS_LE: - case R_386_TLS_IE: - case R_386_TLS_GD: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_GOTDESC: - return *(ul32 *)loc; - default: - return 0; - } -} - -template <> -void write_addend(u8 *loc, i64 val, const ElfRel &rel) { - switch (rel.r_type) { - case R_386_NONE: - break; - case R_386_8: - case R_386_PC8: - *loc = val; - break; - case R_386_16: - case R_386_PC16: - *(ul16 *)loc = val; - break; - case R_386_32: - case R_386_PC32: - case R_386_GOT32: - case R_386_GOT32X: - case R_386_PLT32: - case R_386_GOTOFF: - case R_386_GOTPC: - case R_386_TLS_LDM: - case R_386_TLS_GOTIE: - case R_386_TLS_LE: - case R_386_TLS_IE: - case R_386_TLS_GD: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_GOTDESC: - *(ul32 *)loc = val; - break; - default: - unreachable(); - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0x51, // push %ecx - 0x8d, 0x8b, 0, 0, 0, 0, // lea GOTPLT+4(%ebx), %ecx - 0xff, 0x31, // push (%ecx) - 0xff, 0x61, 0x04, // jmp *0x4(%ecx) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 7) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr + 4; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0x51, // push %ecx - 0xb9, 0, 0, 0, 0, // mov GOTPLT+4, %ecx - 0xff, 0x31, // push (%ecx) - 0xff, 0x61, 0x04, // jmp *0x4(%ecx) - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr + 4; - } -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx - 0xff, 
0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xb9, 0, 0, 0, 0, // mov $reloc_offset, %ecx - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 5) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ul32 *)(buf + 11) = sym.get_gotplt_addr(ctx); - } -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx) - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0xf3, 0x0f, 0x1e, 0xfb, // endbr32 - 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT - 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // (padding) - }; - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 6) = sym.get_got_addr(ctx); - } -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_386_32: - *(ul32 *)loc = val; - break; - case R_386_PC32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static u32 relax_got32x(u8 *loc) { - // mov imm(%reg1), %reg2 -> lea imm(%reg1), %reg2 - if (loc[0] == 0x8b) - return 0x8d00 | loc[1]; - return 0; -} - -// Relax GD to LE -static void relax_gd_to_le(u8 *loc, ElfRel rel, u64 val) { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %eax - }; - - switch (rel.r_type) { - case R_386_PLT32: - case R_386_PC32: - memcpy(loc - 3, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - case R_386_GOT32: - case R_386_GOT32X: - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 6) = val; - break; - default: - unreachable(); - } -} - -// Relax LD to LE -static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { - switch (rel.r_type) { - case R_386_PLT32: - case R_386_PC32: { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - } - case R_386_GOT32: - case R_386_GOT32X: { - static const u8 insn[] = { - 0x65, 0xa1, 0, 0, 0, 0, // mov %gs:0, %eax - 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax - 0x90, // nop - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; - break; - } - default: - unreachable(); - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " 
out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(*this, rel); - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_386_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_386_16: - check(S + A, 0, 1 << 16); - *(ul16 *)loc = S + A; - break; - case R_386_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_386_PC8: - check(S + A - P, -(1 << 7), 1 << 7); - *loc = S + A - P; - break; - case R_386_PC16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ul16 *)loc = S + A - P; - break; - case R_386_PC32: - case R_386_PLT32: - *(ul32 *)loc = S + A - P; - break; - case R_386_GOT32: - *(ul32 *)loc = G + A; - break; - case R_386_GOT32X: - if (sym.has_got(ctx)) { - *(ul32 *)loc = G + A; - } else { - u32 insn = relax_got32x(loc - 2); - assert(insn); - loc[-2] = insn >> 8; - loc[-1] = insn; - *(ul32 *)loc = S + A - GOT; - } - break; - case R_386_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_386_GOTPC: - *(ul32 *)loc = GOT + A - P; - break; - case R_386_TLS_GOTIE: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_386_TLS_LE: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - case R_386_TLS_IE: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A; - break; - case R_386_TLS_GD: - if (sym.has_tlsgd(ctx)) { - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - } else { - relax_gd_to_le(loc, rels[i + 1], S - ctx.tp_addr); - i++; - } - break; - case R_386_TLS_LDM: - if (ctx.got->has_tlsld(ctx)) { - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - } else { - relax_ld_to_le(loc, rels[i + 1], ctx.tp_addr - ctx.tls_begin); - i++; - } - break; - case R_386_TLS_LDO_32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_386_SIZE32: - *(ul32 *)loc = sym.esym().st_size + A; - break; - case R_386_TLS_GOTDESC: - if (sym.has_tlsdesc(ctx)) { - *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) + A - GOT; - } else { - static const u8 insn[] = { - 0x8d, 0x05, 0, 0, 0, 0, // lea 0, %eax - }; - memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)loc = S + A - ctx.tp_addr; - } - break; - case R_386_TLS_DESC_CALL: - if (!sym.has_tlsdesc(ctx)) { - // call *(%eax) -> nop - loc[0] = 0x66; - loc[1] = 0x90; - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(*this, rel); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_386_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_386_16: - check(S + A, 0, 1 << 16); - *(ul16 *)loc = S + A; - break; - case R_386_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_386_PC8: - check(S + A, -(1 << 7), 1 << 7); - *loc = S + A; - break; - case R_386_PC16: - check(S + A, -(1 << 15), 1 << 15); - *(ul16 *)loc = S + A; - break; - case R_386_PC32: - *(ul32 *)loc = S + A; - break; - case R_386_GOTPC: - *(ul32 *)loc = GOT + A; - break; - case R_386_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_386_TLS_LDO_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_386_SIZE32: - *(ul32 *)loc = sym.esym().st_size + A; - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = (u8 *)(contents.data() + rel.r_offset); - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_386_8: - case R_386_16: - scan_absrel(ctx, sym, rel); - break; - case R_386_32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_386_PC8: - case R_386_PC16: - case R_386_PC32: - scan_pcrel(ctx, sym, rel); - break; - case R_386_GOT32: - case R_386_GOTPC: - sym.flags |= NEEDS_GOT; - break; - case R_386_GOT32X: { - // We always want to relax GOT32X because static PIE doesn't - // work without it. - bool do_relax = !sym.is_imported && sym.is_relative() && - relax_got32x(loc - 2); - if (!do_relax) - sym.flags |= NEEDS_GOT; - break; - } - case R_386_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_386_TLS_GOTIE: - case R_386_TLS_IE: - sym.flags |= NEEDS_GOTTP; - break; - case R_386_TLS_GD: - if (i + 1 == rels.size()) - Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; - - if (u32 ty = rels[i + 1].r_type; - ty != R_386_PLT32 && ty != R_386_PC32 && - ty != R_386_GOT32 && ty != R_386_GOT32X) - Fatal(ctx) << *this << ": TLS_GD reloc must be followed by PLT or GOT32"; - - // We always relax if -static because libc.a doesn't contain - // __tls_get_addr(). - if (ctx.arg.is_static || - (ctx.arg.relax && !ctx.arg.shared && !sym.is_imported)) - i++; - else - sym.flags |= NEEDS_TLSGD; - break; - case R_386_TLS_LDM: - if (i + 1 == rels.size()) - Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; - - if (u32 ty = rels[i + 1].r_type; - ty != R_386_PLT32 && ty != R_386_PC32 && - ty != R_386_GOT32 && ty != R_386_GOT32X) - Fatal(ctx) << *this << ": TLS_LDM reloc must be followed by PLT or GOT32"; - - // We always relax if -static because libc.a doesn't contain - // __tls_get_addr(). 
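// (Editorial sketch, not from the original source: the LD-to-LE rewrite
// performed by relax_ld_to_le() above turns a typical compiler-generated
// sequence such as
//
//   lea  x@tlsldm(%ebx), %eax   # R_386_TLS_LDM
//   call ___tls_get_addr@plt    # R_386_PLT32
//
// into
//
//   mov %gs:0, %eax             # %eax = thread pointer
//   sub $TLS_SIZE, %eax         # TLS_SIZE = tp_addr - tls_begin
//
// so the call into libc disappears entirely.)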
- if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) - i++; - else - ctx.needs_tlsld = true; - break; - case R_386_TLS_GOTDESC: - if (!relax_tlsdesc(ctx, sym)) - sym.flags |= NEEDS_TLSDESC; - break; - case R_386_TLS_LE: - check_tlsle(ctx, sym, rel); - break; - case R_386_GOTOFF: - case R_386_TLS_LDO_32: - case R_386_SIZE32: - case R_386_TLS_DESC_CALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-m68k.cc b/third_party/mold/elf/arch-m68k.cc deleted file mode 100644 index 1b3b8721241..00000000000 --- a/third_party/mold/elf/arch-m68k.cc +++ /dev/null @@ -1,326 +0,0 @@ -// clang-format off -// This file contains code for the Motorola 68000 series microprocessors, -// which is often abbreviated as m68k. Running a Unix-like system on a -// m68k-based machine today is probably a retro-computing hobby activity, -// but the processor was a popular choice to build Unix computers during -// '80s. Early Sun workstations for example used m68k. Macintosh until -// 1994 were based on m68k as well until they switched to PowerPC (and -// then to x86 and to ARM.) -// -// From the linker's point of view, it is not hard to support m68k. It's -// just a 32-bit big-endian CISC ISA. Compared to comtemporary i386, -// m68k's psABI is actually simpler because m68k has PC-relative memory -// access instructions and therefore can support position-independent -// code without too much hassle. -// -// https://github.com/rui314/psabi/blob/main/m68k.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = M68K; - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const u8 insn[] = { - 0x2f, 0x00, // move.l %d0, -(%sp) - 0x2f, 0x3b, 0x01, 0x70, 0, 0, 0, 0, // move.l (GOTPLT+4, %pc), -(%sp) - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT+8, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 6) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr; - *(ub32 *)(buf + 14) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 4; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const u8 insn[] = { - 0x20, 0x3c, 0, 0, 0, 0, // move.l PLT_OFFSET, %d0 - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOTPLT_ENTRY, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - *(ub32 *)(buf + 10) = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const u8 insn[] = { - 0x4e, 0xfb, 0x01, 0x71, 0, 0, 0, 0, // jmp ([GOT_ENTRY, %pc]) - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 4) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 2; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_68K_32: - *(ub32 *)loc = val; - break; - case R_68K_PC32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const 
ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto write16 = [&](u64 val) { - check(val, 0, 1 << 16); - *(ub16 *)loc = val; - }; - - auto write16s = [&](u64 val) { - check(val, -(1 << 15), 1 << 15); - *(ub16 *)loc = val; - }; - - auto write8 = [&](u64 val) { - check(val, 0, 1 << 8); - *loc = val; - }; - - auto write8s = [&](u64 val) { - check(val, -(1 << 7), 1 << 7); - *loc = val; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_68K_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_68K_16: - write16(S + A); - break; - case R_68K_8: - write8(S + A); - break; - case R_68K_PC32: - case R_68K_PLT32: - *(ub32 *)loc = S + A - P; - break; - case R_68K_PC16: - case R_68K_PLT16: - write16s(S + A - P); - break; - case R_68K_PC8: - case R_68K_PLT8: - write8s(S + A - P); - break; - case R_68K_GOTPCREL32: - *(ub32 *)loc = GOT + A - P; - break; - case R_68K_GOTPCREL16: - write16s(GOT + A - P); - break; - case R_68K_GOTPCREL8: - write8s(GOT + A - P); - break; - case R_68K_GOTOFF32: - *(ub32 *)loc = G + A; - break; - case R_68K_GOTOFF16: - write16(G + A); - break; - case R_68K_GOTOFF8: - write8(G + A); - break; - case R_68K_TLS_GD32: - *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - break; - case R_68K_TLS_GD16: - write16(sym.get_tlsgd_addr(ctx) + A - GOT); - break; - case R_68K_TLS_GD8: - write8(sym.get_tlsgd_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDM32: - *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_68K_TLS_LDM16: - write16(ctx.got->get_tlsld_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDM8: - write8(ctx.got->get_tlsld_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LDO32: - *(ub32 *)loc = S + A - ctx.dtp_addr; - break; - case R_68K_TLS_LDO16: - write16s(S + A - ctx.dtp_addr); - break; - case R_68K_TLS_LDO8: - write8s(S + A - ctx.dtp_addr); - break; - case R_68K_TLS_IE32: - *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_68K_TLS_IE16: - write16(sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_68K_TLS_IE8: - write8(sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_68K_TLS_LE32: - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_68K_TLS_LE16: - write16(S + A - ctx.tp_addr); - break; - case R_68K_TLS_LE8: - write8(S + A - ctx.tp_addr); - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_68K_32: - if (std::optional val = get_tombstone(sym, frag)) - *(ub32 *)loc = *val; - else - *(ub32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k"; - - switch (rel.r_type) { - case R_68K_32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_68K_16: - case R_68K_8: - scan_absrel(ctx, sym, rel); - break; - case R_68K_PC32: - case R_68K_PC16: - case R_68K_PC8: - scan_pcrel(ctx, sym, rel); - break; - case R_68K_GOTPCREL32: - case R_68K_GOTPCREL16: - case R_68K_GOTPCREL8: - case R_68K_GOTOFF32: - case R_68K_GOTOFF16: - case R_68K_GOTOFF8: - sym.flags |= NEEDS_GOT; - break; - case R_68K_PLT32: - case R_68K_PLT16: - case R_68K_PLT8: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_68K_TLS_GD32: - case R_68K_TLS_GD16: - case R_68K_TLS_GD8: - sym.flags |= NEEDS_TLSGD; - break; - case R_68K_TLS_LDM32: - case R_68K_TLS_LDM16: - case R_68K_TLS_LDM8: - ctx.needs_tlsld = true; - break; - case R_68K_TLS_IE32: - case R_68K_TLS_IE16: - case R_68K_TLS_IE8: - sym.flags |= NEEDS_GOTTP; - break; - case R_68K_TLS_LE32: - case R_68K_TLS_LE16: - case R_68K_TLS_LE8: - check_tlsle(ctx, sym, rel); - break; - case R_68K_TLS_LDO32: - case R_68K_TLS_LDO16: - case R_68K_TLS_LDO8: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc32.cc b/third_party/mold/elf/arch-ppc32.cc deleted file mode 100644 index c3a1db4cec6..00000000000 --- a/third_party/mold/elf/arch-ppc32.cc +++ /dev/null @@ -1,452 +0,0 @@ -// clang-format off -// This file implements the PowerPC 32-bit ISA. For 64-bit PowerPC, see -// arch-ppc64v1.cpp and arch-ppc64v2.cpp. -// -// PPC32 is a RISC ISA. It has 32 general-purpose registers (GPRs). -// r0, r11 and r12 are reserved for static linkers, so we can use these -// registers in PLTs and range extension thunks. In addition to that, it -// has a few special registers. Notable ones are LR which holds a return -// address and CTR which we can use to store a branch target address. -// -// It feels that the PPC32 psABI is unnecessarily complicated at first -// glance, but that is mainly stemmed from the fact that the ISA lacks -// PC-relative load/store instructions. Since machine instructions cannot -// load data relative to its own address, it is not straightforward to -// support position-independent code (PIC) on PPC32. -// -// A position-independent function typically contains the following code -// in the prologue to obtain its own address: -// -// mflr r0 // save the current return address to %r0 -// bcl 20, 31, 4 // call the next instruction as if it were a function -// mtlr r12 // save the return address to %r12 -// mtlr r0 // restore the original return address -// -// An object file compiled with -fPIC contains a data section named -// `.got2` to store addresses of locally-defined global variables and -// constants. 
A PIC function usually computes its .got2+0x8000 and set it -// to %r30. This scheme allows the function to access global objects -// defined in the same input file with a single %r30-relative load/store -// instruction with a 16-bit offset, given that .got2 is smaller than -// 0x10000 (or 65536) bytes. -// -// Since each object file has its own .got2, %r30 refers to different -// places in a merged .got2 for two functions that came from different -// input files. Therefore, %r30 makes sense only within a single function. -// -// Technically, we can reuse a %r30 value in our PLT if we create a PLT -// _for each input file_ (that's what GNU ld seems to be doing), but that -// doesn't seems to be worth its complexity. Our PLT simply doesn't rely -// on a %r30 value. -// -// https://github.com/rui314/psabi/blob/main/ppc32.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = PPC32; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ub32 insn[] = { - // Get the address of this PLT section - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // 1: mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Compute the runtime address of GOTPLT+12 - 0x3d8c'0000, // addis r12, r12, (GOTPLT - 1b)@higha - 0x398c'0000, // addi r12, r12, (GOTPLT - 1b)@lo - - // Compute the PLT entry offset - 0x7d6c'5850, // sub r11, r11, r12 - 0x1d6b'0003, // mulli r11, r11, 3 - - // Load GOTPLT[2] and branch to GOTPLT[1] - 0x800c'fff8, // lwz r0, -8(r12) - 0x7c09'03a6, // mtctr r0 - 0x818c'fffc, // lwz r12, -4(r12) - 0x4e80'0420, // bctr - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - - ub32 *loc = (ub32 *)buf; - loc[4] |= higha(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); - loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); -} - -static const ub32 plt_entry[] = { - // Get the address of this PLT entry - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Load an address from the GOT/GOTPLT entry and jump to that address - 0x3d6c'0000, // addis r11, r12, OFFSET@higha - 0x396b'0000, // addi r11, r11, OFFSET@lo - 0x818b'0000, // lwz r12, 0(r11) - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr -}; - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static_assert(E::plt_size == sizeof(plt_entry)); - memcpy(buf, plt_entry, sizeof(plt_entry)); - - ub32 *loc = (ub32 *)buf; - i64 offset = sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx) - 8; - loc[4] |= higha(offset); - loc[5] |= lo(offset); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static_assert(E::pltgot_size == sizeof(plt_entry)); - memcpy(buf, plt_entry, sizeof(plt_entry)); - - ub32 *loc = (ub32 *)buf; - i64 offset = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 8; - loc[4] |= higha(offset); - loc[5] |= lo(offset); -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC_ADDR32: - *(ub32 *)loc = 
val; - break; - case R_PPC_REL32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - u64 GOT2 = file.ppc32_got2 ? file.ppc32_got2->get_addr() : 0; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC_ADDR14: - *(ub32 *)loc |= bits(S + A, 15, 2) << 2; - break; - case R_PPC_ADDR16: - case R_PPC_UADDR16: - case R_PPC_ADDR16_LO: - *(ub16 *)loc = lo(S + A); - break; - case R_PPC_ADDR16_HI: - *(ub16 *)loc = hi(S + A); - break; - case R_PPC_ADDR16_HA: - *(ub16 *)loc = ha(S + A); - break; - case R_PPC_ADDR24: - *(ub32 *)loc |= bits(S + A, 25, 2) << 2; - break; - case R_PPC_ADDR30: - *(ub32 *)loc |= bits(S + A, 31, 2) << 2; - break; - case R_PPC_PLT16_LO: - *(ub16 *)loc = lo(G + GOT - A - GOT2); - break; - case R_PPC_PLT16_HI: - *(ub16 *)loc = hi(G + GOT - A - GOT2); - break; - case R_PPC_PLT16_HA: - *(ub16 *)loc = ha(G + GOT - A - GOT2); - break; - case R_PPC_PLT32: - *(ub32 *)loc = G + GOT - A - GOT2; - break; - case R_PPC_REL14: - *(ub32 *)loc |= bits(S + A - P, 15, 2) << 2; - break; - case R_PPC_REL16: - case R_PPC_REL16_LO: - *(ub16 *)loc = lo(S + A - P); - break; - case R_PPC_REL16_HI: - *(ub16 *)loc = hi(S + A - P); - break; - case R_PPC_REL16_HA: - *(ub16 *)loc = ha(S + A - P); - break; - case R_PPC_REL24: - case R_PPC_LOCAL24PC: { - i64 val = S + A - P; - if (sign_extend(val, 25) != val) - val = get_thunk_addr(i) - P; - *(ub32 *)loc |= bits(val, 25, 2) << 2; - break; - } - case R_PPC_PLTREL24: { - i64 val = S - P; - if (sym.has_plt(ctx) || sign_extend(val, 25) != val) - val = get_thunk_addr(i) - P; - *(ub32 *)loc |= bits(val, 25, 2) << 2; - break; - } - case R_PPC_REL32: - case R_PPC_PLTREL32: - *(ub32 *)loc = S + A - P; - break; - case R_PPC_GOT16: - case R_PPC_GOT16_LO: - *(ub16 *)loc = lo(G + A); - break; - case R_PPC_GOT16_HI: - *(ub16 *)loc = hi(G + A); - break; - case R_PPC_GOT16_HA: - *(ub16 *)loc = ha(G + A); - break; - case R_PPC_TPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC_TPREL16_HI: - *(ub16 *)loc = hi(S + A - ctx.tp_addr); - break; - case R_PPC_TPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC_DTPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC_DTPREL16_HI: - *(ub16 *)loc = hi(S + A - ctx.dtp_addr); - break; - case R_PPC_DTPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC_GOT_TLSGD16: - *(ub16 *)loc = sym.get_tlsgd_addr(ctx) - GOT; - break; - case R_PPC_GOT_TLSLD16: - *(ub16 *)loc = ctx.got->get_tlsld_addr(ctx) - GOT; - break; - case R_PPC_GOT_TPREL16: - *(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT; - break; - case R_PPC_TLS: - case R_PPC_TLSGD: - case R_PPC_TLSLD: - case R_PPC_PLTSEQ: - case R_PPC_PLTCALL: - break; - default: - unreachable(); - } - } -} - 
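// Editorial aside (a standalone sketch, not part of the original file): the
// lo()/ha() helpers above are designed so that a "high-adjusted"/"low" pair
// round-trips exactly even though the low half is sign-extended by addi and
// d-form loads/stores. A quick check in plain C++:
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     auto lo = [](uint32_t x) -> uint32_t { return x & 0xffff; };
//     auto ha = [](uint32_t x) -> uint32_t { return (x + 0x8000) >> 16; };
//     for (uint32_t x : {0u, 0x1234'5678u, 0x0000'8000u, 0xffff'ffffu})
//       assert((uint32_t)((ha(x) << 16) + (int16_t)lo(x)) == x);
//   }
//
// Adding 0x8000 before taking the high half compensates for the borrow that
// the sign-extended low half would otherwise introduce.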
-template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC_ADDR32: - if (std::optional val = get_tombstone(sym, frag)) - *(ub32 *)loc = *val; - else - *(ub32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_PPC_ADDR14: - case R_PPC_ADDR16: - case R_PPC_UADDR16: - case R_PPC_ADDR16_LO: - case R_PPC_ADDR16_HI: - case R_PPC_ADDR16_HA: - case R_PPC_ADDR24: - case R_PPC_ADDR30: - scan_absrel(ctx, sym, rel); - break; - case R_PPC_REL14: - case R_PPC_REL16: - case R_PPC_REL16_LO: - case R_PPC_REL16_HI: - case R_PPC_REL16_HA: - case R_PPC_REL32: - scan_pcrel(ctx, sym, rel); - break; - case R_PPC_GOT16: - case R_PPC_GOT16_LO: - case R_PPC_GOT16_HI: - case R_PPC_GOT16_HA: - case R_PPC_PLT16_LO: - case R_PPC_PLT16_HI: - case R_PPC_PLT16_HA: - case R_PPC_PLT32: - sym.flags |= NEEDS_GOT; - break; - case R_PPC_REL24: - case R_PPC_PLTREL24: - case R_PPC_PLTREL32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC_GOT_TLSGD16: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC_GOT_TLSLD16: - ctx.needs_tlsld = true; - break; - case R_PPC_GOT_TPREL16: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC_TPREL16_LO: - case R_PPC_TPREL16_HI: - case R_PPC_TPREL16_HA: - check_tlsle(ctx, sym, rel); - break; - case R_PPC_LOCAL24PC: - case R_PPC_TLS: - case R_PPC_TLSGD: - case R_PPC_TLSLD: - case R_PPC_DTPREL16_LO: - case R_PPC_DTPREL16_HI: - case R_PPC_DTPREL16_HA: - case R_PPC_PLTSEQ: - case R_PPC_PLTCALL: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - static const ub32 local_thunk[] = { - // Get this thunk's address - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 - 0x7d88'02a6, // mflr r12 - 0x7c08'03a6, // mtlr r0 - - // Materialize the destination's address in %r11 and jump to that address - 0x3d6c'0000, // addis r11, r12, OFFSET@higha - 0x396b'0000, // addi r11, r11, OFFSET@lo - 0x7d69'03a6, // mtctr r11 - 0x4e80'0420, // bctr - 0x6000'0000, // nop - }; - - static_assert(E::thunk_size == sizeof(plt_entry)); - static_assert(E::thunk_size == sizeof(local_thunk)); - - for (i64 i = 0; i < symbols.size(); i++) { - ub32 *loc = (ub32 *)(buf + i * E::thunk_size); - Symbol &sym = *symbols[i]; - - if (sym.has_plt(ctx)) { - 
memcpy(loc, plt_entry, sizeof(plt_entry)); - u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); - i64 val = got - get_addr(i) - 8; - loc[4] |= higha(val); - loc[5] |= lo(val); - } else { - memcpy(loc, local_thunk, sizeof(local_thunk)); - i64 val = sym.get_addr(ctx) - get_addr(i) - 8; - loc[4] |= higha(val); - loc[5] |= lo(val); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v1.cc b/third_party/mold/elf/arch-ppc64v1.cc deleted file mode 100644 index c45581269dc..00000000000 --- a/third_party/mold/elf/arch-ppc64v1.cc +++ /dev/null @@ -1,687 +0,0 @@ -// clang-format off -// This file contains code for the 64-bit PowerPC ELFv1 ABI that is -// commonly used for big-endian PPC systems. Modern PPC systems that use -// the processor in the little-endian mode use the ELFv2 ABI instead. For -// ELFv2, see arch-ppc64v2.cc. -// -// Even though they are similiar, ELFv1 isn't only different from ELFv2 in -// endianness. The most notable difference is, in ELFv1, a function -// pointer doesn't directly refer to the entry point of a function but -// instead refers to a data structure so-called "function descriptor". -// -// The function descriptor is essentially a pair of a function entry point -// address and a value that should be set to %r2 before calling that -// function. There is also a third member for "the environment pointer for -// languages such as Pascal and PL/1" according to the psABI, but it looks -// like no one acutally uses it. In total, the function descriptor is 24 -// bytes long. Here is why we need it. -// -// PPC generally lacks PC-relative data access instructions. Position- -// independent code sets GOT + 0x8000 to %r2 and access global variables -// relative to %r2. -// -// Each ELF file has its own GOT. If a function calls another function in -// the same ELF file, it doesn't have to reset %r2. However, if it is in -// other file (e.g. other .so), it has to set a new value to %r2 so that -// the register contains the callee's GOT + 0x8000. -// -// In this way, you can't call a function just by knowing the function's -// entry point address. You also need to know a proper %r2 value for the -// function. This is why a function pointer refers to a tuple of an -// address and a %r2 value. -// -// If a function call is made through PLT, PLT takes care of restoring %r2. -// Therefore, the caller has to restore %r2 only for function calls -// through function pointers. -// -// .opd (short for "official procedure descriptors") contains function -// descriptors. -// -// You can think OPD as this: even in other targets, a function can have a -// few different addresses for different purposes. It may not only have an -// entry point address but may also have PLT and/or GOT addresses. -// In PPCV1, it may have an OPD address in addition to these. OPD address -// is used for relocations that refers to the address of a function as a -// function pointer. -// -// https://github.com/rui314/psabi/blob/main/ppc64v1.pdf - -#include "third_party/mold/elf/mold.h" - -#include "third_party/libcxx/algorithm" -// MISSING #include - -namespace mold::elf { - -using E = PPC64V1; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -// .plt is used only for lazy symbol resolution on PPC64. 
All PLT -// calls are made via range extension thunks even if they are within -// reach. Thunks read addresses from .got.plt and jump there. -// Therefore, once PLT symbols are resolved and final addresses are -// written to .got.plt, thunks just skip .plt and directly jump to the -// resolved addresses. -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ub32 insn[] = { - 0x7d88'02a6, // mflr r12 - 0x429f'0005, // bcl 20, 31, 4 // obtain PC - 0x7d68'02a6, // mflr r11 - 0xe84b'0024, // ld r2,36(r11) - 0x7d88'03a6, // mtlr r12 - 0x7d62'5a14, // add r11,r2,r11 - 0xe98b'0000, // ld r12,0(r11) - 0xe84b'0008, // ld r2,8(r11) - 0x7d89'03a6, // mtctr r12 - 0xe96b'0010, // ld r11,16(r11) - 0x4e80'0420, // bctr - // .quad .got.plt - .plt - 8 - 0x0000'0000, - 0x0000'0000, - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ub64 *)(buf + 44) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - ub32 *loc = (ub32 *)buf; - i64 idx = sym.get_plt_idx(ctx); - - // The PPC64 ELFv1 ABI requires PLT entries to be vary in size depending - // on their indices. Unlike other targets, .got.plt is filled not by us - // but by the loader, so we don't have a control over where the initial - // call to the PLT entry jumps to. So we need to strictly follow the PLT - // section layout as the loader expect it to be. - if (idx < 0x8000) { - static const ub32 insn[] = { - 0x3800'0000, // li r0, PLT_INDEX - 0x4b00'0000, // b plt0 - }; - - memcpy(loc, insn, sizeof(insn)); - loc[0] |= idx; - loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff; - } else { - static const ub32 insn[] = { - 0x3c00'0000, // lis r0, PLT_INDEX@high - 0x6000'0000, // ori r0, r0, PLT_INDEX@lo - 0x4b00'0000, // b plt0 - }; - - memcpy(loc, insn, sizeof(insn)); - loc[0] |= high(idx); - loc[1] |= lo(idx); - loc[2] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 8) & 0x00ff'ffff; - } -} - -// .plt.got is not necessary on PPC64 because range extension thunks -// directly read GOT entries and jump there. 
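// Editorial aside (a sketch with an assumed struct name, not text from the
// original file): under ELFv1 the "address" of a function is the address of
// its descriptor, which is laid out roughly as
//
//   struct OpdEntry {
//     u64 entry; // code address of the function
//     u64 toc;   // value the callee expects in %r2 (its .got + 0x8000)
//     u64 env;   // environment pointer; effectively unused
//   };
//
// The range extension thunks later in this file read the entry and toc
// words of such a structure before branching, and PPC64OpdSection below
// emits output entries in exactly this three-word shape.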
-template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC64_ADDR64: - *(ub64 *)loc = val; - break; - case R_PPC64_REL32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_PPC64_REL64: - *(ub64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 TOC = ctx.extra.TOC->value; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC64_TOC: - apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, dynrel); - break; - case R_PPC64_TOC16_HA: - *(ub16 *)loc = ha(S + A - TOC); - break; - case R_PPC64_TOC16_LO: - *(ub16 *)loc = lo(S + A - TOC); - break; - case R_PPC64_TOC16_DS: - check(S + A - TOC, -(1 << 15), 1 << 15); - *(ub16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_TOC16_LO_DS: - *(ub16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_REL24: { - i64 val = sym.get_addr(ctx, NO_OPD) + A - P; - if (sym.has_plt(ctx) || sign_extend(val, 25) != val) - val = get_thunk_addr(i) + A - P; - - check(val, -(1 << 25), 1 << 25); - *(ub32 *)loc |= bits(val, 25, 2) << 2; - - // If a callee is an external function, PLT saves %r2 to the - // caller's r2 save slot. We need to restore it after function - // return. To do so, there's usually a NOP as a placeholder - // after a BL. 0x6000'0000 is a NOP. 
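// (Editorial illustration, not from the original: a call site that may go
// through the PLT typically looks like
//
//   bl foo          # R_PPC64_REL24
//   nop             # 0x6000'0000, placeholder
//
// and the rewrite below turns the placeholder into
//
//   ld r2, 40(r1)   # 0xe841'0028, reload the caller's TOC pointer
//
// 40(r1) being the TOC save slot of the ELFv1 stack frame.)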
- if (sym.has_plt(ctx) && *(ub32 *)(loc + 4) == 0x6000'0000) - *(ub32 *)(loc + 4) = 0xe841'0028; // ld r2, 40(r1) - break; - } - case R_PPC64_REL32: - *(ub32 *)loc = S + A - P; - break; - case R_PPC64_REL64: - *(ub64 *)loc = S + A - P; - break; - case R_PPC64_REL16_HA: - *(ub16 *)loc = ha(S + A - P); - break; - case R_PPC64_REL16_LO: - *(ub16 *)loc = lo(S + A - P); - break; - case R_PPC64_PLT16_HA: - *(ub16 *)loc = ha(G + GOT - TOC); - break; - case R_PPC64_PLT16_HI: - *(ub16 *)loc = hi(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO: - *(ub16 *)loc = lo(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO_DS: - *(ub16 *)loc |= (G + GOT - TOC) & 0xfffc; - break; - case R_PPC64_GOT_TPREL16_HA: - *(ub16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_HA: - *(ub16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_LO: - *(ub16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_HA: - *(ub16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_LO: - *(ub16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_DTPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC64_TPREL16_HA: - *(ub16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC64_TPREL16_LO: - *(ub16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC64_GOT_TPREL16_LO_DS: - *(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; - break; - case R_PPC64_PLTSEQ: - case R_PPC64_PLTCALL: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A; - break; - case R_PPC64_ADDR32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ub32 *)loc = val; - break; - } - case R_PPC64_DTPREL64: - *(ub64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT | NEEDS_PPC_OPD; - - // Any relocation except R_PPC64_REL24 is considered as an - // address-taking relocation. 
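// (Editorial example, an illustration rather than text from the original:
// taking a function's address, e.g.
//
//   void (*fp)(void) = &foo;   // typically an R_PPC64_ADDR64 against foo
//
// must yield the address of foo's descriptor, hence the NEEDS_PPC_OPD flag
// below, while a plain call `foo();` uses R_PPC64_REL24 and branches to the
// entry point directly, so it needs no descriptor.)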
- if (rel.r_type != R_PPC64_REL24 && sym.get_type() == STT_FUNC) - sym.flags |= NEEDS_PPC_OPD; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - case R_PPC64_TOC: - scan_toc_rel(ctx, sym, rel); - break; - case R_PPC64_GOT_TPREL16_HA: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC64_REL24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC64_PLT16_HA: - sym.flags |= NEEDS_GOT; - break; - case R_PPC64_GOT_TLSGD16_HA: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC64_GOT_TLSLD16_HA: - ctx.needs_tlsld = true; - break; - case R_PPC64_TPREL16_HA: - case R_PPC64_TPREL16_LO: - check_tlsle(ctx, sym, rel); - break; - case R_PPC64_REL32: - case R_PPC64_REL64: - case R_PPC64_TOC16_HA: - case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: - case R_PPC64_TOC16_DS: - case R_PPC64_REL16_HA: - case R_PPC64_REL16_LO: - case R_PPC64_PLT16_HI: - case R_PPC64_PLT16_LO: - case R_PPC64_PLT16_LO_DS: - case R_PPC64_PLTSEQ: - case R_PPC64_PLTCALL: - case R_PPC64_GOT_TPREL16_LO_DS: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TLSLD16_LO: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - case R_PPC64_DTPREL16_HA: - case R_PPC64_DTPREL16_LO: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // If the destination is .plt.got, we save the current r2, read an - // address of a function descriptor from .got, restore %r2 and jump - // to the function. - static const ub32 pltgot_thunk[] = { - // Store the caller's %r2 - 0xf841'0028, // std %r2, 40(%r1) - - // Load an address of a function descriptor - 0x3d82'0000, // addis %r12, %r2, foo@got@toc@ha - 0xe98c'0000, // ld %r12, foo@got@toc@lo(%r12) - - // Restore the callee's %r2 - 0xe84c'0008, // ld %r2, 8(%r12) - - // Jump to the function - 0xe98c'0000, // ld %r12, 0(%r12) - 0x7d89'03a6, // mtctr %r12 - 0x4e80'0420, // bctr - }; - - // If the destination is .plt, read a function descriptor from .got.plt. - static const ub32 plt_thunk[] = { - // Store the caller's %r2 - 0xf841'0028, // std %r2, 40(%r1) - - // Materialize an address of a function descriptor - 0x3d82'0000, // addis %r12, %r2, foo@gotplt@toc@ha - 0x398c'0000, // addi %r12, %r12, foo@gotplt@toc@lo - - // Restore the callee's %r2 - 0xe84c'0008, // ld %r2, 8(%r12) - - // Jump to the function - 0xe98c'0000, // ld %r12, 0(%r12) - 0x7d89'03a6, // mtctr %r12 - 0x4e80'0420, // bctr - }; - - // If the destination is a non-imported function, we directly jump - // to the function entry address. 
- static const ub32 local_thunk[] = { - 0x3d82'0000, // addis r12, r2, foo@toc@ha - 0x398c'0000, // addi r12, r12, foo@toc@lo - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - 0x6000'0000, // nop - 0x6000'0000, // nop - 0x6000'0000, // nop - }; - - static_assert(E::thunk_size == sizeof(pltgot_thunk)); - static_assert(E::thunk_size == sizeof(plt_thunk)); - static_assert(E::thunk_size == sizeof(local_thunk)); - - for (i64 i = 0; i < symbols.size(); i++) { - Symbol &sym = *symbols[i]; - ub32 *loc = (ub32 *)(buf + i * E::thunk_size); - - if (sym.has_got(ctx)) { - memcpy(loc, pltgot_thunk, sizeof(pltgot_thunk)); - i64 val = sym.get_got_addr(ctx) - ctx.extra.TOC->value; - loc[1] |= higha(val); - loc[2] |= lo(val); - } else if(sym.has_plt(ctx)) { - memcpy(loc, plt_thunk, sizeof(plt_thunk)); - i64 val = sym.get_gotplt_addr(ctx) - ctx.extra.TOC->value; - loc[1] |= higha(val); - loc[2] |= lo(val); - } else { - memcpy(loc, local_thunk, sizeof(local_thunk)); - i64 val = sym.get_addr(ctx, NO_OPD) - ctx.extra.TOC->value; - loc[0] |= higha(val); - loc[1] |= lo(val); - } - } -} - -static InputSection *get_opd_section(ObjectFile &file) { - for (std::unique_ptr> &isec : file.sections) - if (isec && isec->name() == ".opd") - return isec.get(); - return nullptr; -} - -static ElfRel * -get_relocation_at(Context &ctx, InputSection &isec, i64 offset) { - std::span> rels = isec.get_rels(ctx); - - auto it = std::lower_bound(rels.begin(), rels.end(), offset, - [](const ElfRel &r, i64 offset) { - return r.r_offset < offset; - }); - - if (it == rels.end()) - return nullptr; - if (it->r_offset != offset) - return nullptr; - return &*it; -} - -struct OpdSymbol { - bool operator<(const OpdSymbol &x) const { return r_offset < x.r_offset; } - - u64 r_offset = 0; - Symbol *sym = nullptr; -}; - -static Symbol * -get_opd_sym_at(Context &ctx, std::span syms, u64 offset) { - auto it = std::lower_bound(syms.begin(), syms.end(), OpdSymbol{offset}); - if (it == syms.end()) - return nullptr; - if (it->r_offset != offset) - return nullptr; - return it->sym; -} - -// Compiler creates an .opd entry for each function symbol. The intention -// is to make it possible to create an output .opd section just by linking -// input .opd sections in the same manner as we do to other normal input -// sections. -// -// However, in reality, .opd isn't a normal input section. It needs many -// special treatments as follows: -// -// 1. A function symbol refers to not a .text but an .opd. Its address -// works fine for address-taking relocations such as R_PPC64_ADDR64. -// However, R_PPC64_REL24 (which is used for branch instruction) needs -// a function's real address instead of the function's .opd address. -// We need to read .opd contents to find out a function entry point -// address to apply R_PPC64_REL24. -// -// 2. Output .opd entries are needed only for functions whose addresses -// are taken. Just copying input .opd sections to an output would -// produces lots of dead .opd entries. -// -// 3. In this design, all function symbols refer to an .opd section, and -// that doesn't work well with graph traversal optimizations such as -// garbage collection or identical comdat folding. For example, garbage -// collector would mark an .opd alive which in turn mark all functions -// thatare referenced by .opd as alive, effectively keeping all -// functions as alive. -// -// The problem is that the compiler creates a half-baked .opd section, and -// the linker has to figure out what all these .opd entries and -// relocations are trying to achieve. 
It's like the compiler would emit a -// half-baked .plt section in an object file and the linker has to deal -// with that. That's not a good design. -// -// So, in this function, we undo what the compiler did to .opd. We remove -// function symbols from .opd and reattach them to their function entry -// points. We also rewrite relocations that directly refer to an input -// .opd section so that they refer to function symbols instead. We then -// mark input .opd sections as dead. -// -// After this function, we mark symbols with the NEEDS_PPC_OPD flag if the -// symbol needs an .opd entry. We then create an output .opd just like we -// do for .plt or .got. -void ppc64v1_rewrite_opd(Context &ctx) { - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - InputSection *opd = get_opd_section(*file); - if (!opd) - return; - opd->is_alive = false; - - // Move symbols from .opd to .text. - std::vector opd_syms; - - for (Symbol *sym : file->symbols) { - if (sym->file != file || sym->get_input_section() != opd) - continue; - - if (u32 ty = sym->get_type(); ty != STT_FUNC && ty != STT_GNU_IFUNC) - continue; - - ElfRel *rel = get_relocation_at(ctx, *opd, sym->value); - if (!rel) - Fatal(ctx) << *file << ": cannot find a relocation in .opd for " - << *sym << " at offset 0x" << std::hex << (u64)sym->value; - - Symbol *sym2 = file->symbols[rel->r_sym]; - if (sym2->get_type() != STT_SECTION) - Fatal(ctx) << *file << ": bad relocation in .opd referring " << *sym2; - - opd_syms.push_back({sym->value, sym}); - - sym->set_input_section(sym2->get_input_section()); - sym->value = rel->r_addend; - } - - // Sort symbols so that get_opd_sym_at() can do binary search. - sort(opd_syms); - - // Rewrite relocations so that they directly refer to .opd. - for (std::unique_ptr> &isec : file->sections) { - if (!isec || !isec->is_alive || isec.get() == opd) - continue; - - for (ElfRel &r : isec->get_rels(ctx)) { - Symbol &sym = *file->symbols[r.r_sym]; - if (sym.get_input_section() != opd) - continue; - - Symbol *real_sym = get_opd_sym_at(ctx, opd_syms, r.r_addend); - if (!real_sym) - Fatal(ctx) << *isec << ": cannot find a symbol in .opd for " << r - << " at offset 0x" << std::hex << (u64)r.r_addend; - - r.r_sym = real_sym->sym_idx; - r.r_addend = 0; - } - } - }); -} - -// When a function is exported, the dynamic symbol for the function should -// refers to the function's .opd entry. This function marks such symbols -// with NEEDS_PPC_OPD. -void ppc64v1_scan_symbols(Context &ctx) { - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) - if (sym->file == file && sym->is_exported) - if (u32 ty = sym->get_type(); ty == STT_FUNC || ty == STT_GNU_IFUNC) - sym->flags |= NEEDS_PPC_OPD; - }); - - // Functions referenced by the ELF header also have to have .opd entries. 
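// (Editorial note, our assumption about the rationale: the dynamic loader
// consumes e_entry, DT_INIT and DT_FINI as function pointers, i.e. as
// descriptor addresses under ELFv1, which is why the entry/init/fini
// symbols marked below must get .opd entries.)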
- auto mark = [&](std::string_view name) { - if (!name.empty()) - if (Symbol &sym = *get_symbol(ctx, name); !sym.is_imported) - sym.flags |= NEEDS_PPC_OPD; - }; - - mark(ctx.arg.entry); - mark(ctx.arg.init); - mark(ctx.arg.fini); -} - -void PPC64OpdSection::add_symbol(Context &ctx, Symbol *sym) { - sym->set_opd_idx(ctx, symbols.size()); - symbols.push_back(sym); - this->shdr.sh_size += ENTRY_SIZE; -} - -i64 PPC64OpdSection::get_reldyn_size(Context &ctx) const { - if (ctx.arg.pic) - return symbols.size() * 2; - return 0; -} - -void PPC64OpdSection::copy_buf(Context &ctx) { - ub64 *buf = (ub64 *)(ctx.buf + this->shdr.sh_offset); - - ElfRel *rel = nullptr; - if (ctx.arg.pic) - rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + reldyn_offset); - - for (Symbol *sym : symbols) { - u64 addr = sym->get_addr(ctx, NO_PLT | NO_OPD); - *buf++ = addr; - *buf++ = ctx.extra.TOC->value; - *buf++ = 0; - - if (ctx.arg.pic) { - u64 loc = sym->get_opd_addr(ctx); - *rel++ = ElfRel(loc, E::R_RELATIVE, 0, addr); - *rel++ = ElfRel(loc + 8, E::R_RELATIVE, 0, ctx.extra.TOC->value); - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-ppc64v2.cc b/third_party/mold/elf/arch-ppc64v2.cc deleted file mode 100644 index 3582bb57f58..00000000000 --- a/third_party/mold/elf/arch-ppc64v2.cc +++ /dev/null @@ -1,555 +0,0 @@ -// clang-format off -// This file implements the PowerPC ELFv2 ABI which was standardized in -// 2014. Modern little-endian PowerPC systems are based on this ABI. -// The ABI is often referred to as "ppc64le". This shouldn't be confused -// with "ppc64" which refers to the original, big-endian PowerPC systems. -// -// PPC64 is a bit tricky to support because PC-relative load/store -// instructions hadn't been available until Power10 which debuted in 2021. -// Prior to Power10, it wasn't trivial for position-independent code (PIC) -// to load a value from, for example, .got, as we can't do that with [PC + -// the offset to the .got entry]. -// -// In the following, I'll explain how PIC is supported on pre-Power10 -// systems first and then explain what has changed with Power10. -// -// -// Position-independent code on Power9 or earlier: -// -// We can get the program counter on older PPC64 systems with the -// following four instructions -// -// mflr r1 // save the current link register to r1 -// bl .+4 // branch to the next instruction as if it were a function -// mflr r0 // copy the return address to r0 -// mtlr r1 // restore the original link register value -// -// , but it's too expensive to do if we do this for each load/store. -// -// As a workaround, most functions are compiled in such a way that r2 is -// assumed to always contain the address of .got + 0x8000. With this, we -// can for example load the first entry of .got with a single instruction -// `lw r0, -0x8000(r2)`. r2 is called the TOC pointer. -// -// There's only one .got for each ELF module. Therefore, if a callee is in -// the same ELF module, r2 doesn't have to be recomputed. Most function -// calls are usually within the same ELF module, so this mechanism is -// efficient. -// -// A function compiled for pre-Power10 usually has two entry points, -// global and local. The global entry point usually 8 bytes precedes -// the local entry point. In between is the following instructions: -// -// addis r2, r12, .TOC.@ha -// addi r2, r2, .TOC.@lo + 4; -// -// The global entry point assumes that the address of itself is in r12, -// and it computes its own TOC pointer from r12. 
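// (Worked example, editorial and with made-up addresses, ignoring the small
// +4 adjustment in the sequence quoted above: suppose the global entry
// point is at 0x1000'0800 and .TOC. is at 0x1002'0000, so the link-time
// offset is 0x1f800. Then
//
//   addis r2, r12, 2        # .TOC.@ha = (0x1f800 + 0x8000) >> 16 = 2
//   addi  r2, r2, -0x800    # .TOC.@lo = 0xf800, sign-extended to -0x800
//
// leaves r2 = 0x1000'0800 + 0x2'0000 - 0x800 = 0x1002'0000.)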
It's easy to do so for -// the callee because the offset between its .got + 0x8000 and the -// function is known at link-time. The above code sequence then falls -// through to the local entry point that assumes r2 is .got + 0x8000. -// -// So, if a callee's TOC pointer is different from the current one -// (e.g. calling a function in another .so), we first load the callee's -// address to r12 (e.g. from .got.plt with a r2-relative load) and branch -// to that address. Then the callee computes its own TOC pointer using -// r12. -// -// -// Position-independent code on Power10: -// -// Power10 added 8-bytes-long instructions to the ISA. Some of them are -// PC-relative load/store instructions that take 34 bits offsets. -// Functions compiled with `-mcpu=power10` use these instructions for PIC. -// r2 does not have a special meaning in such fucntions. -// -// When a fucntion compiled for Power10 calls a function that uses the TOC -// pointer, we need to compute a correct value for TOC and set it to r2 -// before transferring the control to the callee. Thunks are responsible -// for doing it. -// -// `_NOTOC` relocations such as `R_PPC64_REL24_NOTOC` indicate that the -// callee does not use TOC (i.e. compiled with `-mcpu=power10`). If a -// function using TOC is referenced via a `_NOTOC` relocation, that call -// is made through a range extension thunk. -// -// -// Note on section names: the PPC64 psABI uses a weird naming convention -// which calls .got.plt .plt. We ignored that part because it's just -// confusing. Since the runtime only cares about segments, we should be -// able to name sections whatever we want. -// -// https://github.com/rui314/psabi/blob/main/ppc64v2.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = PPC64V2; - -static u64 lo(u64 x) { return x & 0xffff; } -static u64 hi(u64 x) { return x >> 16; } -static u64 ha(u64 x) { return (x + 0x8000) >> 16; } -static u64 high(u64 x) { return (x >> 16) & 0xffff; } -static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } - -static u64 prefix34(u64 x) { - return bits(x, 33, 16) | (bits(x, 15, 0) << 32); -} - -// .plt is used only for lazy symbol resolution on PPC64. All PLT -// calls are made via range extension thunks even if they are within -// reach. Thunks read addresses from .got.plt and jump there. -// Therefore, once PLT symbols are resolved and final addresses are -// written to .got.plt, thunks just skip .plt and directly jump to the -// resolved addresses. -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { - // Get PC - 0x7c08'02a6, // mflr r0 - 0x429f'0005, // bcl 20, 31, 4 // obtain PC - 0x7d68'02a6, // mflr r11 - 0x7c08'03a6, // mtlr r0 - - // Compute the PLT entry index - 0xe80b'002c, // ld r0, 44(r11) - 0x7d8b'6050, // subf r12, r11, r12 - 0x7d60'5a14, // add r11, r0, r11 - 0x380c'ffcc, // addi r0, r12, -52 - 0x7800'f082, // rldicl r0, r0, 62, 2 - - // Load .got.plt[0] and .got.plt[1] and branch to .got.plt[0] - 0xe98b'0000, // ld r12, 0(r11) - 0x7d89'03a6, // mtctr r12 - 0xe96b'0008, // ld r11, 8(r11) - 0x4e80'0420, // bctr - - // .quad .got.plt - .plt - 8 - 0x0000'0000, - 0x0000'0000, - }; - - memcpy(buf, insn, sizeof(insn)); - *(ul64 *)(buf + 52) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 8; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - // When the control is transferred to a PLT entry, the PLT entry's - // address is already set to %r12 by the caller. 
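// (Editorial sketch of the encoding below, with an assumed example value:
// each entry is a single backward branch to the PLT header. For an entry
// 0x40 bytes past plt0, offset is -0x40 and
//
//   0x4b00'0000 | (-0x40 & 0x00ff'ffff) == 0x4bff'ffc0   // b .-0x40
//
// 0x4b00'0000 is the `b` opcode with the upper displacement bits pre-set
// for a backward target.)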
- i64 offset = ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx); - *(ul32 *)buf = 0x4b00'0000 | (offset & 0x00ff'ffff); // b plt0 -} - -// .plt.got is not necessary on PPC64 because range extension thunks -// directly read GOT entries and jump there. -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_PPC64_ADDR64: - *(ul64 *)loc = val; - break; - case R_PPC64_REL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_PPC64_REL64: - *(ul64 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -static u64 get_local_entry_offset(Context &ctx, Symbol &sym) { - i64 val = sym.esym().ppc_local_entry; - assert(val <= 7); - if (val == 7) - Fatal(ctx) << sym << ": local entry offset 7 is reserved"; - - if (val == 0 || val == 1) - return 0; - return 1 << val; -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 TOC = ctx.extra.TOC->value; - - auto r2save_thunk_addr = [&] { return get_thunk_addr(i); }; - auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 4; }; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - apply_toc_rel(ctx, sym, rel, loc, S, A, P, dynrel); - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_PPC64_TOC16_HA: - *(ul16 *)loc = ha(S + A - TOC); - break; - case R_PPC64_TOC16_LO: - *(ul16 *)loc = lo(S + A - TOC); - break; - case R_PPC64_TOC16_DS: - case R_PPC64_TOC16_LO_DS: - *(ul16 *)loc |= (S + A - TOC) & 0xfffc; - break; - case R_PPC64_REL24: - if (sym.has_plt(ctx) || !sym.esym().preserves_r2()) { - i64 val = r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - - // The thunk saves %r2 to the caller's r2 save slot. We need to - // restore it after function return. To do so, there's usually a - // NOP as a placeholder after a BL. 0x6000'0000 is a NOP. 
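// (Editorial note: the rewrite below emits 0xe841'0018, i.e. `ld r2, 24(r1)`.
// 24(r1) is the TOC save slot of the ELFv2 stack frame; ELFv1 uses 40(r1)
// instead, as seen in arch-ppc64v1.cc.)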
- if (*(ul32 *)(loc + 4) == 0x6000'0000) - *(ul32 *)(loc + 4) = 0xe841'0018; // ld r2, 24(r1) - } else { - i64 val = S + get_local_entry_offset(ctx, sym) + A - P; - if (sign_extend(val, 25) != val) - val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } - break; - case R_PPC64_REL24_NOTOC: - if (sym.has_plt(ctx) || sym.esym().uses_toc()) { - i64 val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } else { - i64 val = S + A - P; - if (sign_extend(val, 25) != val) - val = no_r2save_thunk_addr() + A - P; - *(ul32 *)loc |= bits(val, 25, 2) << 2; - } - break; - case R_PPC64_REL32: - *(ul32 *)loc = S + A - P; - break; - case R_PPC64_REL64: - *(ul64 *)loc = S + A - P; - break; - case R_PPC64_REL16_HA: - *(ul16 *)loc = ha(S + A - P); - break; - case R_PPC64_REL16_LO: - *(ul16 *)loc = lo(S + A - P); - break; - case R_PPC64_PLT16_HA: - *(ul16 *)loc = ha(G + GOT - TOC); - break; - case R_PPC64_PLT16_HI: - *(ul16 *)loc = hi(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO: - *(ul16 *)loc = lo(G + GOT - TOC); - break; - case R_PPC64_PLT16_LO_DS: - *(ul16 *)loc |= (G + GOT - TOC) & 0xfffc; - break; - case R_PPC64_PLT_PCREL34: - case R_PPC64_PLT_PCREL34_NOTOC: - case R_PPC64_GOT_PCREL34: - *(ul64 *)loc |= prefix34(G + GOT - P); - break; - case R_PPC64_PCREL34: - *(ul64 *)loc |= prefix34(S + A - P); - break; - case R_PPC64_GOT_TPREL16_HA: - *(ul16 *)loc = ha(sym.get_gottp_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TPREL16_LO_DS: - *(ul16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; - break; - case R_PPC64_GOT_TPREL_PCREL34: - *(ul64 *)loc |= prefix34(sym.get_gottp_addr(ctx) - P); - break; - case R_PPC64_GOT_TLSGD16_HA: - *(ul16 *)loc = ha(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD16_LO: - *(ul16 *)loc = lo(sym.get_tlsgd_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSGD_PCREL34: - *(ul64 *)loc |= prefix34(sym.get_tlsgd_addr(ctx) - P); - break; - case R_PPC64_GOT_TLSLD16_HA: - *(ul16 *)loc = ha(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD16_LO: - *(ul16 *)loc = lo(ctx.got->get_tlsld_addr(ctx) - TOC); - break; - case R_PPC64_GOT_TLSLD_PCREL34: - *(ul64 *)loc |= prefix34(ctx.got->get_tlsld_addr(ctx) - P); - break; - case R_PPC64_DTPREL16_HA: - *(ul16 *)loc = ha(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL16_LO: - *(ul16 *)loc = lo(S + A - ctx.dtp_addr); - break; - case R_PPC64_DTPREL34: - *(ul64 *)loc |= prefix34(S + A - ctx.dtp_addr); - break; - case R_PPC64_TPREL16_HA: - *(ul16 *)loc = ha(S + A - ctx.tp_addr); - break; - case R_PPC64_TPREL16_LO: - *(ul16 *)loc = lo(S + A - ctx.tp_addr); - break; - case R_PPC64_PLTSEQ: - case R_PPC64_PLTSEQ_NOTOC: - case R_PPC64_PLTCALL: - case R_PPC64_PLTCALL_NOTOC: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = 
frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - case R_PPC64_ADDR32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ul32 *)loc = val; - break; - } - case R_PPC64_DTPREL64: - *(ul64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - scan_toc_rel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; - case R_PPC64_GOT_TPREL16_HA: - case R_PPC64_GOT_TPREL_PCREL34: - sym.flags |= NEEDS_GOTTP; - break; - case R_PPC64_REL24: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_PPC64_REL24_NOTOC: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - ctx.extra.is_power10 = true; - break; - case R_PPC64_PLT16_HA: - case R_PPC64_PLT_PCREL34: - case R_PPC64_PLT_PCREL34_NOTOC: - case R_PPC64_GOT_PCREL34: - sym.flags |= NEEDS_GOT; - break; - case R_PPC64_GOT_TLSGD16_HA: - case R_PPC64_GOT_TLSGD_PCREL34: - sym.flags |= NEEDS_TLSGD; - break; - case R_PPC64_GOT_TLSLD16_HA: - case R_PPC64_GOT_TLSLD_PCREL34: - ctx.needs_tlsld = true; - break; - case R_PPC64_TPREL16_HA: - case R_PPC64_TPREL16_LO: - check_tlsle(ctx, sym, rel); - break; - case R_PPC64_REL32: - case R_PPC64_REL64: - case R_PPC64_TOC16_HA: - case R_PPC64_TOC16_LO: - case R_PPC64_TOC16_LO_DS: - case R_PPC64_TOC16_DS: - case R_PPC64_REL16_HA: - case R_PPC64_REL16_LO: - case R_PPC64_PLT16_HI: - case R_PPC64_PLT16_LO: - case R_PPC64_PLT16_LO_DS: - case R_PPC64_PCREL34: - case R_PPC64_PLTSEQ: - case R_PPC64_PLTSEQ_NOTOC: - case R_PPC64_PLTCALL: - case R_PPC64_PLTCALL_NOTOC: - case R_PPC64_GOT_TPREL16_LO_DS: - case R_PPC64_GOT_TLSGD16_LO: - case R_PPC64_GOT_TLSLD16_LO: - case R_PPC64_TLS: - case R_PPC64_TLSGD: - case R_PPC64_TLSLD: - case R_PPC64_DTPREL16_HA: - case R_PPC64_DTPREL16_LO: - case R_PPC64_DTPREL34: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template <> -void RangeExtensionThunk::copy_buf(Context &ctx) { - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - - // If the destination is PLT, we read an address from .got.plt or .got - // and jump there. - static const ul32 plt_thunk[] = { - 0xf841'0018, // std r2, 24(r1) - 0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha - 0xe98c'0000, // ld r12, foo@gotplt@toc@lo(r12) - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static const ul32 plt_thunk_power10[] = { - 0xf841'0018, // std r2, 24(r1) - 0x0410'0000, // pld r12, foo@gotplt@pcrel - 0xe580'0000, - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - // If the destination is a non-imported function, we directly jump - // to its local entry point. 
- static const ul32 local_thunk[] = { - 0xf841'0018, // std r2, 24(r1) - 0x3d82'0000, // addis r12, r2, foo@toc@ha - 0x398c'0000, // addi r12, r12, foo@toc@lo - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static const ul32 local_thunk_power10[] = { - 0xf841'0018, // std r2, 24(r1) - 0x0610'0000, // pla r12, foo@pcrel - 0x3980'0000, - 0x7d89'03a6, // mtctr r12 - 0x4e80'0420, // bctr - }; - - static_assert(E::thunk_size == sizeof(plt_thunk)); - static_assert(E::thunk_size == sizeof(plt_thunk_power10)); - static_assert(E::thunk_size == sizeof(local_thunk)); - static_assert(E::thunk_size == sizeof(local_thunk_power10)); - - for (i64 i = 0; i < symbols.size(); i++) { - Symbol &sym = *symbols[i]; - ul32 *loc = (ul32 *)(buf + i * E::thunk_size); - - if (sym.has_plt(ctx)) { - u64 got = sym.has_got(ctx) ? sym.get_got_addr(ctx) : sym.get_gotplt_addr(ctx); - - if (ctx.extra.is_power10) { - memcpy(loc, plt_thunk_power10, E::thunk_size); - *(ul64 *)(loc + 1) |= prefix34(got - get_addr(i) - 4); - } else { - i64 val = got - ctx.extra.TOC->value; - memcpy(loc, plt_thunk, E::thunk_size); - loc[1] |= higha(val); - loc[2] |= lo(val); - } - } else { - if (ctx.extra.is_power10) { - memcpy(loc, local_thunk_power10, E::thunk_size); - *(ul64 *)(loc + 1) |= prefix34(sym.get_addr(ctx) - get_addr(i) - 4); - } else { - i64 val = sym.get_addr(ctx) - ctx.extra.TOC->value; - memcpy(loc, local_thunk, E::thunk_size); - loc[1] |= higha(val); - loc[2] |= lo(val); - } - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-riscv.cc b/third_party/mold/elf/arch-riscv.cc deleted file mode 100644 index ddef6419b61..00000000000 --- a/third_party/mold/elf/arch-riscv.cc +++ /dev/null @@ -1,938 +0,0 @@ -// clang-format off -// RISC-V is a clean RISC ISA. It supports PC-relative load/store for -// position-independent code. Its 32-bit and 64-bit ISAs are almost -// identical. That is, you can think RV32 as a RV64 without 64-bit -// operations. In this file, we support both RV64 and RV32. -// -// RISC-V is essentially little-endian, but the big-endian version is -// available as an extension. GCC supports `-mbig-endian` to generate -// big-endian code. Even in big-endian mode, machine instructions are -// defined to be encoded in little-endian, though. Only the behavior of -// load/store instructions are different between LE RISC-V and BE RISC-V. -// -// From the linker's point of view, the RISC-V's psABI is unique because -// sections in input object files can be shrunk while being copied to the -// output file. That is contrary to other psABIs in which sections are an -// atomic unit of copying. Let me explain it in more details. -// -// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to -// embed a very large immediate into a branch instruction. In fact, JAL -// (jump and link) instruction can jump to only within PC ± 1 MiB because -// its immediate is only 21 bits long. If the destination is out of its -// reach, we need to use two instructions instead; the first instruction -// being AUIPC which sets upper 20 bits to a register and the second being -// JALR with a 12-bit immediate and the register. Combined, they specify a -// 32 bits displacement. -// -// Other RISC ISAs have the same limitation, and they solved the problem by -// letting the linker create so-called "range extension thunks". It works as -// follows: the compiler optimistically emits single jump instructions for -// function calls. 
If the linker finds that a branch target is out of reach, -// it emits a small piece of machine code near the branch instruction and -// redirect the branch to the linker-synthesized code. The code constructs a -// full 32-bit address in a register and jump to the destination. That -// linker-synthesized code is called "range extension thunks" or just -// "thunks". -// -// The RISC-V psABI is unique that it works the other way around. That is, -// for RISC-V, the compiler always emits two instructions (AUIPC + JAL) for -// function calls. If the linker finds the destination is reachable with a -// single instruction, it replaces the two instructions with the one and -// shrink the section size by one instruction length, instead of filling the -// gap with a nop. -// -// With the presence of this relaxation, sections can no longer be -// considered as an atomic unit. If we delete 4 bytes from the middle of a -// section, all contents after that point needs to be shifted by 4. Symbol -// values and relocation offsets have to be adjusted accordingly if they -// refer to past the deleted bytes. -// -// In mold, we use `r_deltas` to memorize how many bytes have be adjusted -// for relocations. For symbols, we directly mutate their `value` member. -// -// RISC-V object files tend to have way more relocations than those for -// other targets. This is because all branches, including ones that jump -// within the same section, are explicitly expressed with relocations. -// Here is why we need them: all control-flow statements such as `if` or -// `for` are implemented using branch instructions. For other targets, the -// compiler doesn't emit relocations for such branches because they know -// at compile-time exactly how many bytes has to be skipped. That's not -// true to RISC-V because the linker may delete bytes between a branch and -// its destination. Therefore, all branches including in-section ones have -// to be explicitly expressed with relocations. -// -// Note that this mechanism only shrink sections and never enlarge, as -// the compiler always emits the longest instruction sequence. This -// makes the linker implementation a bit simpler because we don't need -// to worry about oscillation. -// -// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc - -#include "third_party/mold/elf/mold.h" - -// MISSING #include -// MISSING #include - -namespace mold::elf { - -static void write_itype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'11111'111'11111'1111111; - *(ul32 *)loc |= bits(val, 11, 0) << 20; -} - -static void write_stype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; - *(ul32 *)loc |= bits(val, 11, 5) << 25 | bits(val, 4, 0) << 7; -} - -static void write_btype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'11111'11111'111'00000'1111111; - *(ul32 *)loc |= bit(val, 12) << 31 | bits(val, 10, 5) << 25 | - bits(val, 4, 1) << 8 | bit(val, 11) << 7; -} - -static void write_utype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; - - // U-type instructions are used in combination with I-type - // instructions. U-type insn sets an immediate to the upper 20-bits - // of a register. I-type insn sign-extends a 12-bits immediate and - // adds it to a register value to construct a complete value. 0x800 - // is added here to compensate for the sign-extension. 
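  //
  // Editorial worked example, not part of the original file: for
  // val = 0x1fff, the U-type half becomes (0x1fff + 0x800) & ~0xfff = 0x2000,
  // and the paired I-type half sign-extends 0xfff to -1, so the two
  // instructions together reconstruct 0x2000 - 1 = 0x1fff as intended.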
- *(ul32 *)loc |= (val + 0x800) & 0xffff'f000; -} - -static void write_jtype(u8 *loc, u32 val) { - *(ul32 *)loc &= 0b000000'00000'00000'000'11111'1111111; - *(ul32 *)loc |= bit(val, 20) << 31 | bits(val, 10, 1) << 21 | - bit(val, 11) << 20 | bits(val, 19, 12) << 12; -} - -static void write_cbtype(u8 *loc, u32 val) { - *(ul16 *)loc &= 0b111'000'111'00000'11; - *(ul16 *)loc |= bit(val, 8) << 12 | bit(val, 4) << 11 | bit(val, 3) << 10 | - bit(val, 7) << 6 | bit(val, 6) << 5 | bit(val, 2) << 4 | - bit(val, 1) << 3 | bit(val, 5) << 2; -} - -static void write_cjtype(u8 *loc, u32 val) { - *(ul16 *)loc &= 0b111'00000000000'11; - *(ul16 *)loc |= bit(val, 11) << 12 | bit(val, 4) << 11 | bit(val, 9) << 10 | - bit(val, 8) << 9 | bit(val, 10) << 8 | bit(val, 6) << 7 | - bit(val, 7) << 6 | bit(val, 3) << 5 | bit(val, 2) << 4 | - bit(val, 1) << 3 | bit(val, 5) << 2; -} - -static void overwrite_uleb(u8 *loc, u64 val) { - while (*loc & 0b1000'0000) { - *loc++ = 0b1000'0000 | (val & 0b0111'1111); - val >>= 7; - } -} - -// Returns the rd register of an R/I/U/J-type instruction. -static u32 get_rd(u32 val) { - return bits(val, 11, 7); -} - -static void set_rs1(u8 *loc, u32 rs1) { - assert(rs1 < 32); - *(ul32 *)loc &= 0b111111'11111'00000'111'11111'1111111; - *(ul32 *)loc |= rs1 << 15; -} - -template -void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn_64[] = { - 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) - 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 - 0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve - 0xfd43'0313, // addi t1, t1, -44 # .plt entry - 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt - 0x0013'5313, // srli t1, t1, 1 # .plt entry offset - 0x0082'b283, // ld t0, 8(t0) # link map - 0x000e'0067, // jr t3 - }; - - static const ul32 insn_32[] = { - 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) - 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 - 0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve - 0xfd43'0313, // addi t1, t1, -44 # .plt entry - 0x0003'8293, // addi t0, t2, %pcrel_lo(1b) # &.got.plt - 0x0023'5313, // srli t1, t1, 2 # .plt entry offset - 0x0042'a283, // lw t0, 4(t0) # link map - 0x000e'0067, // jr t3 - }; - - if constexpr (E::is_64) - memcpy(buf, insn_64, sizeof(insn_64)); - else - memcpy(buf, insn_32, sizeof(insn_32)); - - u64 gotplt = ctx.gotplt->shdr.sh_addr; - u64 plt = ctx.plt->shdr.sh_addr; - write_utype(buf, gotplt - plt); - write_itype(buf + 8, gotplt - plt); - write_itype(buf + 16, gotplt - plt); -} - -static const ul32 plt_entry_64[] = { - 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) - 0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3) - 0x000e'0367, // jalr t1, t3 - 0x0000'0013, // nop -}; - -static const ul32 plt_entry_32[] = { - 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) - 0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3) - 0x000e'0367, // jalr t1, t3 - 0x0000'0013, // nop -}; - -template -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if constexpr (E::is_64) - memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); - else - memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); - - u64 gotplt = sym.get_gotplt_addr(ctx); - u64 plt = sym.get_plt_addr(ctx); - write_utype(buf, gotplt - plt); - write_itype(buf + 4, gotplt - plt); -} - -template -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if constexpr (E::is_64) - memcpy(buf, plt_entry_64, sizeof(plt_entry_64)); - else - memcpy(buf, plt_entry_32, sizeof(plt_entry_32)); - - u64 got = sym.get_got_addr(ctx); - u64 plt = 
sym.get_plt_addr(ctx); - write_utype(buf, got - plt); - write_itype(buf + 4, got - plt); -} - -template -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_RISCV_ADD32: - *(U32 *)loc += val; - break; - case R_RISCV_SUB8: - *loc -= val; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= val; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= val; - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - val) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | (val & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = val; - break; - case R_RISCV_SET16: - *(U16 *)loc = val; - break; - case R_RISCV_SET32: - *(U32 *)loc = val; - break; - case R_RISCV_32_PCREL: - *(U32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - auto get_r_delta = [&](i64 idx) { - return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx]; - }; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || rel.r_type == R_RISCV_RELAX) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - i64 r_offset = rel.r_offset - get_r_delta(i); - i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i); - u8 *loc = base + r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto find_paired_reloc = [&] { - Symbol &sym = *file.symbols[rels[i].r_sym]; - assert(sym.get_input_section() == this); - - if (sym.value < r_offset) { - for (i64 j = i - 1; j >= 0; j--) - if (u32 ty = rels[j].r_type; - ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || - ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) - if (sym.value == rels[j].r_offset - get_r_delta(j)) - return j; - } else { - for (i64 j = i + 1; j < rels.size(); j++) - if (u32 ty = rels[j].r_type; - ty == R_RISCV_GOT_HI20 || ty == R_RISCV_TLS_GOT_HI20 || - ty == R_RISCV_TLS_GD_HI20 || ty == R_RISCV_PCREL_HI20) - if (sym.value == rels[j].r_offset - get_r_delta(j)) - return j; - } - - Fatal(ctx) << *this << ": paired relocation is missing: " << i; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_RISCV_32: - if constexpr (E::is_64) - *(U32 *)loc = S + A; - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_RISCV_64: - assert(E::is_64); - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_RISCV_BRANCH: - check(S + A - P, -(1 << 12), 1 << 12); - write_btype(loc, S + A - P); - break; - case R_RISCV_JAL: - check(S + A - P, -(1 << 20), 1 << 20); - write_jtype(loc, S + A - P); - break; - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: { - u32 rd = get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4)); - - if (removed_bytes == 4) { - // auipc + jalr -> jal - *(ul32 *)loc = (rd << 7) | 0b1101111; - write_jtype(loc, S + A - P); - } 
else if (removed_bytes == 6 && rd == 0) { - // auipc + jalr -> c.j - *(ul16 *)loc = 0b101'00000000000'01; - write_cjtype(loc, S + A - P); - } else if (removed_bytes == 6 && rd == 1) { - // auipc + jalr -> c.jal - assert(!E::is_64); - *(ul16 *)loc = 0b001'00000000000'01; - write_cjtype(loc, S + A - P); - } else { - assert(removed_bytes == 0); - // Calling an undefined weak symbol does not make sense. - // We make such call into an infinite loop. This should - // help debugging of a faulty program. - u64 val = sym.esym().is_undef_weak() ? 0 : S + A - P; - check(val, -(1LL << 31), 1LL << 31); - write_utype(loc, val); - write_itype(loc + 4, val); - } - break; - } - case R_RISCV_GOT_HI20: - write_utype(loc, G + GOT + A - P); - break; - case R_RISCV_TLS_GOT_HI20: - write_utype(loc, sym.get_gottp_addr(ctx) + A - P); - break; - case R_RISCV_TLS_GD_HI20: - write_utype(loc, sym.get_tlsgd_addr(ctx) + A - P); - break; - case R_RISCV_PCREL_HI20: - write_utype(loc, S + A - P); - break; - case R_RISCV_PCREL_LO12_I: - case R_RISCV_PCREL_LO12_S: { - i64 idx2 = find_paired_reloc(); - const ElfRel &rel2 = rels[idx2]; - Symbol &sym2 = *file.symbols[rel2.r_sym]; - - u64 S = sym2.get_addr(ctx); - u64 A = rel2.r_addend; - u64 P = get_addr() + rel2.r_offset - get_r_delta(idx2); - u64 G = sym2.get_got_idx(ctx) * sizeof(Word); - u64 val; - - switch (rel2.r_type) { - case R_RISCV_GOT_HI20: - val = G + GOT + A - P; - break; - case R_RISCV_TLS_GOT_HI20: - val = sym2.get_gottp_addr(ctx) + A - P; - break; - case R_RISCV_TLS_GD_HI20: - val = sym2.get_tlsgd_addr(ctx) + A - P; - break; - case R_RISCV_PCREL_HI20: - val = S + A - P; - break; - default: - unreachable(); - } - - if (rel.r_type == R_RISCV_PCREL_LO12_I) - write_itype(loc, val); - else - write_stype(loc, val); - break; - } - case R_RISCV_HI20: - assert(removed_bytes == 0 || removed_bytes == 4); - if (removed_bytes == 0) { - check(S + A, -(1LL << 31), 1LL << 31); - write_utype(loc, S + A); - } - break; - case R_RISCV_LO12_I: - case R_RISCV_LO12_S: - if (rel.r_type == R_RISCV_LO12_I) - write_itype(loc, S + A); - else - write_stype(loc, S + A); - - // Rewrite `lw t1, 0(t0)` with `lw t1, 0(x0)` if the address is - // accessible relative to the zero register. If the upper 20 bits - // are all zero, the corresponding LUI might have been removed. - if (bits(S + A, 31, 12) == 0) - set_rs1(loc, 0); - break; - case R_RISCV_TPREL_HI20: - assert(removed_bytes == 0 || removed_bytes == 4); - if (removed_bytes == 0) - write_utype(loc, S + A - ctx.tp_addr); - break; - case R_RISCV_TPREL_ADD: - // This relocation just annotates an ADD instruction that can be - // removed when a TPREL is relaxed. No value is needed to be - // written. - assert(removed_bytes == 0 || removed_bytes == 4); - break; - case R_RISCV_TPREL_LO12_I: - case R_RISCV_TPREL_LO12_S: { - i64 val = S + A - ctx.tp_addr; - if (rel.r_type == R_RISCV_TPREL_LO12_I) - write_itype(loc, val); - else - write_stype(loc, val); - - // Rewrite `lw t1, 0(t0)` with `lw t1, 0(tp)` if the address is - // directly accessible using tp. tp is x4. 
- if (sign_extend(val, 11) == val) - set_rs1(loc, 4); - break; - } - case R_RISCV_ADD8: - loc += S + A; - break; - case R_RISCV_ADD16: - *(U16 *)loc += S + A; - break; - case R_RISCV_ADD32: - *(U32 *)loc += S + A; - break; - case R_RISCV_ADD64: - *(U64 *)loc += S + A; - break; - case R_RISCV_SUB8: - loc -= S + A; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= S + A; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= S + A; - break; - case R_RISCV_SUB64: - *(U64 *)loc -= S + A; - break; - case R_RISCV_ALIGN: { - // A R_RISCV_ALIGN is followed by a NOP sequence. We need to remove - // zero or more bytes so that the instruction after R_RISCV_ALIGN is - // aligned to a given alignment boundary. - // - // We need to guarantee that the NOP sequence is valid after byte - // removal (e.g. we can't remove the first 2 bytes of a 4-byte NOP). - // For the sake of simplicity, we always rewrite the entire NOP sequence. - i64 padding_bytes = rel.r_addend - removed_bytes; - assert((padding_bytes & 1) == 0); - - i64 i = 0; - for (; i <= padding_bytes - 4; i += 4) - *(ul32 *)(loc + i) = 0x0000'0013; // nop - if (i < padding_bytes) - *(ul16 *)(loc + i) = 0x0001; // c.nop - break; - } - case R_RISCV_RVC_BRANCH: - check(S + A - P, -(1 << 8), 1 << 8); - write_cbtype(loc, S + A - P); - break; - case R_RISCV_RVC_JUMP: - check(S + A - P, -(1 << 11), 1 << 11); - write_cjtype(loc, S + A - P); - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = S + A; - break; - case R_RISCV_SET16: - *(U16 *)loc = S + A; - break; - case R_RISCV_SET32: - *(U32 *)loc = S + A; - break; - case R_RISCV_PLT32: - case R_RISCV_32_PCREL: - *(U32 *)loc = S + A - P; - break; - default: - unreachable(); - } - } -} - -template -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_RISCV_32: - *(U32 *)loc = S + A; - break; - case R_RISCV_64: - if (std::optional val = get_tombstone(sym, frag)) - *(U64 *)loc = *val; - else - *(U64 *)loc = S + A; - break; - case R_RISCV_ADD8: - *loc += S + A; - break; - case R_RISCV_ADD16: - *(U16 *)loc += S + A; - break; - case R_RISCV_ADD32: - *(U32 *)loc += S + A; - break; - case R_RISCV_ADD64: - *(U64 *)loc += S + A; - break; - case R_RISCV_SUB8: - *loc -= S + A; - break; - case R_RISCV_SUB16: - *(U16 *)loc -= S + A; - break; - case R_RISCV_SUB32: - *(U32 *)loc -= S + A; - break; - case R_RISCV_SUB64: - *(U64 *)loc -= S + A; - break; - case R_RISCV_SUB6: - *loc = (*loc & 0b1100'0000) | ((*loc - (S + A)) & 0b0011'1111); - break; - case R_RISCV_SET6: - *loc = (*loc & 0b1100'0000) | ((S + A) & 0b0011'1111); - break; - case R_RISCV_SET8: - *loc = S + A; - break; - case R_RISCV_SET16: - *(U16 *)loc = S + A; - break; - case R_RISCV_SET32: - *(U32 *)loc = S + A; - break; - case R_RISCV_SET_ULEB128: - overwrite_uleb(loc, S + A); - break; - case R_RISCV_SUB_ULEB128: { - u8 *p = loc; - u64 val = read_uleb(p); - overwrite_uleb(loc, val - S - A); - break; - } - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - break; - } - } -} - -template -void InputSection::copy_contents_riscv(Context &ctx, u8 *buf) { - // If a section is not relaxed, we can copy it as a one big chunk. - if (extra.r_deltas.empty()) { - uncompress_to(ctx, buf); - return; - } - - // A relaxed section is copied piece-wise. - std::span> rels = get_rels(ctx); - i64 pos = 0; - - for (i64 i = 0; i < rels.size(); i++) { - i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i]; - if (delta == 0) - continue; - assert(delta > 0); - - const ElfRel &r = rels[i]; - memcpy(buf, contents.data() + pos, r.r_offset - pos); - buf += r.r_offset - pos; - pos = r.r_offset + delta; - } - - memcpy(buf, contents.data() + pos, contents.size() - pos); -} - -template -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_RISCV_32: - if constexpr (E::is_64) - scan_absrel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; - case R_RISCV_HI20: - scan_absrel(ctx, sym, rel); - break; - case R_RISCV_64: - if constexpr (!E::is_64) - Fatal(ctx) << *this << ": R_RISCV_64 cannot be used on RV32"; - scan_dyn_absrel(ctx, sym, rel); - break; - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: - case R_RISCV_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_RISCV_GOT_HI20: - sym.flags |= NEEDS_GOT; - break; - case R_RISCV_TLS_GOT_HI20: - sym.flags |= NEEDS_GOTTP; - break; - case R_RISCV_TLS_GD_HI20: - sym.flags |= NEEDS_TLSGD; - break; - case R_RISCV_32_PCREL: - scan_pcrel(ctx, sym, rel); - break; - case R_RISCV_TPREL_HI20: - case R_RISCV_TPREL_LO12_I: - case R_RISCV_TPREL_LO12_S: - case R_RISCV_TPREL_ADD: - check_tlsle(ctx, sym, rel); - break; - case R_RISCV_BRANCH: - case R_RISCV_JAL: - case R_RISCV_PCREL_HI20: - case R_RISCV_PCREL_LO12_I: - case R_RISCV_PCREL_LO12_S: - case R_RISCV_LO12_I: - case R_RISCV_LO12_S: - case R_RISCV_ADD8: - 
case R_RISCV_ADD16: - case R_RISCV_ADD32: - case R_RISCV_ADD64: - case R_RISCV_SUB8: - case R_RISCV_SUB16: - case R_RISCV_SUB32: - case R_RISCV_SUB64: - case R_RISCV_ALIGN: - case R_RISCV_RVC_BRANCH: - case R_RISCV_RVC_JUMP: - case R_RISCV_RELAX: - case R_RISCV_SUB6: - case R_RISCV_SET6: - case R_RISCV_SET8: - case R_RISCV_SET16: - case R_RISCV_SET32: - break; - default: - Error(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -template -static bool is_resizable(Context &ctx, InputSection *isec) { - return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) && - (isec->shdr().sh_flags & SHF_EXECINSTR); -} - -// Returns the distance between a relocated place and a symbol. -template -static i64 compute_distance(Context &ctx, Symbol &sym, - InputSection &isec, const ElfRel &rel) { - // We handle absolute symbols as if they were infinitely far away - // because `shrink_section` may increase a distance between a branch - // instruction and an absolute symbol. Branching to an absolute - // location is extremely rare in real code, though. - if (sym.is_absolute()) - return INT32_MAX; - - // Likewise, relocations against weak undefined symbols won't be relaxed. - if (sym.esym().is_undef_weak()) - return INT32_MAX; - - // Compute a distance between the relocated place and the symbol. - i64 S = sym.get_addr(ctx); - i64 A = rel.r_addend; - i64 P = isec.get_addr() + rel.r_offset; - return S + A - P; -} - -// Scan relocations to shrink sections. -template -static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { - std::span> rels = isec.get_rels(ctx); - isec.extra.r_deltas.resize(rels.size() + 1); - - i64 delta = 0; - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &r = rels[i]; - Symbol &sym = *isec.file.symbols[r.r_sym]; - isec.extra.r_deltas[i] = delta; - - // Handling R_RISCV_ALIGN is mandatory. - // - // R_RISCV_ALIGN refers to NOP instructions. We need to eliminate some - // or all of the instructions so that the instruction that immediately - // follows the NOPs is aligned to a specified alignment boundary. - if (r.r_type == R_RISCV_ALIGN) { - // The total bytes of NOPs is stored to r_addend, so the next - // instruction is r_addend away. - u64 loc = isec.get_addr() + r.r_offset - delta; - u64 next_loc = loc + r.r_addend; - u64 alignment = bit_ceil(r.r_addend + 1); - assert(alignment <= (1 << isec.p2align)); - delta += next_loc - align_to(loc, alignment); - continue; - } - - // Handling other relocations is optional. - if (!ctx.arg.relax || i == rels.size() - 1 || - rels[i + 1].r_type != R_RISCV_RELAX) - continue; - - // Linker-synthesized symbols haven't been assigned their final - // values when we are shrinking sections because actual values can - // be computed only after we fix the file layout. Therefore, we - // assume that relocations against such symbols are always - // non-relaxable. - if (sym.file == ctx.internal_obj) - continue; - - switch (r.r_type) { - case R_RISCV_CALL: - case R_RISCV_CALL_PLT: { - // These relocations refer to an AUIPC + JALR instruction pair to - // allow to jump to anywhere in PC ± 2 GiB. If the jump target is - // close enough to PC, we can use C.J, C.JAL or JAL instead. - i64 dist = compute_distance(ctx, sym, isec, r); - if (dist & 1) - break; - - i64 rd = get_rd(*(ul32 *)(isec.contents.data() + r.r_offset + 4)); - - if (rd == 0 && sign_extend(dist, 11) == dist && use_rvc) { - // If rd is x0 and the jump target is within ±2 KiB, we can use - // C.J, saving 6 bytes. 
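        //
        // Editorial note, not part of the original file: the AUIPC + JALR
        // pair occupies 8 bytes, so rewriting it as a 2-byte C.J (or C.JAL)
        // removes 6 bytes, and rewriting it as a 4-byte JAL removes 4.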
- delta += 6; - } else if (rd == 1 && sign_extend(dist, 11) == dist && use_rvc && !E::is_64) { - // If rd is x1 and the jump target is within ±2 KiB, we can use - // C.JAL. This is RV32 only because C.JAL is RV32-only instruction. - delta += 6; - } else if (sign_extend(dist, 20) == dist) { - // If the jump target is within ±1 MiB, we can use JAL. - delta += 4; - } - break; - } - case R_RISCV_HI20: - // If the upper 20 bits are all zero, we can remove LUI. - // The corresponding instructions referred to by LO12_I/LO12_S - // relocations will use the zero register instead. - if (bits(sym.get_addr(ctx), 31, 12) == 0) - delta += 4; - break; - case R_RISCV_TPREL_HI20: - case R_RISCV_TPREL_ADD: - // These relocations are used to add a high 20-bit value to the - // thread pointer. The following two instructions materializes - // TP + HI20(foo) in %r5, for example. - // - // lui a5,%tprel_hi(foo) # R_RISCV_TPREL_HI20 (symbol) - // add a5,a5,tp,%tprel_add(foo) # R_RISCV_TPREL_ADD (symbol) - // - // Then thread-local variable `foo` is accessed with a low 12-bit - // offset like this: - // - // sw t0,%tprel_lo(foo)(a5) # R_RISCV_TPREL_LO12_S (symbol) - // - // However, if the variable is at TP ±2 KiB, TP + HI20(foo) is the - // same as TP, so we can instead access the thread-local variable - // directly using TP like this: - // - // sw t0,%tprel_lo(foo)(tp) - // - // Here, we remove `lui` and `add` if the offset is within ±2 KiB. - if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr; - sign_extend(val, 11) == val) - delta += 4; - break; - } - } - - isec.extra.r_deltas[rels.size()] = delta; - isec.sh_size -= delta; -} - -// Shrink sections by interpreting relocations. -// -// This operation seems to be optional, because by default longest -// instructions are being used. However, calling this function is actually -// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the -// linker to align the location referred to by the relocation to a -// specified byte boundary. We at least have to interpret them to satisfy -// the alignment constraints. -template -i64 riscv_resize_sections(Context &ctx) { - Timer t(ctx, "riscv_resize_sections"); - - // True if we can use the 2-byte instructions. This is usually true on - // Unix because RV64GC is generally considered the baseline hardware. - bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC; - - // Find all the relocations that can be relaxed. - // This step should only shrink sections. - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (std::unique_ptr> &isec : file->sections) - if (is_resizable(ctx, isec.get())) - shrink_section(ctx, *isec, use_rvc); - }); - - // Fix symbol values. - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) { - if (sym->file != file) - continue; - - InputSection *isec = sym->get_input_section(); - if (!isec || isec->extra.r_deltas.empty()) - continue; - - std::span> rels = isec->get_rels(ctx); - auto it = std::lower_bound(rels.begin(), rels.end(), sym->value, - [&](const ElfRel &r, u64 val) { - return r.r_offset < val; - }); - - sym->value -= isec->extra.r_deltas[it - rels.begin()]; - } - }); - - // Re-compute section offset again to finalize them. 
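  //
  // Editorial note, not part of the original file: since relaxation only
  // ever shrinks sections (see the comment at the top of this file), one
  // recomputation pass is sufficient; no fixed-point iteration is needed.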
- compute_section_sizes(ctx); - return set_osec_offsets(ctx); -} - -#define INSTANTIATE(E) \ - template void write_plt_header(Context &, u8 *); \ - template void write_plt_entry(Context &, u8 *, Symbol &); \ - template void write_pltgot_entry(Context &, u8 *, Symbol &); \ - template void \ - EhFrameSection::apply_reloc(Context &, const ElfRel &, u64, u64); \ - template void InputSection::apply_reloc_alloc(Context &, u8 *); \ - template void InputSection::apply_reloc_nonalloc(Context &, u8 *); \ - template void InputSection::copy_contents_riscv(Context &, u8 *); \ - template void InputSection::scan_relocations(Context &); \ - template i64 riscv_resize_sections(Context &); - -INSTANTIATE(RV64LE); -INSTANTIATE(RV64BE); -INSTANTIATE(RV32LE); -INSTANTIATE(RV32BE); - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-s390x.cc b/third_party/mold/elf/arch-s390x.cc deleted file mode 100644 index 72c2965ed3b..00000000000 --- a/third_party/mold/elf/arch-s390x.cc +++ /dev/null @@ -1,491 +0,0 @@ -// clang-format off -// This file contains code for the IBM z/Architecture 64-bit ISA, which is -// commonly referred to as "s390x" on Linux. -// -// z/Architecture is a 64-bit CISC ISA developed by IBM around 2000 for -// IBM's "big iron" mainframe computers. The computers are direct -// descendents of IBM System/360 all the way back in 1966. I've never -// actually seen a mainframe, and you probaly haven't either, but it looks -// like the mainframe market is still large enough to sustain its ecosystem. -// Ubuntu for example provides the official support for s390x as of 2022. -// Since they are being actively maintained, we need to support them. -// -// As an instruction set, s390x isn't particularly odd. It has 16 general- -// purpose registers. Instructions are 2, 4 or 6 bytes long and always -// aligned to 2 bytes boundaries. Despite unfamiliarty, I found that it -// just feels like an x86-64 in a parallel universe. -// -// Here is the register usage in this ABI: -// -// r0-r1: reserved as scratch registers so we can use them in our PLT -// r2: parameter passing and return values -// r3-r6: parameter passing -// r12: address of GOT if position-independent code -// r14: return address -// r15: stack pointer -// a1: upper 32 bits of TP (thread pointer) -// a2: lower 32 bits of TP (thread pointer) -// -// Thread-local storage (TLS) is supported on s390x in the same way as it -// is on other targets with one exeption. On other targets, __tls_get_addr -// is used to get an address of a thread-local variable. On s390x, -// __tls_get_offset is used instead. The difference is __tls_get_offset -// returns an address of a thread-local variable as an offset from TP. So -// we need to add TP to a return value before use. I don't know why it is -// different, but that is the way it is. 
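//
// Editorial sketch, not part of the original file, of the difference in
// C-like pseudocode (argument-passing details and the names tp/got_offset
// are placeholders, not the real ABI spelling):
//
//   // Most targets: the helper returns the variable's address directly.
//   void *p = __tls_get_addr(&tls_index);
//
//   // s390x: the helper returns an offset from the thread pointer (TP),
//   // so the caller adds TP itself.
//   void *p = (char *)tp + __tls_get_offset(got_offset);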
-// -// https://github.com/rui314/psabi/blob/main/s390x.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = S390X; - -static void write_mid20(u8 *loc, u64 val) { - *(ub32 *)loc |= (bits(val, 11, 0) << 16) | (bits(val, 19, 12) << 8); -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - static u8 insn[] = { - 0xe3, 0x00, 0xf0, 0x38, 0x00, 0x24, // stg %r0, 56(%r15) - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_OFFSET - 0xd2, 0x07, 0xf0, 0x30, 0x10, 0x08, // mvc 48(8, %r15), 8(%r1) - 0xe3, 0x10, 0x10, 0x10, 0x00, 0x04, // lg %r1, 16(%r1) - 0x07, 0xf1, // br %r1 - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 8) = (ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 6) >> 1; -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static u8 insn[] = { - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOTPLT_ENTRY_OFFSET - 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) - 0xc0, 0x01, 0, 0, 0, 0, // lgfi %r0, PLT_INDEX - 0x07, 0xf1, // br %r1 - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - 0x07, 0x00, 0x07, 0x00, 0x07, 0x00, // nopr; nopr; nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = (sym.get_gotplt_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; - *(ub32 *)(buf + 14) = sym.get_plt_idx(ctx) * sizeof(ElfRel); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static u8 insn[] = { - 0xc0, 0x10, 0, 0, 0, 0, // larl %r1, GOT_ENTRY_OFFSET - 0xe3, 0x10, 0x10, 0x00, 0x00, 0x04, // lg %r1, (%r1) - 0x07, 0xf1, // br %r1 - 0x07, 0x00, // nopr - }; - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)(buf + 2) = (sym.get_got_addr(ctx) - sym.get_plt_addr(ctx)) >> 1; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_390_PC32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - case R_390_64: - *(ub64 *)loc = val; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - auto check_dbl = [&](i64 val, i64 lo, i64 hi) { - check(val, lo, hi); - - // R_390_*DBL relocs should never refer a symbol at an odd address - if (val & 1) - Error(ctx) << *this << ": misaligned symbol " << sym - << " for relocation " << rel; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_390_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_390_8: - check(S + A, 0, 1 << 8); - *loc = S + A; - break; - case R_390_12: - check(S + A, 0, 1 << 12); - *(ul16 *)loc |= bits(S + 
A, 11, 0); - break; - case R_390_16: - check(S + A, 0, 1 << 16); - *(ub16 *)loc = S + A; - break; - case R_390_20: - check(S + A, 0, 1 << 20); - write_mid20(loc, S + A); - break; - case R_390_32: - case R_390_PLT32: - check(S + A, 0, 1LL << 32); - *(ub32 *)loc = S + A; - break; - case R_390_PLT64: - *(ub64 *)loc = S + A; - break; - case R_390_PC12DBL: - case R_390_PLT12DBL: - check_dbl(S + A - P, -(1 << 12), 1 << 12); - *(ul16 *)loc |= bits(S + A - P, 12, 1); - break; - case R_390_PC16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - P; - break; - case R_390_PC32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - P; - break; - case R_390_PC64: - *(ub64 *)loc = S + A - P; - break; - case R_390_PC16DBL: - case R_390_PLT16DBL: - check_dbl(S + A - P, -(1 << 16), 1 << 16); - *(ub16 *)loc = (S + A - P) >> 1; - break; - case R_390_PC24DBL: - case R_390_PLT24DBL: - check_dbl(S + A - P, -(1 << 24), 1 << 24); - *(ub32 *)loc |= bits(S + A - P, 24, 1); - break; - case R_390_PC32DBL: - case R_390_PLT32DBL: - check_dbl(S + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (S + A - P) >> 1; - break; - case R_390_GOT12: - case R_390_GOTPLT12: - check(G + A, 0, 1 << 12); - *(ul16 *)loc |= bits(G + A, 11, 0); - break; - case R_390_GOT16: - case R_390_GOTPLT16: - check(G + A, 0, 1 << 16); - *(ub16 *)loc = G + A; - break; - case R_390_GOT20: - case R_390_GOTPLT20: - check(G + A, 0, 1 << 20); - write_mid20(loc, G + A); - break; - case R_390_GOT32: - case R_390_GOTPLT32: - check(G + A, 0, 1LL << 32); - *(ub32 *)loc = G + A; - break; - case R_390_GOT64: - case R_390_GOTPLT64: - *(ub64 *)loc = G + A; - break; - case R_390_GOTOFF16: - case R_390_PLTOFF16: - check(S + A - GOT, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - GOT; - break; - case R_390_GOTOFF32: - case R_390_PLTOFF32: - check(S + A - GOT, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - GOT; - break; - case R_390_GOTOFF64: - case R_390_PLTOFF64: - *(ub64 *)loc = S + A - GOT; - break; - case R_390_GOTPC: - *(ub64 *)loc = GOT + A - P; - break; - case R_390_GOTPCDBL: - check_dbl(GOT + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (GOT + A - P) >> 1; - break; - case R_390_GOTENT: - check(GOT + G + A - P, -(1LL << 32), 1LL << 32); - *(ub32 *)loc = (GOT + G + A - P) >> 1; - break; - case R_390_TLS_LE32: - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LE64: - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GOTIE20: - write_mid20(loc, sym.get_gottp_addr(ctx) + A - GOT); - break; - case R_390_TLS_IEENT: - *(ub32 *)loc = (sym.get_gottp_addr(ctx) + A - P) >> 1; - break; - case R_390_TLS_GD32: - if (sym.has_tlsgd(ctx)) - *(ub32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - else if (sym.has_gottp(ctx)) - *(ub32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - else - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GD64: - if (sym.has_tlsgd(ctx)) - *(ub64 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - else if (sym.has_gottp(ctx)) - *(ub64 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - else - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_GDCALL: - if (sym.has_tlsgd(ctx)) { - // do nothing - } else if (sym.has_gottp(ctx)) { - // lg %r2, 0(%r2, %r12) - static u8 insn[] = { 0xe3, 0x22, 0xc0, 0x00, 0x00, 0x04 }; - memcpy(loc, insn, sizeof(insn)); - } else { - // nop - static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; - memcpy(loc, insn, sizeof(insn)); - } - break; - case R_390_TLS_LDM32: - if (ctx.got->has_tlsld(ctx)) - *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - 
GOT; - break; - case R_390_TLS_LDM64: - if (ctx.got->has_tlsld(ctx)) - *(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_390_TLS_LDO32: - if (ctx.got->has_tlsld(ctx)) - *(ub32 *)loc = S + A - ctx.dtp_addr; - else - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LDO64: - if (ctx.got->has_tlsld(ctx)) - *(ub64 *)loc = S + A - ctx.dtp_addr; - else - *(ub64 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LDCALL: - if (!ctx.got->has_tlsld(ctx)) { - // nop - static u8 insn[] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; - memcpy(loc, insn, sizeof(insn)); - } - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_390_32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ub32 *)loc = val; - break; - } - case R_390_64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A; - break; - case R_390_TLS_LDO64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_390_64: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_390_8: - case R_390_12: - case R_390_16: - case R_390_20: - case R_390_32: - scan_absrel(ctx, sym, rel); - break; - case R_390_PC16: - case R_390_PC16DBL: - case R_390_PC32: - case R_390_PC32DBL: - case R_390_PC64: - scan_pcrel(ctx, sym, rel); - break; - case R_390_GOT12: - case R_390_GOT16: - case R_390_GOT20: - case R_390_GOT32: - case R_390_GOT64: - case R_390_GOTOFF16: - case R_390_GOTOFF32: - case R_390_GOTOFF64: - case R_390_GOTPLT12: - case R_390_GOTPLT16: - case R_390_GOTPLT20: - case R_390_GOTPLT32: - case R_390_GOTPLT64: - case R_390_GOTPC: - case R_390_GOTPCDBL: - case R_390_GOTENT: - sym.flags |= NEEDS_GOT; - break; - case R_390_PLT12DBL: - case R_390_PLT16DBL: - case R_390_PLT24DBL: - case R_390_PLT32: - case R_390_PLT32DBL: - case R_390_PLT64: - case R_390_PLTOFF16: - case R_390_PLTOFF32: - case R_390_PLTOFF64: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_390_TLS_GOTIE20: - case R_390_TLS_IEENT: - sym.flags |= NEEDS_GOTTP; - break; - case R_390_TLS_GD32: - case R_390_TLS_GD64: - // We always want to relax 
calls to __tls_get_offset() in statically- - // linked executables because __tls_get_offset() in libc.a just calls - // abort(). - if (ctx.arg.is_static || - (ctx.arg.relax && !sym.is_imported && !ctx.arg.shared)) { - // do nothing - } else if (ctx.arg.relax && !sym.is_imported && ctx.arg.shared && - !ctx.arg.z_dlopen) { - sym.flags |= NEEDS_GOTTP; - } else { - sym.flags |= NEEDS_TLSGD; - } - break; - case R_390_TLS_LDM32: - case R_390_TLS_LDM64: { - bool do_relax = ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared); - if (!do_relax) - ctx.needs_tlsld = true; - break; - } - case R_390_TLS_LE32: - case R_390_TLS_LE64: - check_tlsle(ctx, sym, rel); - break; - case R_390_TLS_LDO32: - case R_390_TLS_LDO64: - case R_390_TLS_GDCALL: - case R_390_TLS_LDCALL: - break; - default: - Fatal(ctx) << *this << ": scan_relocations: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sh4.cc b/third_party/mold/elf/arch-sh4.cc deleted file mode 100644 index dcce34df95b..00000000000 --- a/third_party/mold/elf/arch-sh4.cc +++ /dev/null @@ -1,355 +0,0 @@ -// clang-format off -// SH-4 (SuperH 4) is a 32-bit RISC ISA developed by Hitachi in the early -// '90s. Some relatively powerful systems were developed with SH-4. -// A notable example is Sega's Dreamcast game console which debuted in 1998. -// Hitachi later spun off its semiconductor division as an independent -// company, Renesas, and Renesas is still selling SH-4 processors for the -// embedded market. It has never been as popular as ARM is, and its -// popularity continues to decline though. -// -// SH-4's most distinctive feature compared to other RISC ISAs is that its -// instructions are 16 bits in length instead of more common 32 bits for -// better code density. This difference affects various aspects of its -// instruction set as shown below: -// -// - SH-4 has 16 general-purpose registers (GPRs) instead of the most -// commmon 32 GPR configuration to save one bit to specify a register. -// -// - Binary instructions such as ADD normally take three register in -// RISC ISAs (e.g. x ← y ⊕ z where x, y and z are registers), but -// SH-4's instructions take only two registers. The result of an -// operation is written to one of the source registers (e.g. x ← x ⊕ y). -// -// - Usual RISC ISAs have "load high" and "load low" instructions to set -// an immediate to most significant and least significant bits in a -// register to construct a full 32-bit value in a register. This -// technique is hard to use in SH-4, as 16 bit instructions are too -// small to contain large immediates. On SH-4, large immediates are -// loaded from memory using `mov.l` PC-relative load instruction. -// -// - Many RISC ISAs are, despite their name, actually fairly complex. -// They tend to have hundreds if not thousands of different instructions. -// SH-4 doesn't really have that many instructions because its 16-bit -// machine code simply can't encode many different opcodes. As a -// result, the number of relocations the linker has to support is also -// small. -// -// Beside these, SH-4 has a delay branch slot just like contemporary MIPS -// and SPARC. That is, one instruction after a branch instruction will -// always be executed even if the branch is taken. Delay branch slot allows -// a pipelined CPU to start and finish executing an instruction after a -// branch regardless of the branch's condition, simplifying the processor's -// implementation. It's considered a bad premature optimization nowadays, -// though. 
Modern RISC processors don't have it. -// -// Here are notes about the SH-4 psABI: -// -// - If a source file is compiled with -fPIC, each function starts -// with a piece of code to store the address of .got to %r12. -// We can use the register in our PLT for position-independent output. -// -// - Even though it uses the RELA-type relocations, relocation addends -// are stored not to the r_addend field but to the relocated section -// contents for some reason. Therefore, it's effectively REL. -// -// - It looks like the ecosystem has bit-rotted. Some tests, especially -// one using C++ exceptions, don't pass even with GNU ld. -// -// - GCC/SH4 tends to write dynamically-relocated data into .text, so the -// output from the linker contains lots of text relocations. That's not -// a problem with embedded programming, I guess. - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = SH4; - -// Even though SH-4 uses RELA-type relocations, addends are stored to -// relocated places for some reason. -template <> -i64 get_addend(u8 *loc, const ElfRel &rel) { - switch (rel.r_type) { - case R_SH_DIR32: - case R_SH_REL32: - case R_SH_TLS_GD_32: - case R_SH_TLS_LD_32: - case R_SH_TLS_LDO_32: - case R_SH_TLS_IE_32: - case R_SH_TLS_LE_32: - case R_SH_TLS_DTPMOD32: - case R_SH_TLS_DTPOFF32: - case R_SH_TLS_TPOFF32: - case R_SH_GOT32: - case R_SH_PLT32: - case R_SH_GOTOFF: - case R_SH_GOTPC: - case R_SH_GOTPLT32: - return *(ul32 *)loc; - default: - return 0; - } -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x02, 0xd2, // mov.l 1f, r2 - 0xcc, 0x32, // add r12, r2 - 0x22, 0x50, // mov.l @(8, r2), r0 - 0x21, 0x52, // mov.l @(4, r2), r2 - 0x2b, 0x40, // jmp @r0 - 0x00, 0xe0, // mov #0, r0 - 0, 0, 0, 0, // 1: .long GOTPLT - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0x02, 0xd2, // mov.l 1f, r2 - 0x22, 0x50, // mov.l @(8, r2), r0 - 0x21, 0x52, // mov.l @(4, r2), r2 - 0x2b, 0x40, // jmp @r0 - 0x00, 0xe0, // mov #0, r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOTPLT - }; - - static_assert(sizeof(insn) == E::plt_hdr_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 12) = ctx.gotplt->shdr.sh_addr; - } -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0xce, 0x00, // mov.l @(r0, r12), r0 - 0x2b, 0x40, // jmp @r0 - 0x01, 0xd1, // mov.l 2f, r1 - 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY - 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT - }; - - static_assert(sizeof(insn) == E::plt_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx) - ctx.got->shdr.sh_addr; - *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - } else { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0x02, 0x60, // mov.l @r0, r0 - 0x2b, 0x40, // jmp @r0 - 0x01, 0xd1, // mov.l 2f, r1 - 0, 0, 0, 0, // 1: .long GOTPLT_ENTRY - 0, 0, 0, 0, // 2: .long INDEX_IN_RELPLT - }; - - static_assert(sizeof(insn) == E::plt_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_gotplt_addr(ctx); - *(ul32 *)(buf + 12) = sym.get_plt_idx(ctx) * sizeof(ElfRel); - } -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - if (ctx.arg.pic) { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 
0xce, 0x00, // mov.l @(r0, r12), r0 - 0x2b, 0x40, // jmp @r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOT_ENTRY - }; - - static_assert(sizeof(insn) == E::pltgot_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_got_addr(ctx) - ctx.got->shdr.sh_addr; - } else { - static const u8 insn[] = { - 0x01, 0xd0, // mov.l 1f, r0 - 0x02, 0x60, // mov.l @r0, r0 - 0x2b, 0x40, // jmp @r0 - 0x09, 0x00, // nop - 0, 0, 0, 0, // 1: .long GOT_ENTRY - }; - - static_assert(sizeof(insn) == E::pltgot_size); - memcpy(buf, insn, sizeof(insn)); - *(ul32 *)(buf + 8) = sym.get_got_addr(ctx); - } -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_SH_DIR32: - *(ul32 *)loc = val; - break; - case R_SH_REL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = get_addend(loc, rel); - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_SH_DIR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_SH_REL32: - case R_SH_PLT32: - *(ul32 *)loc = S + A - P; - break; - case R_SH_GOT32: - *(ul32 *)loc = G; - break; - case R_SH_GOTPC: - *(ul32 *)loc = GOT + A - P; - break; - case R_SH_GOTOFF: - *(ul32 *)loc = S + A - GOT; - break; - case R_SH_TLS_GD_32: - *(ul32 *)loc = sym.get_tlsgd_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LD_32: - *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LDO_32: - *(ul32 *)loc = S + A - ctx.dtp_addr; - break; - case R_SH_TLS_IE_32: - *(ul32 *)loc = sym.get_gottp_addr(ctx) + A - GOT; - break; - case R_SH_TLS_LE_32: - *(ul32 *)loc = S + A - ctx.tp_addr; - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : get_addend(loc, rel); - - switch (rel.r_type) { - case R_SH_DIR32: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4"; - - switch (rel.r_type) { - case R_SH_DIR32: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_SH_REL32: - scan_pcrel(ctx, sym, rel); - break; - case R_SH_GOT32: - sym.flags |= NEEDS_GOT; - break; - case R_SH_PLT32: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_SH_TLS_GD_32: - sym.flags |= NEEDS_TLSGD; - break; - case R_SH_TLS_LD_32: - ctx.needs_tlsld = true; - break; - case R_SH_TLS_IE_32: - sym.flags |= NEEDS_GOTTP; - break; - case R_SH_TLS_LE_32: - check_tlsle(ctx, sym, rel); - break; - case R_SH_GOTPC: - case R_SH_GOTOFF: - case R_SH_TLS_LDO_32: - break; - default: - Fatal(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/arch-sparc64.cc b/third_party/mold/elf/arch-sparc64.cc deleted file mode 100644 index 35ac760b481..00000000000 --- a/third_party/mold/elf/arch-sparc64.cc +++ /dev/null @@ -1,622 +0,0 @@ -// clang-format off -// SPARC is a RISC ISA developed by Sun Microsystems. -// -// The byte order of the processor is big-endian. Anything larger than a -// byte is stored in the "reverse" order compared to little-endian -// processors such as x86-64. -// -// All instructions are 4 bytes long and aligned to 4 bytes boundaries. -// -// A notable feature of SPARC is that, unlike other RISC ISAs, it doesn't -// need range extension thunks. It is because the SPARC's CALL instruction -// contains a whopping 30 bits immediate. The processor scales it by 4 to -// extend it to 32 bits (this is doable because all instructions are -// aligned to 4 bytes boundaries, so the least significant two bits are -// always zero). That means CALL's reach is PC ± 2 GiB, elinating the -// need of range extension thunks. It comes with the cost that the CALL -// instruction alone takes 1/4th of the instruction encoding space, -// though. -// -// SPARC has 32 general purpose registers. CALL instruction saves a return -// address to %o7, which is an alias for %r15. Thread pointer is stored to -// %g7 which is %r7. -// -// SPARC does not have PC-relative load/store instructions. To access data -// in the position-independent manner, we usually first set the address of -// .got to, for example, %l7, with the following piece of code -// -// sethi %hi(. - _GLOBAL_OFFSET_TABLE_), %l7 -// add %l7, %lo(. - _GLOBAL_OFFSET_TABLE_), %l7 -// call __sparc_get_pc_thunk.l7 -// nop -// -// where __sparc_get_pc_thunk.l7 is defined as -// -// retl -// add %o7, %l7, %l7 -// -// . SETHI and the following ADD materialize a 32 bits offset to .got. -// CALL instruction sets a return address to $o7, and the subsequent ADD -// adds it to the GOT offset to materialize the absolute address of .got. 
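As a quick illustration (a sketch, not code from the mold sources): a WDISP30-style relocation stores (S + A - P) / 4 in the low 30 bits of the CALL word, which is exactly what the *(ub32 *)loc |= bits(S + A - P, 31, 2) case further below does. The sketch assumes a bits(val, hi, lo) helper that extracts the inclusive bit range [hi:lo], matching how the helper is used throughout this file; the real code additionally writes through a big-endian (ub32) wrapper.

  #include <cstdint>

  // Hypothetical stand-in for mold's bits() helper: extract bits [hi:lo].
  static uint64_t bits(uint64_t val, int hi, int lo) {
    return (val >> lo) & ((uint64_t(1) << (hi - lo + 1)) - 1);
  }

  // S = symbol address, A = addend, P = place of the relocated CALL.
  // The displacement is scaled by 4, so 30 encoded bits cover PC +/- 2 GiB.
  static uint32_t apply_wdisp30(uint32_t insn, uint64_t S, uint64_t A, uint64_t P) {
    return insn | (uint32_t)bits(S + A - P, 31, 2);
  }
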
-// -// Note that we have a NOP after CALL and an ADD after RETL because of -// SPARC's delay branch slots. That is, the SPARC processor always -// executes one instruction after a branch even if the branch is taken. -// This may seem like an odd behavior, and indeed it is considered as such -// (that's a premature optimization for the early pipelined SPARC -// processors), but that's been a part of the ISA's spec so that's what it -// is. -// -// Note also that the .got address obtained this way is not shared between -// functions, so functions can use an arbitrary register to hold the .got -// address. That also means each function needs to execute the above piece -// of code to become position-independent. -// -// This scheme is very similar to i386. That may not be a coincidence -// because the i386 ELF psABI is created by Sun Microsystems too. -// -// https://github.com/rui314/psabi/blob/main/sparc.pdf - -#include "third_party/mold/elf/mold.h" - -namespace mold::elf { - -using E = SPARC64; - -// SPARC's PLT section is writable despite containing executable code. -// We don't need to write the PLT header entry because the dynamic loader -// will do that for us. -// -// We also don't need a .got.plt section to store the result of lazy PLT -// symbol resolution because the dynamic symbol resolver directly mutates -// instructions in PLT so that they jump to the right places next time. -// That's why each PLT entry contains lots of NOPs; they are a placeholder -// for the runtime to add more instructions. -// -// Self-modifying code is nowadays considered really bad from the security -// point of view, though. -template <> -void write_plt_header(Context &ctx, u8 *buf) { - memset(buf, 0, E::plt_hdr_size); -} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static ub32 insn[] = { - 0x0300'0000, // sethi (. - .PLT0), %g1 - 0x3068'0000, // ba,a %xcc, .PLT1 - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - 0x0100'0000, // nop - }; - - u64 plt0 = ctx.plt->shdr.sh_addr; - u64 plt1 = ctx.plt->shdr.sh_addr + E::plt_size; - u64 entry = sym.get_plt_addr(ctx); - - memcpy(buf, insn, sizeof(insn)); - *(ub32 *)buf |= bits(entry - plt0, 21, 0); - *(ub32 *)(buf + 4) |= bits(plt1 - entry - 4, 20, 2); -} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static ub32 entry[] = { - 0x8a10'000f, // mov %o7, %g5 - 0x4000'0002, // call . 
+ 8 - 0xc25b'e014, // ldx [ %o7 + 20 ], %g1 - 0xc25b'c001, // ldx [ %o7 + %g1 ], %g1 - 0x81c0'4000, // jmp %g1 - 0x9e10'0005, // mov %g5, %o7 - 0x0000'0000, // .quad $plt_entry - $got_entry - 0x0000'0000, - }; - - memcpy(buf, entry, sizeof(entry)); - *(ub64 *)(buf + 24) = sym.get_got_addr(ctx) - sym.get_plt_addr(ctx) - 4; -} - -template <> -void EhFrameSection::apply_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_SPARC_64: - case R_SPARC_UA64: - *(ub64 *)loc = val; - break; - case R_SPARC_DISP32: - *(ub32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = (get_addr() + rel.r_offset); - u64 G = (sym.get_got_idx(ctx) * sizeof(Word)); - u64 GOT = ctx.got->shdr.sh_addr; - - switch (rel.r_type) { - case R_SPARC_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, dynrel); - break; - case R_SPARC_5: - check(S + A, 0, 1 << 5); - *(ub32 *)loc |= bits(S + A, 4, 0); - break; - case R_SPARC_6: - check(S + A, 0, 1 << 6); - *(ub32 *)loc |= bits(S + A, 5, 0); - break; - case R_SPARC_7: - check(S + A, 0, 1 << 7); - *(ub32 *)loc |= bits(S + A, 6, 0); - break; - case R_SPARC_8: - check(S + A, 0, 1 << 8); - *(u8 *)loc = S + A; - break; - case R_SPARC_10: - check(S + A, 0, 1 << 10); - *(ub32 *)loc |= bits(S + A, 9, 0); - break; - case R_SPARC_LO10: - case R_SPARC_LOPLT10: - *(ub32 *)loc |= bits(S + A, 9, 0); - break; - case R_SPARC_11: - check(S + A, 0, 1 << 11); - *(ub32 *)loc |= bits(S + A, 10, 0); - break; - case R_SPARC_13: - check(S + A, 0, 1 << 13); - *(ub32 *)loc |= bits(S + A, 12, 0); - break; - case R_SPARC_16: - case R_SPARC_UA16: - check(S + A, 0, 1 << 16); - *(ub16 *)loc = S + A; - break; - case R_SPARC_22: - check(S + A, 0, 1 << 22); - *(ub32 *)loc |= bits(S + A, 21, 0); - break; - case R_SPARC_32: - case R_SPARC_UA32: - case R_SPARC_PLT32: - check(S + A, 0, 1LL << 32); - *(ub32 *)loc = S + A; - break; - case R_SPARC_PLT64: - case R_SPARC_UA64: - case R_SPARC_REGISTER: - *(ub64 *)loc = S + A; - break; - case R_SPARC_DISP8: - check(S + A - P, -(1 << 7), 1 << 7); - *(u8 *)loc = S + A - P; - break; - case R_SPARC_DISP16: - check(S + A - P, -(1 << 15), 1 << 15); - *(ub16 *)loc = S + A - P; - break; - case R_SPARC_DISP32: - case R_SPARC_PCPLT32: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ub32 *)loc = S + A - P; - break; - case R_SPARC_DISP64: - *(ub64 *)loc = S + A - P; - break; - case R_SPARC_WDISP16: { - i64 val = S + A - P; - check(val, -(1 << 16), 1 << 16); - *(ub16 *)loc |= (bit(val, 16) << 21) | bits(val, 15, 2); - break; - } - case R_SPARC_WDISP19: - check(S + A - P, -(1 << 20), 1 << 20); - *(ub32 *)loc |= bits(S + A - P, 20, 2); - break; - 
case R_SPARC_WDISP22: - check(S + A - P, -(1 << 23), 1 << 23); - *(ub32 *)loc |= bits(S + A - P, 23, 2); - break; - case R_SPARC_WDISP30: - case R_SPARC_WPLT30: - check(S + A - P, -(1LL << 31), 1LL << 31); - *(ub32 *)loc |= bits(S + A - P, 31, 2); - break; - case R_SPARC_HI22: - case R_SPARC_HIPLT22: - case R_SPARC_LM22: - *(ub32 *)loc |= bits(S + A, 31, 10); - break; - case R_SPARC_GOT10: - *(ub32 *)loc |= bits(G, 9, 0); - break; - case R_SPARC_GOT13: - check(G, 0, 1 << 12); - *(ub32 *)loc |= bits(G, 12, 0); - break; - case R_SPARC_GOT22: - *(ub32 *)loc |= bits(G, 31, 10); - break; - case R_SPARC_GOTDATA_HIX22: { - i64 val = S + A - GOT; - *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); - break; - } - case R_SPARC_GOTDATA_LOX10: { - i64 val = S + A - GOT; - *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); - break; - } - case R_SPARC_GOTDATA_OP_HIX22: - // We always have to relax a GOT load to a load immediate if a - // symbol is local, because R_SPARC_GOTDATA_OP cannot represent - // an addend for a local symbol. - if (sym.is_imported || sym.is_ifunc()) { - *(ub32 *)loc |= bits(G, 31, 10); - } else if (sym.is_absolute()) { - i64 val = S + A; - *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); - } else { - i64 val = S + A - GOT; - *(ub32 *)loc |= bits(val < 0 ? ~val : val, 31, 10); - } - break; - case R_SPARC_GOTDATA_OP_LOX10: { - if (sym.is_imported || sym.is_ifunc()) { - *(ub32 *)loc |= bits(G, 9, 0); - } else if (sym.is_absolute()) { - i64 val = S + A; - *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); - } else { - i64 val = S + A - GOT; - *(ub32 *)loc |= bits(val, 9, 0) | (val < 0 ? 0b1'1100'0000'0000 : 0); - } - break; - } - case R_SPARC_GOTDATA_OP: - if (sym.is_imported || sym.is_ifunc()) - break; - - if (sym.is_absolute()) { - // ldx [ %g2 + %g1 ], %g1 → nop - *(ub32 *)loc = 0x0100'0000; - } else { - // ldx [ %g2 + %g1 ], %g1 → add %g2, %g1, %g1 - *(ub32 *)loc &= 0b00'11111'000000'11111'1'11111111'11111; - *(ub32 *)loc |= 0b10'00000'000000'00000'0'00000000'00000; - } - break; - case R_SPARC_PC10: - case R_SPARC_PCPLT10: - *(ub32 *)loc |= bits(S + A - P, 9, 0); - break; - case R_SPARC_PC22: - case R_SPARC_PCPLT22: - case R_SPARC_PC_LM22: - *(ub32 *)loc |= bits(S + A - P, 31, 10); - break; - case R_SPARC_OLO10: - *(ub32 *)loc |= bits(bits(S + A, 9, 0) + rel.r_type_data, 12, 0); - break; - case R_SPARC_HH22: - *(ub32 *)loc |= bits(S + A, 63, 42); - break; - case R_SPARC_HM10: - *(ub32 *)loc |= bits(S + A, 41, 32); - break; - case R_SPARC_PC_HH22: - *(ub32 *)loc |= bits(S + A - P, 63, 42); - break; - case R_SPARC_PC_HM10: - *(ub32 *)loc |= bits(S + A - P, 41, 32); - break; - case R_SPARC_HIX22: - *(ub32 *)loc |= bits(~(S + A), 31, 10); - break; - case R_SPARC_LOX10: - *(ub32 *)loc |= bits(S + A, 9, 0) | 0b1'1100'0000'0000; - break; - case R_SPARC_H44: - *(ub32 *)loc |= bits(S + A, 43, 22); - break; - case R_SPARC_M44: - *(ub32 *)loc |= bits(S + A, 21, 12); - break; - case R_SPARC_L44: - *(ub32 *)loc |= bits(S + A, 11, 0); - break; - case R_SPARC_TLS_GD_HI22: - *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10); - break; - case R_SPARC_TLS_GD_LO10: - *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0); - break; - case R_SPARC_TLS_GD_CALL: - case R_SPARC_TLS_LDM_CALL: { - u64 addr; - if (ctx.arg.is_static) - addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr; - else - addr = ctx.extra.tls_get_addr_sym->get_addr(ctx); - - *(ub32 *)loc |= bits(addr + A - P, 31, 2); - break; - } - case R_SPARC_TLS_LDM_HI22: - *(ub32 *)loc |= 
bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10); - break; - case R_SPARC_TLS_LDM_LO10: - *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0); - break; - case R_SPARC_TLS_LDO_HIX22: - *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10); - break; - case R_SPARC_TLS_LDO_LOX10: - *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 9, 0); - break; - case R_SPARC_TLS_IE_HI22: - *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10); - break; - case R_SPARC_TLS_IE_LO10: - *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0); - break; - case R_SPARC_TLS_LE_HIX22: - *(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10); - break; - case R_SPARC_TLS_LE_LOX10: - *(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000; - break; - case R_SPARC_SIZE32: - *(ub32 *)loc = sym.esym().st_size + A; - break; - case R_SPARC_TLS_GD_ADD: - case R_SPARC_TLS_LDM_ADD: - case R_SPARC_TLS_LDO_ADD: - case R_SPARC_TLS_IE_LD: - case R_SPARC_TLS_IE_LDX: - case R_SPARC_TLS_IE_ADD: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - auto check = [&](i64 val, i64 lo, i64 hi) { - if (val < lo || hi <= val) - Error(ctx) << *this << ": relocation " << rel << " against " - << sym << " out of range: " << val << " is not in [" - << lo << ", " << hi << ")"; - }; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_SPARC_64: - case R_SPARC_UA64: - if (std::optional val = get_tombstone(sym, frag)) - *(ub64 *)loc = *val; - else - *(ub64 *)loc = S + A; - break; - case R_SPARC_32: - case R_SPARC_UA32: { - i64 val = S + A; - check(val, 0, 1LL << 32); - *(ub32 *)loc = val; - break; - } - case R_SPARC_TLS_DTPOFF32: - *(ub32 *)loc = S + A - ctx.dtp_addr; - break; - case R_SPARC_TLS_DTPOFF64: - *(ub64 *)loc = S + A - ctx.dtp_addr; - break; - default: - Fatal(ctx) << *this << ": apply_reloc_nonalloc: " << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - // Scan relocations - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - sym.flags |= NEEDS_GOT | NEEDS_PLT; - - switch (rel.r_type) { - case R_SPARC_64: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_SPARC_8: - case R_SPARC_5: - case R_SPARC_6: - case R_SPARC_7: - case R_SPARC_10: - case R_SPARC_11: - case R_SPARC_13: - case R_SPARC_16: - case R_SPARC_22: - case R_SPARC_32: - case R_SPARC_REGISTER: - case R_SPARC_UA16: - case R_SPARC_UA32: - case R_SPARC_UA64: - case R_SPARC_PC_HM10: - case R_SPARC_OLO10: - case R_SPARC_LOX10: - case R_SPARC_HM10: - case R_SPARC_M44: - case R_SPARC_HIX22: - case R_SPARC_LO10: - case R_SPARC_L44: - case R_SPARC_LM22: - case R_SPARC_HI22: - case R_SPARC_H44: - case R_SPARC_HH22: - scan_absrel(ctx, sym, rel); - break; - case R_SPARC_PLT32: - case R_SPARC_WPLT30: - case R_SPARC_WDISP30: - case R_SPARC_HIPLT22: - case R_SPARC_LOPLT10: - case R_SPARC_PCPLT32: - case R_SPARC_PCPLT22: - case R_SPARC_PCPLT10: - case R_SPARC_PLT64: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_SPARC_GOT13: - case R_SPARC_GOT10: - case R_SPARC_GOT22: - case R_SPARC_GOTDATA_HIX22: - sym.flags |= NEEDS_GOT; - break; - case R_SPARC_GOTDATA_OP_HIX22: - if (sym.is_imported) - sym.flags |= NEEDS_GOT; - break; - case R_SPARC_DISP16: - case R_SPARC_DISP32: - case R_SPARC_DISP64: - case R_SPARC_DISP8: - case R_SPARC_PC10: - case R_SPARC_PC22: - case R_SPARC_PC_LM22: - case R_SPARC_WDISP16: - case R_SPARC_WDISP19: - case R_SPARC_WDISP22: - case R_SPARC_PC_HH22: - scan_pcrel(ctx, sym, rel); - break; - case R_SPARC_TLS_GD_HI22: - sym.flags |= NEEDS_TLSGD; - break; - case R_SPARC_TLS_LDM_HI22: - ctx.needs_tlsld = true; - break; - case R_SPARC_TLS_IE_HI22: - sym.flags |= NEEDS_GOTTP; - break; - case R_SPARC_TLS_GD_CALL: - case R_SPARC_TLS_LDM_CALL: - if (!ctx.arg.is_static && ctx.extra.tls_get_addr_sym->is_imported) - ctx.extra.tls_get_addr_sym->flags |= NEEDS_PLT; - break; - case R_SPARC_TLS_LE_HIX22: - case R_SPARC_TLS_LE_LOX10: - check_tlsle(ctx, sym, rel); - break; - case R_SPARC_GOTDATA_OP_LOX10: - case R_SPARC_GOTDATA_OP: - case R_SPARC_GOTDATA_LOX10: - case R_SPARC_TLS_GD_LO10: - case R_SPARC_TLS_GD_ADD: - case R_SPARC_TLS_LDM_LO10: - case R_SPARC_TLS_LDM_ADD: - case R_SPARC_TLS_LDO_HIX22: - case R_SPARC_TLS_LDO_LOX10: - case R_SPARC_TLS_LDO_ADD: - case R_SPARC_TLS_IE_ADD: - case R_SPARC_TLS_IE_LD: - case R_SPARC_TLS_IE_LDX: - case R_SPARC_TLS_IE_LO10: - case R_SPARC_SIZE32: - break; - default: - Fatal(ctx) << *this << ": scan_relocations: " << rel; - } - } -} - -// __tls_get_addr is not defined by libc.a, so we can't use that function -// 
in statically-linked executables. This section provides a replacement. -void SparcTlsGetAddrSection::copy_buf(Context &ctx) { - ub32 *buf = (ub32 *)(ctx.buf + this->shdr.sh_offset); - - static const ub32 insn[] = { - 0x0300'0000, // sethi %hi(TP_SIZE), %g1 - 0x8210'6000, // or %g1, %lo(TP_SIZE), %g1 - 0x8221'c001, // sub %g7, %g1, %g1 - 0xd05a'2008, // ldx [ %o0 + 8 ], %o0 - 0x81c3'e008, // retl - 0x9000'4008, // add %g1, %o0, %o0 - }; - - assert(this->shdr.sh_size == sizeof(insn)); - memcpy(buf, insn, sizeof(insn)); - - buf[0] |= bits(ctx.tp_addr - ctx.tls_begin, 31, 10); - buf[1] |= bits(ctx.tp_addr - ctx.tls_begin, 9, 0); -} - -} // namespace mold::elf diff --git a/third_party/mold/elf/cmdline.cc b/third_party/mold/elf/cmdline.cc index af1bc12dcba..cd2faa96a9b 100644 --- a/third_party/mold/elf/cmdline.cc +++ b/third_party/mold/elf/cmdline.cc @@ -1,6 +1,6 @@ // clang-format off #include "third_party/mold/elf/mold.h" -// MISSING #include "../common/cmdline.h" +#include "third_party/mold/cmdline.h" #include "third_party/libcxx/regex" #include "third_party/libcxx/sstream" @@ -36,7 +36,6 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include "third_party/musl/lockf.h" #endif diff --git a/third_party/mold/elf/elf.h b/third_party/mold/elf/elf.h index 0f0a2c72063..930b034a564 100644 --- a/third_party/mold/elf/elf.h +++ b/third_party/mold/elf/elf.h @@ -1,7 +1,7 @@ // clang-format off #pragma once -// MISSING #include "../common/integers.h" +#include "third_party/mold/integers.h" #include "third_party/libcxx/ostream" #include "third_party/libcxx/string" diff --git a/third_party/mold/elf/main.cc b/third_party/mold/elf/main.cc index 645be0d50b6..d83317a948b 100644 --- a/third_party/mold/elf/main.cc +++ b/third_party/mold/elf/main.cc @@ -1,9 +1,8 @@ // clang-format off #include "third_party/mold/elf/mold.h" -// MISSING #include "../common/archive-file.h" -// MISSING #include "../common/cmdline.h" -// MISSING #include "../common/output-file.h" - +#include "third_party/mold/archive-file.h" +#include "third_party/mold/cmdline.h" +#include "third_party/mold/output-file.h" #include "third_party/libcxx/cstring" #include "third_party/libcxx/functional" #include "third_party/libcxx/iomanip" diff --git a/third_party/mold/elf/mold.h b/third_party/mold/elf/mold.h index a67c239ac53..af72cadc60c 100644 --- a/third_party/mold/elf/mold.h +++ b/third_party/mold/elf/mold.h @@ -2,7 +2,7 @@ #pragma once #include "third_party/mold/elf/elf.h" -// MISSING #include "../common/common.h" +#include "third_party/mold/common.h" #include "third_party/libcxx/atomic" #include "third_party/libcxx/bitset" @@ -15,16 +15,19 @@ #include "third_party/libcxx/memory" #include "third_party/libcxx/mutex" #include "third_party/libcxx/optional" -// MISSING #include +#include "third_party/libcxx/span" #include "third_party/libcxx/sstream" #include "third_party/libcxx/string" #include "third_party/libcxx/string_view" + +#include "third_party/mold/fake_tbb.h" // MISSING #include // MISSING #include // MISSING #include // MISSING #include // MISSING #include // MISSING #include + #include "third_party/libcxx/type_traits" #include "third_party/libcxx/unordered_map" #include "third_party/libcxx/unordered_set" @@ -42,7 +45,6 @@ #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/time/time.h" -#include "third_party/getopt/getopt.internal.h" #include "third_party/musl/crypt.h" #include 
"third_party/musl/lockf.h" #endif diff --git a/third_party/mold/fake_tbb.h b/third_party/mold/fake_tbb.h index 072fa42e5bf..b0453d89383 100644 --- a/third_party/mold/fake_tbb.h +++ b/third_party/mold/fake_tbb.h @@ -6,6 +6,16 @@ namespace tbb { template using concurrent_vector = std::vector; +template < + class Key, + class T, + class Hash = std::hash, + class KeyEqual = std::equal_to, + class Allocator = std::allocator< std::pair > > + using concurrent_hash_map = std::unordered_map; + + using spin_mutex = std::mutex; + template void parallel_for_each(InputIterator first, InputIterator last, const Function& f) { } @@ -22,5 +32,35 @@ namespace tbb { void parallel_for(Index first, Index last, const Function& f) { } + enum task_group_status { + not_complete, + complete, + canceled + }; + + class task_group { + public: + task_group() {}; + ~task_group() {}; + + template + void run( Func&& f ) { + + }; + + template + task_group_status run_and_wait( const Func& f ) { + return task_group_status::complete; + }; + + task_group_status wait() { + return task_group_status::complete; + }; + + void cancel() { + + }; + }; + } #endif diff --git a/third_party/mold/git-hash.cc b/third_party/mold/git-hash.cc new file mode 100644 index 00000000000..2d520bd8760 --- /dev/null +++ b/third_party/mold/git-hash.cc @@ -0,0 +1,5 @@ +#include "third_party/libcxx/string" + +namespace mold { +std::string mold_git_hash = "d4d93d7fb72dd19c44aafa4dd5397e35787d33ad"; +} diff --git a/third_party/mold/hyperloglog.cc b/third_party/mold/hyperloglog.cc index 7c10616a4e4..f7974eef6a4 100644 --- a/third_party/mold/hyperloglog.cc +++ b/third_party/mold/hyperloglog.cc @@ -5,9 +5,10 @@ // For more info, read // https://engineering.fb.com/2018/12/13/data-infrastructure/hyperloglog +// TODO(fzakaria): changed from libcxx because pow symbol wasn't present. #include "third_party/mold/common.h" -#include "third_party/libcxx/cmath" +#include "libc/math.h" namespace mold { diff --git a/third_party/mold/mold.mk b/third_party/mold/mold.mk index f0e76c19461..128061376b6 100644 --- a/third_party/mold/mold.mk +++ b/third_party/mold/mold.mk @@ -6,7 +6,7 @@ PKGS += THIRD_PARTY_MOLD THIRD_PARTY_MOLD_ARTIFACTS += THIRD_PARTY_MOLD_A THIRD_PARTY_MOLD = $(THIRD_PARTY_MOLD_A_DEPS) $(THIRD_PARTY_MOLD_A) THIRD_PARTY_MOLD_A = o/$(MODE)/third_party/mold/mold.a -THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) +THIRD_PARTY_MOLD_FILES := $(wildcard third_party/mold/*) $(wildcard third_party/mold/elf/*) THIRD_PARTY_MOLD_HDRS = $(filter %.h,$(THIRD_PARTY_MOLD_FILES)) THIRD_PARTY_MOLD_SRCS = $(filter %.cc,$(THIRD_PARTY_MOLD_FILES)) THIRD_PARTY_MOLD_OBJS = $(THIRD_PARTY_MOLD_SRCS:%.cc=o/$(MODE)/%.o) @@ -16,6 +16,9 @@ THIRD_PARTY_MOLD_A_DIRECTDEPS = \ LIBC_STR \ LIBC_INTRIN \ LIBC_STDIO \ + LIBC_CALLS \ + LIBC_TINYMATH \ + LIBC_SYSV \ LIBC_RUNTIME \ THIRD_PARTY_ZSTD \ THIRD_PARTY_XXHASH \ @@ -35,6 +38,8 @@ $(THIRD_PARTY_MOLD_OBJS): private \ -fno-asynchronous-unwind-tables \ -Wno-sign-compare \ -Wno-unused-function \ + -DMOLD_X86_64=1 \ + -DMOLD_TARGET=X86_64 THIRD_PARTY_MOLD_CHECKS = \ $(THIRD_PARTY_MOLD_A).pkg \