Diffstat (limited to 'externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp')
-rw-r--r-- | externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp | 540
1 file changed, 540 insertions, 0 deletions
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
new file mode 100644
index 0000000000..22d9868fc5
--- /dev/null
+++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
@@ -0,0 +1,540 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/x64/block_of_code.h"
+
+#ifdef _WIN32
+#    define WIN32_LEAN_AND_MEAN
+#    include <windows.h>
+#else
+#    include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#    include <errno.h>
+#    include <fmt/format.h>
+#    include <sys/sysctl.h>
+#endif
+
+#include <array>
+#include <cstring>
+
+#include <mcl/assert.hpp>
+#include <mcl/bit/bit_field.hpp>
+#include <xbyak/xbyak.h>
+
+#include "dynarmic/backend/x64/a32_jitstate.h"
+#include "dynarmic/backend/x64/abi.h"
+#include "dynarmic/backend/x64/hostloc.h"
+#include "dynarmic/backend/x64/perf_map.h"
+#include "dynarmic/backend/x64/stack_layout.h"
+
+namespace Dynarmic::Backend::X64 {
+
+#ifdef _WIN32
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM3);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM4);
+const std::array<Xbyak::Reg64, ABI_PARAM_COUNT> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4};
+#else
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM3);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM4);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM5 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM5);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM6 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM6);
+const std::array<Xbyak::Reg64, ABI_PARAM_COUNT> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6};
+#endif
+
+namespace {
+
+constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
+constexpr size_t PRELUDE_COMMIT_SIZE = 16 * 1024 * 1024;
+
+class CustomXbyakAllocator : public Xbyak::Allocator {
+public:
+#ifdef _WIN32
+    uint8_t* alloc(size_t size) override {
+        void* p = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);
+        if (p == nullptr) {
+            throw Xbyak::Error(Xbyak::ERR_CANT_ALLOC);
+        }
+        return static_cast<uint8_t*>(p);
+    }
+
+    void free(uint8_t* p) override {
+        VirtualFree(static_cast<void*>(p), 0, MEM_RELEASE);
+    }
+
+    bool useProtect() const override { return false; }
+#else
+    static constexpr size_t DYNARMIC_PAGE_SIZE = 4096;
+
+    // Can't subclass Xbyak::MmapAllocator because it is not a pure interface
+    // and doesn't expose its construtor
+    uint8_t* alloc(size_t size) override {
+        // Waste a page to store the size
+        size += DYNARMIC_PAGE_SIZE;
+
+#    if defined(MAP_ANONYMOUS)
+        int mode = MAP_PRIVATE | MAP_ANONYMOUS;
+#    elif defined(MAP_ANON)
+        int mode = MAP_PRIVATE | MAP_ANON;
+#    else
+#        error "not supported"
+#    endif
+#    ifdef MAP_JIT
+        mode |= MAP_JIT;
+#    endif
+
+        void* p = mmap(nullptr, size, PROT_READ | PROT_WRITE, mode, -1, 0);
+        if (p == MAP_FAILED) {
+            throw Xbyak::Error(Xbyak::ERR_CANT_ALLOC);
+        }
+        std::memcpy(p, &size, sizeof(size_t));
+        return static_cast<uint8_t*>(p) + DYNARMIC_PAGE_SIZE;
+    }
+
+    void free(uint8_t* p) override {
+        size_t size;
+        std::memcpy(&size, p - DYNARMIC_PAGE_SIZE, sizeof(size_t));
+        munmap(p - DYNARMIC_PAGE_SIZE, size);
+    }
+
+#    ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+    bool useProtect() const override { return false; }
+#    endif
+#endif
+};
+
+// This is threadsafe as Xbyak::Allocator does not contain any state; it is a pure interface.
+CustomXbyakAllocator s_allocator;
+
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+void ProtectMemory(const void* base, size_t size, bool is_executable) {
+#    ifdef _WIN32
+    DWORD oldProtect = 0;
+    VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
+#    else
+    static const size_t pageSize = sysconf(_SC_PAGESIZE);
+    const size_t iaddr = reinterpret_cast<size_t>(base);
+    const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
+    const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
+    mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
+#    endif
+}
+#endif
+
+HostFeature GetHostFeatures() {
+    HostFeature features = {};
+
+#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
+    using Cpu = Xbyak::util::Cpu;
+    Xbyak::util::Cpu cpu_info;
+
+    if (cpu_info.has(Cpu::tSSSE3))
+        features |= HostFeature::SSSE3;
+    if (cpu_info.has(Cpu::tSSE41))
+        features |= HostFeature::SSE41;
+    if (cpu_info.has(Cpu::tSSE42))
+        features |= HostFeature::SSE42;
+    if (cpu_info.has(Cpu::tAVX))
+        features |= HostFeature::AVX;
+    if (cpu_info.has(Cpu::tAVX2))
+        features |= HostFeature::AVX2;
+    if (cpu_info.has(Cpu::tAVX512F))
+        features |= HostFeature::AVX512F;
+    if (cpu_info.has(Cpu::tAVX512CD))
+        features |= HostFeature::AVX512CD;
+    if (cpu_info.has(Cpu::tAVX512VL))
+        features |= HostFeature::AVX512VL;
+    if (cpu_info.has(Cpu::tAVX512BW))
+        features |= HostFeature::AVX512BW;
+    if (cpu_info.has(Cpu::tAVX512DQ))
+        features |= HostFeature::AVX512DQ;
+    if (cpu_info.has(Cpu::tAVX512_BITALG))
+        features |= HostFeature::AVX512BITALG;
+    if (cpu_info.has(Cpu::tAVX512VBMI))
+        features |= HostFeature::AVX512VBMI;
+    if (cpu_info.has(Cpu::tPCLMULQDQ))
+        features |= HostFeature::PCLMULQDQ;
+    if (cpu_info.has(Cpu::tF16C))
+        features |= HostFeature::F16C;
+    if (cpu_info.has(Cpu::tFMA))
+        features |= HostFeature::FMA;
+    if (cpu_info.has(Cpu::tAESNI))
+        features |= HostFeature::AES;
+    if (cpu_info.has(Cpu::tSHA))
+        features |= HostFeature::SHA;
+    if (cpu_info.has(Cpu::tPOPCNT))
+        features |= HostFeature::POPCNT;
+    if (cpu_info.has(Cpu::tBMI1))
+        features |= HostFeature::BMI1;
+    if (cpu_info.has(Cpu::tBMI2))
+        features |= HostFeature::BMI2;
+    if (cpu_info.has(Cpu::tLZCNT))
+        features |= HostFeature::LZCNT;
+    if (cpu_info.has(Cpu::tGFNI))
+        features |= HostFeature::GFNI;
+
+    if (cpu_info.has(Cpu::tBMI2)) {
+        // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
+        // Check for Zen 3 or newer by its family (0x19).
+        // See also: https://en.wikichip.org/wiki/amd/cpuid
+        if (cpu_info.has(Cpu::tAMD)) {
+            std::array<u32, 4> data{};
+            cpu_info.getCpuid(1, data.data());
+            const u32 family_base = mcl::bit::get_bits<8, 11>(data[0]);
+            const u32 family_extended = mcl::bit::get_bits<20, 27>(data[0]);
+            const u32 family = family_base + family_extended;
+            if (family >= 0x19)
+                features |= HostFeature::FastBMI2;
+        } else {
+            features |= HostFeature::FastBMI2;
+        }
+    }
+#endif
+
+    return features;
+}
+
+#ifdef __APPLE__
+bool IsUnderRosetta() {
+    int result = 0;
+    size_t result_size = sizeof(result);
+    if (sysctlbyname("sysctl.proc_translated", &result, &result_size, nullptr, 0) == -1) {
+        if (errno != ENOENT)
+            fmt::print("IsUnderRosetta: Failed to detect Rosetta state, assuming not under Rosetta");
+        return false;
+    }
+    return result != 0;
+}
+#endif
+
+} // anonymous namespace
+
+BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp)
+        : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
+        , cb(std::move(cb))
+        , jsi(jsi)
+        , constant_pool(*this, CONSTANT_POOL_SIZE)
+        , host_features(GetHostFeatures()) {
+    EnableWriting();
+    EnsureMemoryCommitted(PRELUDE_COMMIT_SIZE);
+    GenRunCode(rcp);
+}
+
+void BlockOfCode::PreludeComplete() {
+    prelude_complete = true;
+    code_begin = getCurr();
+    ClearCache();
+    DisableWriting();
+}
+
+void BlockOfCode::EnableWriting() {
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+#    ifdef _WIN32
+    ProtectMemory(getCode(), committed_size, false);
+#    else
+    ProtectMemory(getCode(), maxSize_, false);
+#    endif
+#endif
+}
+
+void BlockOfCode::DisableWriting() {
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+#    ifdef _WIN32
+    ProtectMemory(getCode(), committed_size, true);
+#    else
+    ProtectMemory(getCode(), maxSize_, true);
+#    endif
+#endif
+}
+
+void BlockOfCode::ClearCache() {
+    ASSERT(prelude_complete);
+    SetCodePtr(code_begin);
+}
+
+size_t BlockOfCode::SpaceRemaining() const {
+    ASSERT(prelude_complete);
+    const u8* current_ptr = getCurr<const u8*>();
+    if (current_ptr >= &top_[maxSize_])
+        return 0;
+    return &top_[maxSize_] - current_ptr;
+}
+
+void BlockOfCode::EnsureMemoryCommitted([[maybe_unused]] size_t codesize) {
+#ifdef _WIN32
+    if (committed_size < size_ + codesize) {
+        committed_size = std::min<size_t>(maxSize_, committed_size + codesize);
+#    ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+        VirtualAlloc(top_, committed_size, MEM_COMMIT, PAGE_READWRITE);
+#    else
+        VirtualAlloc(top_, committed_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+#    endif
+    }
+#endif
+}
+
+HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
+    return run_code(jit_state, code_ptr);
+}
+
+HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
+    return step_code(jit_state, code_ptr);
+}
+
+void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
+    size_t index = 0;
+    if (mxcsr_already_exited)
+        index |= MXCSR_ALREADY_EXITED;
+    jmp(return_from_run_code[index]);
+}
+
+void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
+    size_t index = FORCE_RETURN;
+    if (mxcsr_already_exited)
+        index |= MXCSR_ALREADY_EXITED;
+    jmp(return_from_run_code[index]);
+}
+
+void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
+    Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
+
+    align();
+    run_code = getCurr<RunCodeFuncType>();
+
+    // This serves two purposes:
+    // 1. It saves all the registers we as a callee need to save.
+    // 2. It aligns the stack so that the code the JIT emits can assume
+    //    that the stack is appropriately aligned for CALLs.
+    ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+
+    mov(r15, ABI_PARAM1);
+    mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
+
+    if (cb.enable_cycle_counting) {
+        cb.GetTicksRemaining->EmitCall(*this);
+        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
+        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
+    }
+
+    rcp(*this);
+
+    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+    jne(return_to_caller_mxcsr_already_exited, T_NEAR);
+
+    SwitchMxcsrOnEntry();
+    jmp(rbx);
+
+    align();
+    step_code = getCurr<RunCodeFuncType>();
+
+    ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+
+    mov(r15, ABI_PARAM1);
+
+    if (cb.enable_cycle_counting) {
+        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
+        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
+    }
+
+    rcp(*this);
+
+    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+    jne(return_to_caller_mxcsr_already_exited, T_NEAR);
+    lock();
+    or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
+
+    SwitchMxcsrOnEntry();
+    jmp(ABI_PARAM2);
+
+    // Dispatcher loop
+
+    align();
+    return_from_run_code[0] = getCurr<const void*>();
+
+    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+    jne(return_to_caller);
+    if (cb.enable_cycle_counting) {
+        cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+        jng(return_to_caller);
+    }
+    cb.LookupBlock->EmitCall(*this);
+    jmp(ABI_RETURN);
+
+    align();
+    return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
+
+    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+    jne(return_to_caller_mxcsr_already_exited);
+    if (cb.enable_cycle_counting) {
+        cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+        jng(return_to_caller_mxcsr_already_exited);
+    }
+    SwitchMxcsrOnEntry();
+    cb.LookupBlock->EmitCall(*this);
+    jmp(ABI_RETURN);
+
+    align();
+    return_from_run_code[FORCE_RETURN] = getCurr<const void*>();
+    L(return_to_caller);
+
+    SwitchMxcsrOnExit();
+    // fallthrough
+
+    return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
+    L(return_to_caller_mxcsr_already_exited);
+
+    if (cb.enable_cycle_counting) {
+        cb.AddTicks->EmitCall(*this, [this](RegList param) {
+            mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
+            sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
+        });
+    }
+
+    xor_(eax, eax);
+    lock();
+    xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
+
+    ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+    ret();
+
+    PerfMapRegister(run_code, getCurr(), "dynarmic_dispatcher");
+}
+
+void BlockOfCode::SwitchMxcsrOnEntry() {
+    stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
+    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+}
+
+void BlockOfCode::SwitchMxcsrOnExit() {
+    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+    ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
+}
+
+void BlockOfCode::EnterStandardASIMD() {
+    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+    ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
+}
+
+void BlockOfCode::LeaveStandardASIMD() {
+    stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
+    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+}
+
+void BlockOfCode::UpdateTicks() {
+    if (!cb.enable_cycle_counting) {
+        return;
+    }
+
+    cb.AddTicks->EmitCall(*this, [this](RegList param) {
+        mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
+        sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
+    });
+
+    cb.GetTicksRemaining->EmitCall(*this);
+    mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
+    mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
+}
+
+void BlockOfCode::LookupBlock() {
+    cb.LookupBlock->EmitCall(*this);
+}
+
+void BlockOfCode::LoadRequiredFlagsForCondFromRax(IR::Cond cond) {
+#ifdef __APPLE__
+    static const bool is_rosetta = IsUnderRosetta();
+#endif
+
+    // sahf restores SF, ZF, CF
+    // add al, 0x7F restores OF
+
+    switch (cond) {
+    case IR::Cond::EQ: // z
+    case IR::Cond::NE: // !z
+    case IR::Cond::CS: // c
+    case IR::Cond::CC: // !c
+    case IR::Cond::MI: // n
+    case IR::Cond::PL: // !n
+        sahf();
+        break;
+    case IR::Cond::VS: // v
+    case IR::Cond::VC: // !v
+        cmp(al, 0x81);
+        break;
+    case IR::Cond::HI: // c & !z
+    case IR::Cond::LS: // !c | z
+        sahf();
+        cmc();
+        break;
+    case IR::Cond::GE: // n == v
+    case IR::Cond::LT: // n != v
+    case IR::Cond::GT: // !z & (n == v)
+    case IR::Cond::LE: // z | (n != v)
+#ifdef __APPLE__
+        if (is_rosetta) {
+            shl(al, 3);
+            xchg(al, ah);
+            push(rax);
+            popf();
+            break;
+        }
+#endif
+        cmp(al, 0x81);
+        sahf();
+        break;
+    case IR::Cond::AL:
+    case IR::Cond::NV:
+        break;
+    default:
+        ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
+        break;
+    }
+}
+
+Xbyak::Address BlockOfCode::Const(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
+    return constant_pool.GetConstant(frame, lower, upper);
+}
+
+CodePtr BlockOfCode::GetCodeBegin() const {
+    return code_begin;
+}
+
+size_t BlockOfCode::GetTotalCodeSize() const {
+    return maxSize_;
+}
+
+void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
+    if (size_ + alloc_size >= maxSize_) {
+        throw Xbyak::Error(Xbyak::ERR_CODE_IS_TOO_BIG);
+    }
+
+    EnsureMemoryCommitted(alloc_size);
+
+    void* ret = getCurr<void*>();
+    size_ += alloc_size;
+    memset(ret, 0, alloc_size);
+    return ret;
+}
+
+void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
+    // The "size" defines where top_, the insertion point, is.
+    size_t required_size = reinterpret_cast<const u8*>(code_ptr) - getCode();
+    setSize(required_size);
+}
+
+void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
+    size_t current_size = getCurr<const u8*>() - reinterpret_cast<const u8*>(begin);
+    ASSERT(current_size <= size);
+    nop(size - current_size);
+}
+
+} // namespace Dynarmic::Backend::X64
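For reference, the FastBMI2 check in GetHostFeatures above reads CPUID leaf 1 and treats AMD family 0x19 (Zen 3) or newer as having fast pdep/pext. The following is a minimal standalone sketch of that same family computation, using GCC/Clang's <cpuid.h> instead of Xbyak's Cpu helper; it is not part of the diff, and the program and its output are illustrative only.

// Standalone sketch: mirrors the family computation used for HostFeature::FastBMI2.
// Builds only on x86/x86-64 with GCC or Clang (<cpuid.h>).
#include <cpuid.h>
#include <cstdint>
#include <cstdio>

int main() {
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 1;  // CPUID leaf 1 not available
    const std::uint32_t family_base = (eax >> 8) & 0xF;        // EAX bits 11:8
    const std::uint32_t family_extended = (eax >> 20) & 0xFF;  // EAX bits 27:20
    const std::uint32_t family = family_base + family_extended;
    // The diff enables FastBMI2 on AMD when family >= 0x19 (Zen 3 and newer).
    std::printf("family = 0x%X, fast pdep/pext assumed: %s\n",
                family, family >= 0x19 ? "yes" : "no");
    return 0;
}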