Diffstat (limited to 'externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp')
-rw-r--r--  externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp | 540
1 file changed, 540 insertions, 0 deletions
diff --git a/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
new file mode 100644
index 0000000000..22d9868fc5
--- /dev/null
+++ b/externals/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp
@@ -0,0 +1,540 @@
+/* This file is part of the dynarmic project.
+ * Copyright (c) 2016 MerryMage
+ * SPDX-License-Identifier: 0BSD
+ */
+
+#include "dynarmic/backend/x64/block_of_code.h"
+
+#ifdef _WIN32
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#else
+#    include <sys/mman.h>
+#    include <unistd.h>
+#endif
+
+#ifdef __APPLE__
+# include <errno.h>
+# include <fmt/format.h>
+# include <sys/sysctl.h>
+#endif
+
+#include <array>
+#include <cstring>
+
+#include <mcl/assert.hpp>
+#include <mcl/bit/bit_field.hpp>
+#include <xbyak/xbyak.h>
+
+#include "dynarmic/backend/x64/a32_jitstate.h"
+#include "dynarmic/backend/x64/abi.h"
+#include "dynarmic/backend/x64/hostloc.h"
+#include "dynarmic/backend/x64/perf_map.h"
+#include "dynarmic/backend/x64/stack_layout.h"
+
+namespace Dynarmic::Backend::X64 {
+
+#ifdef _WIN32
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM3);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM4);
+const std::array<Xbyak::Reg64, ABI_PARAM_COUNT> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4};
+#else
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
+const Xbyak::Reg64 BlockOfCode::ABI_RETURN2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM2 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM2);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM3 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM3);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM4 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM4);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM5 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM5);
+const Xbyak::Reg64 BlockOfCode::ABI_PARAM6 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM6);
+const std::array<Xbyak::Reg64, ABI_PARAM_COUNT> BlockOfCode::ABI_PARAMS = {BlockOfCode::ABI_PARAM1, BlockOfCode::ABI_PARAM2, BlockOfCode::ABI_PARAM3, BlockOfCode::ABI_PARAM4, BlockOfCode::ABI_PARAM5, BlockOfCode::ABI_PARAM6};
+#endif
+
+namespace {
+
+constexpr size_t CONSTANT_POOL_SIZE = 2 * 1024 * 1024;
+constexpr size_t PRELUDE_COMMIT_SIZE = 16 * 1024 * 1024;
+
+class CustomXbyakAllocator : public Xbyak::Allocator {
+public:
+#ifdef _WIN32
+ uint8_t* alloc(size_t size) override {
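+        // Reserve address space without committing it; pages are committed
+        // on demand later by EnsureMemoryCommitted().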
+ void* p = VirtualAlloc(nullptr, size, MEM_RESERVE, PAGE_READWRITE);
+ if (p == nullptr) {
+ throw Xbyak::Error(Xbyak::ERR_CANT_ALLOC);
+ }
+ return static_cast<uint8_t*>(p);
+ }
+
+ void free(uint8_t* p) override {
+ VirtualFree(static_cast<void*>(p), 0, MEM_RELEASE);
+ }
+
+ bool useProtect() const override { return false; }
+#else
+ static constexpr size_t DYNARMIC_PAGE_SIZE = 4096;
+
+    // Can't subclass Xbyak::MmapAllocator because it is not a pure interface
+    // and doesn't expose its constructor
+ uint8_t* alloc(size_t size) override {
+ // Waste a page to store the size
+ size += DYNARMIC_PAGE_SIZE;
+
+# if defined(MAP_ANONYMOUS)
+ int mode = MAP_PRIVATE | MAP_ANONYMOUS;
+# elif defined(MAP_ANON)
+ int mode = MAP_PRIVATE | MAP_ANON;
+# else
+# error "not supported"
+# endif
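+        // On macOS, MAP_JIT marks the region as JIT memory, which the
+        // hardened runtime requires for writable-executable mappings.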
+# ifdef MAP_JIT
+ mode |= MAP_JIT;
+# endif
+
+ void* p = mmap(nullptr, size, PROT_READ | PROT_WRITE, mode, -1, 0);
+ if (p == MAP_FAILED) {
+ throw Xbyak::Error(Xbyak::ERR_CANT_ALLOC);
+ }
+ std::memcpy(p, &size, sizeof(size_t));
+ return static_cast<uint8_t*>(p) + DYNARMIC_PAGE_SIZE;
+ }
+
+ void free(uint8_t* p) override {
+ size_t size;
+ std::memcpy(&size, p - DYNARMIC_PAGE_SIZE, sizeof(size_t));
+ munmap(p - DYNARMIC_PAGE_SIZE, size);
+ }
+
+# ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+ bool useProtect() const override { return false; }
+# endif
+#endif
+};
+
+// This is threadsafe as Xbyak::Allocator does not contain any state; it is a pure interface.
+CustomXbyakAllocator s_allocator;
+
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+void ProtectMemory(const void* base, size_t size, bool is_executable) {
+# ifdef _WIN32
+ DWORD oldProtect = 0;
+ VirtualProtect(const_cast<void*>(base), size, is_executable ? PAGE_EXECUTE_READ : PAGE_READWRITE, &oldProtect);
+# else
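+    // mprotect requires a page-aligned address, so round base down to its containing page.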
+ static const size_t pageSize = sysconf(_SC_PAGESIZE);
+ const size_t iaddr = reinterpret_cast<size_t>(base);
+ const size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
+ const int mode = is_executable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE);
+ mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode);
+# endif
+}
+#endif
+
+HostFeature GetHostFeatures() {
+ HostFeature features = {};
+
+#ifdef DYNARMIC_ENABLE_CPU_FEATURE_DETECTION
+ using Cpu = Xbyak::util::Cpu;
+ Xbyak::util::Cpu cpu_info;
+
+ if (cpu_info.has(Cpu::tSSSE3))
+ features |= HostFeature::SSSE3;
+ if (cpu_info.has(Cpu::tSSE41))
+ features |= HostFeature::SSE41;
+ if (cpu_info.has(Cpu::tSSE42))
+ features |= HostFeature::SSE42;
+ if (cpu_info.has(Cpu::tAVX))
+ features |= HostFeature::AVX;
+ if (cpu_info.has(Cpu::tAVX2))
+ features |= HostFeature::AVX2;
+ if (cpu_info.has(Cpu::tAVX512F))
+ features |= HostFeature::AVX512F;
+ if (cpu_info.has(Cpu::tAVX512CD))
+ features |= HostFeature::AVX512CD;
+ if (cpu_info.has(Cpu::tAVX512VL))
+ features |= HostFeature::AVX512VL;
+ if (cpu_info.has(Cpu::tAVX512BW))
+ features |= HostFeature::AVX512BW;
+ if (cpu_info.has(Cpu::tAVX512DQ))
+ features |= HostFeature::AVX512DQ;
+ if (cpu_info.has(Cpu::tAVX512_BITALG))
+ features |= HostFeature::AVX512BITALG;
+ if (cpu_info.has(Cpu::tAVX512VBMI))
+ features |= HostFeature::AVX512VBMI;
+ if (cpu_info.has(Cpu::tPCLMULQDQ))
+ features |= HostFeature::PCLMULQDQ;
+ if (cpu_info.has(Cpu::tF16C))
+ features |= HostFeature::F16C;
+ if (cpu_info.has(Cpu::tFMA))
+ features |= HostFeature::FMA;
+ if (cpu_info.has(Cpu::tAESNI))
+ features |= HostFeature::AES;
+ if (cpu_info.has(Cpu::tSHA))
+ features |= HostFeature::SHA;
+ if (cpu_info.has(Cpu::tPOPCNT))
+ features |= HostFeature::POPCNT;
+ if (cpu_info.has(Cpu::tBMI1))
+ features |= HostFeature::BMI1;
+ if (cpu_info.has(Cpu::tBMI2))
+ features |= HostFeature::BMI2;
+ if (cpu_info.has(Cpu::tLZCNT))
+ features |= HostFeature::LZCNT;
+ if (cpu_info.has(Cpu::tGFNI))
+ features |= HostFeature::GFNI;
+
+ if (cpu_info.has(Cpu::tBMI2)) {
+ // BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
+ // Check for Zen 3 or newer by its family (0x19).
+ // See also: https://en.wikichip.org/wiki/amd/cpuid
+ if (cpu_info.has(Cpu::tAMD)) {
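+            // AMD's displayed family is base family plus extended family
+            // (base family is 0xF on all recent AMD CPUs).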
+ std::array<u32, 4> data{};
+ cpu_info.getCpuid(1, data.data());
+ const u32 family_base = mcl::bit::get_bits<8, 11>(data[0]);
+ const u32 family_extended = mcl::bit::get_bits<20, 27>(data[0]);
+ const u32 family = family_base + family_extended;
+ if (family >= 0x19)
+ features |= HostFeature::FastBMI2;
+ } else {
+ features |= HostFeature::FastBMI2;
+ }
+ }
+#endif
+
+ return features;
+}
+
+#ifdef __APPLE__
+bool IsUnderRosetta() {
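+    // Apple's sysctl.proc_translated returns 1 when the current process is
+    // being translated by Rosetta 2.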
+ int result = 0;
+ size_t result_size = sizeof(result);
+ if (sysctlbyname("sysctl.proc_translated", &result, &result_size, nullptr, 0) == -1) {
+ if (errno != ENOENT)
+ fmt::print("IsUnderRosetta: Failed to detect Rosetta state, assuming not under Rosetta");
+ return false;
+ }
+ return result != 0;
+}
+#endif
+
+} // anonymous namespace
+
+BlockOfCode::BlockOfCode(RunCodeCallbacks cb, JitStateInfo jsi, size_t total_code_size, std::function<void(BlockOfCode&)> rcp)
+ : Xbyak::CodeGenerator(total_code_size, nullptr, &s_allocator)
+ , cb(std::move(cb))
+ , jsi(jsi)
+ , constant_pool(*this, CONSTANT_POOL_SIZE)
+ , host_features(GetHostFeatures()) {
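+    // Under W^X builds, make the buffer writable first; then commit enough
+    // pages to emit the prelude.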
+ EnableWriting();
+ EnsureMemoryCommitted(PRELUDE_COMMIT_SIZE);
+ GenRunCode(rcp);
+}
+
+void BlockOfCode::PreludeComplete() {
+ prelude_complete = true;
+ code_begin = getCurr();
+ ClearCache();
+ DisableWriting();
+}
+
+void BlockOfCode::EnableWriting() {
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+# ifdef _WIN32
+ ProtectMemory(getCode(), committed_size, false);
+# else
+ ProtectMemory(getCode(), maxSize_, false);
+# endif
+#endif
+}
+
+void BlockOfCode::DisableWriting() {
+#ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+# ifdef _WIN32
+ ProtectMemory(getCode(), committed_size, true);
+# else
+ ProtectMemory(getCode(), maxSize_, true);
+# endif
+#endif
+}
+
+void BlockOfCode::ClearCache() {
+ ASSERT(prelude_complete);
+ SetCodePtr(code_begin);
+}
+
+size_t BlockOfCode::SpaceRemaining() const {
+ ASSERT(prelude_complete);
+ const u8* current_ptr = getCurr<const u8*>();
+ if (current_ptr >= &top_[maxSize_])
+ return 0;
+ return &top_[maxSize_] - current_ptr;
+}
+
+void BlockOfCode::EnsureMemoryCommitted([[maybe_unused]] size_t codesize) {
+#ifdef _WIN32
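+    // On Windows the allocator only reserves address space; commit additional pages on demand.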
+ if (committed_size < size_ + codesize) {
+ committed_size = std::min<size_t>(maxSize_, committed_size + codesize);
+# ifdef DYNARMIC_ENABLE_NO_EXECUTE_SUPPORT
+ VirtualAlloc(top_, committed_size, MEM_COMMIT, PAGE_READWRITE);
+# else
+ VirtualAlloc(top_, committed_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+# endif
+ }
+#endif
+}
+
+HaltReason BlockOfCode::RunCode(void* jit_state, CodePtr code_ptr) const {
+ return run_code(jit_state, code_ptr);
+}
+
+HaltReason BlockOfCode::StepCode(void* jit_state, CodePtr code_ptr) const {
+ return step_code(jit_state, code_ptr);
+}
+
+void BlockOfCode::ReturnFromRunCode(bool mxcsr_already_exited) {
+ size_t index = 0;
+ if (mxcsr_already_exited)
+ index |= MXCSR_ALREADY_EXITED;
+ jmp(return_from_run_code[index]);
+}
+
+void BlockOfCode::ForceReturnFromRunCode(bool mxcsr_already_exited) {
+ size_t index = FORCE_RETURN;
+ if (mxcsr_already_exited)
+ index |= MXCSR_ALREADY_EXITED;
+ jmp(return_from_run_code[index]);
+}
+
+void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
+ Xbyak::Label return_to_caller, return_to_caller_mxcsr_already_exited;
+
+ align();
+ run_code = getCurr<RunCodeFuncType>();
+
+ // This serves two purposes:
+ // 1. It saves all the registers we as a callee need to save.
+ // 2. It aligns the stack so that the code the JIT emits can assume
+ // that the stack is appropriately aligned for CALLs.
+ ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+
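+    // r15 is pinned to the JIT state pointer for the duration of guest execution.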
+ mov(r15, ABI_PARAM1);
+ mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
+
+ if (cb.enable_cycle_counting) {
+ cb.GetTicksRemaining->EmitCall(*this);
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
+ }
+
+ rcp(*this);
+
+ cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ jne(return_to_caller_mxcsr_already_exited, T_NEAR);
+
+ SwitchMxcsrOnEntry();
+ jmp(rbx);
+
+ align();
+ step_code = getCurr<RunCodeFuncType>();
+
+ ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+
+ mov(r15, ABI_PARAM1);
+
+ if (cb.enable_cycle_counting) {
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 1);
+ }
+
+ rcp(*this);
+
+ cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ jne(return_to_caller_mxcsr_already_exited, T_NEAR);
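+    // Set the Step bit so execution halts again after a single block.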
+ lock();
+ or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
+
+ SwitchMxcsrOnEntry();
+ jmp(ABI_PARAM2);
+
+ // Dispatcher loop
+
+ align();
+ return_from_run_code[0] = getCurr<const void*>();
+
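+    // While not halted and cycles remain, look up the next block and jump straight to it.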
+ cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ jne(return_to_caller);
+ if (cb.enable_cycle_counting) {
+ cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+ jng(return_to_caller);
+ }
+ cb.LookupBlock->EmitCall(*this);
+ jmp(ABI_RETURN);
+
+ align();
+ return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();
+
+ cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
+ jne(return_to_caller_mxcsr_already_exited);
+ if (cb.enable_cycle_counting) {
+ cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
+ jng(return_to_caller_mxcsr_already_exited);
+ }
+ SwitchMxcsrOnEntry();
+ cb.LookupBlock->EmitCall(*this);
+ jmp(ABI_RETURN);
+
+ align();
+ return_from_run_code[FORCE_RETURN] = getCurr<const void*>();
+ L(return_to_caller);
+
+ SwitchMxcsrOnExit();
+ // fallthrough
+
+ return_from_run_code[MXCSR_ALREADY_EXITED | FORCE_RETURN] = getCurr<const void*>();
+ L(return_to_caller_mxcsr_already_exited);
+
+ if (cb.enable_cycle_counting) {
+ cb.AddTicks->EmitCall(*this, [this](RegList param) {
+ mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
+ sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
+ });
+ }
+
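+    // Atomically fetch-and-clear halt_reason; the previous value is returned
+    // in eax (ABI_RETURN) as the HaltReason.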
+ xor_(eax, eax);
+ lock();
+ xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
+
+ ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
+ ret();
+
+ PerfMapRegister(run_code, getCurr(), "dynarmic_dispatcher");
+}
+
+void BlockOfCode::SwitchMxcsrOnEntry() {
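+    // Save the host MXCSR to the stack and load the guest's MXCSR
+    // (rounding mode, FTZ/DAZ, exception masks).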
+ stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
+ ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+}
+
+void BlockOfCode::SwitchMxcsrOnExit() {
+ stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+ ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
+}
+
+void BlockOfCode::EnterStandardASIMD() {
+ stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+ ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
+}
+
+void BlockOfCode::LeaveStandardASIMD() {
+ stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
+ ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
+}
+
+void BlockOfCode::UpdateTicks() {
+ if (!cb.enable_cycle_counting) {
+ return;
+ }
+
+ cb.AddTicks->EmitCall(*this, [this](RegList param) {
+ mov(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
+ sub(param[0], qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
+ });
+
+ cb.GetTicksRemaining->EmitCall(*this);
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], ABI_RETURN);
+ mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
+}
+
+void BlockOfCode::LookupBlock() {
+ cb.LookupBlock->EmitCall(*this);
+}
+
+void BlockOfCode::LoadRequiredFlagsForCondFromRax(IR::Cond cond) {
+#ifdef __APPLE__
+ static const bool is_rosetta = IsUnderRosetta();
+#endif
+
+    // sahf restores SF, ZF, CF from ah
+    // cmp al, 0x81 restores OF (and, unlike add al, 0x7F, leaves al unchanged)
+
+ switch (cond) {
+ case IR::Cond::EQ: // z
+ case IR::Cond::NE: // !z
+ case IR::Cond::CS: // c
+ case IR::Cond::CC: // !c
+ case IR::Cond::MI: // n
+ case IR::Cond::PL: // !n
+ sahf();
+ break;
+ case IR::Cond::VS: // v
+ case IR::Cond::VC: // !v
+ cmp(al, 0x81);
+ break;
+ case IR::Cond::HI: // c & !z
+ case IR::Cond::LS: // !c | z
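+        // Complement carry so ARM HI (C && !Z) can be tested as x64 "above" (CF == 0 && ZF == 0).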
+ sahf();
+ cmc();
+ break;
+ case IR::Cond::GE: // n == v
+ case IR::Cond::LT: // n != v
+ case IR::Cond::GT: // !z & (n == v)
+ case IR::Cond::LE: // z | (n != v)
+#ifdef __APPLE__
+ if (is_rosetta) {
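+        // Rosetta path: build the full EFLAGS image (OF lives in bit 11) and
+        // load it via push/popf instead of the cmp+sahf sequence.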
+ shl(al, 3);
+ xchg(al, ah);
+ push(rax);
+ popf();
+ break;
+ }
+#endif
+ cmp(al, 0x81);
+ sahf();
+ break;
+ case IR::Cond::AL:
+ case IR::Cond::NV:
+ break;
+ default:
+ ASSERT_MSG(false, "Unknown cond {}", static_cast<size_t>(cond));
+ break;
+ }
+}
+
+Xbyak::Address BlockOfCode::Const(const Xbyak::AddressFrame& frame, u64 lower, u64 upper) {
+ return constant_pool.GetConstant(frame, lower, upper);
+}
+
+CodePtr BlockOfCode::GetCodeBegin() const {
+ return code_begin;
+}
+
+size_t BlockOfCode::GetTotalCodeSize() const {
+ return maxSize_;
+}
+
+void* BlockOfCode::AllocateFromCodeSpace(size_t alloc_size) {
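+    // Carve a zero-initialized region out of the code buffer at the current emission point.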
+ if (size_ + alloc_size >= maxSize_) {
+ throw Xbyak::Error(Xbyak::ERR_CODE_IS_TOO_BIG);
+ }
+
+ EnsureMemoryCommitted(alloc_size);
+
+ void* ret = getCurr<void*>();
+ size_ += alloc_size;
+ memset(ret, 0, alloc_size);
+ return ret;
+}
+
+void BlockOfCode::SetCodePtr(CodePtr code_ptr) {
+ // The "size" defines where top_, the insertion point, is.
+ size_t required_size = reinterpret_cast<const u8*>(code_ptr) - getCode();
+ setSize(required_size);
+}
+
+void BlockOfCode::EnsurePatchLocationSize(CodePtr begin, size_t size) {
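+    // Pad the patch site with nops so it always occupies exactly 'size' bytes.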
+ size_t current_size = getCurr<const u8*>() - reinterpret_cast<const u8*>(begin);
+ ASSERT(current_size <= size);
+ nop(size - current_size);
+}
+
+} // namespace Dynarmic::Backend::X64