diff options
Diffstat (limited to 'src')
44 files changed, 631 insertions, 259 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9aea4af878..04018233f9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -21,15 +21,29 @@ if (MSVC) # Ensure that projects build with Unicode support. add_definitions(-DUNICODE -D_UNICODE) - # /W3 - Level 3 warnings - # /MP - Multi-threaded compilation - # /Zi - Output debugging information - # /Zo - enhanced debug info for optimized builds - # /permissive- - enables stricter C++ standards conformance checks - # /EHsc - C++-only exception handling semantics - # /Zc:throwingNew - let codegen assume `operator new` will never return null - # /Zc:inline - let codegen omit inline functions in object files - add_compile_options(/W3 /MP /Zi /Zo /permissive- /EHsc /std:c++latest /Zc:throwingNew,inline) + # /W3 - Level 3 warnings + # /MP - Multi-threaded compilation + # /Zi - Output debugging information + # /Zo - Enhanced debug info for optimized builds + # /permissive- - Enables stricter C++ standards conformance checks + # /EHsc - C++-only exception handling semantics + # /volatile:iso - Use strict standards-compliant volatile semantics. + # /Zc:externConstexpr - Allow extern constexpr variables to have external linkage, like the standard mandates + # /Zc:inline - Let codegen omit inline functions in object files + # /Zc:throwingNew - Let codegen assume `operator new` (without std::nothrow) will never return null + add_compile_options( + /W3 + /MP + /Zi + /Zo + /permissive- + /EHsc + /std:c++latest + /volatile:iso + /Zc:externConstexpr + /Zc:inline + /Zc:throwingNew + ) # /GS- - No stack buffer overflow checks add_compile_options("$<$<CONFIG:Release>:/GS->") @@ -37,7 +51,10 @@ if (MSVC) set(CMAKE_EXE_LINKER_FLAGS_DEBUG "/DEBUG /MANIFEST:NO" CACHE STRING "" FORCE) set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) else() - add_compile_options("-Wno-attributes") + add_compile_options( + -Wall + -Wno-attributes + ) if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL Clang) add_compile_options("-stdlib=libc++") diff --git a/src/common/zstd_compression.cpp b/src/common/zstd_compression.cpp index 60a35c67cc..9785264925 100644 --- a/src/common/zstd_compression.cpp +++ b/src/common/zstd_compression.cpp @@ -2,8 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - #include <algorithm> #include <zstd.h> diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp index 1320bbe77c..eda466a5d4 100644 --- a/src/core/frontend/emu_window.cpp +++ b/src/core/frontend/emu_window.cpp @@ -10,6 +10,8 @@ namespace Core::Frontend { +GraphicsContext::~GraphicsContext() = default; + class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>, public std::enable_shared_from_this<TouchState> { public: diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 70a5225568..e2c290dc16 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -19,6 +19,8 @@ namespace Core::Frontend { */ class GraphicsContext { public: + virtual ~GraphicsContext(); + /// Makes the graphics context current for the caller thread virtual void MakeCurrent() = 0; diff --git a/src/core/hle/service/audio/audctl.cpp b/src/core/hle/service/audio/audctl.cpp index f43e512e99..6a01d4d29b 100644 --- a/src/core/hle/service/audio/audctl.cpp +++ b/src/core/hle/service/audio/audctl.cpp @@ -50,7 +50,7 @@ void AudCtl::GetTargetVolumeMin(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Audio, "called."); // This service function is currently hardcoded on the - // actual console to this value (as of 6.0.0). + // actual console to this value (as of 8.0.0). constexpr s32 target_min_volume = 0; IPC::ResponseBuilder rb{ctx, 3}; @@ -62,7 +62,7 @@ void AudCtl::GetTargetVolumeMax(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Audio, "called."); // This service function is currently hardcoded on the - // actual console to this value (as of 6.0.0). + // actual console to this value (as of 8.0.0). constexpr s32 target_max_volume = 15; IPC::ResponseBuilder rb{ctx, 3}; diff --git a/src/core/memory.h b/src/core/memory.h index b9fa18b1db..04e2c5f1d1 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -72,15 +72,6 @@ u8* GetPointer(VAddr vaddr); std::string ReadCString(VAddr vaddr, std::size_t max_length); -enum class FlushMode { - /// Write back modified surfaces to RAM - Flush, - /// Remove region from the cache - Invalidate, - /// Write back modified surfaces to RAM, and also remove them from the cache - FlushAndInvalidate, -}; - /** * Mark each page touching the region as cached. */ diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index e1db068115..4b17bada5d 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -102,12 +102,6 @@ bool VerifyLogin(const std::string& username, const std::string& token) { } TelemetrySession::TelemetrySession() { -#ifdef ENABLE_WEB_SERVICE - backend = std::make_unique<WebService::TelemetryJson>( - Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); -#else - backend = std::make_unique<Telemetry::NullVisitor>(); -#endif // Log one-time top-level information AddField(Telemetry::FieldType::None, "TelemetryId", GetTelemetryId()); @@ -175,9 +169,14 @@ TelemetrySession::~TelemetrySession() { .count()}; AddField(Telemetry::FieldType::Session, "Shutdown_Time", shutdown_time); +#ifdef ENABLE_WEB_SERVICE + auto backend = std::make_unique<WebService::TelemetryJson>( + Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); +#else + auto backend = std::make_unique<Telemetry::NullVisitor>(); +#endif + // Complete the session, submitting to web service if necessary - // This is just a placeholder to wrap up the session once the core completes and this is - // destroyed. This will be moved elsewhere once we are actually doing real I/O with the service. field_collection.Accept(*backend); if (Settings::values.enable_telemetry) backend->Complete(); @@ -186,6 +185,8 @@ TelemetrySession::~TelemetrySession() { bool TelemetrySession::SubmitTestcase() { #ifdef ENABLE_WEB_SERVICE + auto backend = std::make_unique<WebService::TelemetryJson>( + Settings::values.web_api_url, Settings::values.yuzu_username, Settings::values.yuzu_token); field_collection.Accept(*backend); return backend->SubmitTestcase(); #else diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h index 023612b79e..cae5a45a00 100644 --- a/src/core/telemetry_session.h +++ b/src/core/telemetry_session.h @@ -39,7 +39,6 @@ public: private: Telemetry::FieldCollection field_collection; ///< Tracks all added fields for the session - std::unique_ptr<Telemetry::VisitorInterface> backend; ///< Backend interface that logs fields }; /** diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6821f275df..1e010e4da6 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -3,6 +3,8 @@ add_library(video_core STATIC dma_pusher.h debug_utils/debug_utils.cpp debug_utils/debug_utils.h + engines/engine_upload.cpp + engines/engine_upload.h engines/fermi_2d.cpp engines/fermi_2d.h engines/kepler_compute.cpp diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp new file mode 100644 index 0000000000..f8aa4ff55d --- /dev/null +++ b/src/video_core/engines/engine_upload.cpp @@ -0,0 +1,48 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/engines/engine_upload.h" +#include "video_core/memory_manager.h" +#include "video_core/textures/decoders.h" + +namespace Tegra::Engines::Upload { + +State::State(MemoryManager& memory_manager, Registers& regs) + : memory_manager(memory_manager), regs(regs) {} + +void State::ProcessExec(const bool is_linear) { + write_offset = 0; + copy_size = regs.line_length_in * regs.line_count; + inner_buffer.resize(copy_size); + this->is_linear = is_linear; +} + +void State::ProcessData(const u32 data, const bool is_last_call) { + const u32 sub_copy_size = std::min(4U, copy_size - write_offset); + std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); + write_offset += sub_copy_size; + if (!is_last_call) { + return; + } + const GPUVAddr address{regs.dest.Address()}; + if (is_linear) { + memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); + } else { + UNIMPLEMENTED_IF(regs.dest.z != 0); + UNIMPLEMENTED_IF(regs.dest.depth != 1); + UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); + UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); + const std::size_t dst_size = Tegra::Texture::CalculateSize( + true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); + tmp_buffer.resize(dst_size); + memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, + regs.dest.BlockHeight(), copy_size, inner_buffer.data(), + tmp_buffer.data()); + memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); + } +} + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h new file mode 100644 index 0000000000..9c6e0d21ca --- /dev/null +++ b/src/video_core/engines/engine_upload.h @@ -0,0 +1,75 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <vector> +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" + +namespace Tegra { +class MemoryManager; +} + +namespace Tegra::Engines::Upload { + +struct Registers { + u32 line_length_in; + u32 line_count; + + struct { + u32 address_high; + u32 address_low; + u32 pitch; + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + }; + u32 width; + u32 height; + u32 depth; + u32 z; + u32 x; + u32 y; + + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); + } + + u32 BlockWidth() const { + return 1U << block_width.Value(); + } + + u32 BlockHeight() const { + return 1U << block_height.Value(); + } + + u32 BlockDepth() const { + return 1U << block_depth.Value(); + } + } dest; +}; + +class State { +public: + State(MemoryManager& memory_manager, Registers& regs); + ~State() = default; + + void ProcessExec(const bool is_linear); + void ProcessData(const u32 data, const bool is_last_call); + +private: + u32 write_offset = 0; + u32 copy_size = 0; + std::vector<u8> inner_buffer; + std::vector<u8> tmp_buffer; + bool is_linear = false; + Registers& regs; + MemoryManager& memory_manager; +}; + +} // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 2e51b7f13d..45f59a4d99 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -21,6 +21,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as G80_2D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_2d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h + */ + #define FERMI2D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32)) diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index b1d9504608..7404a8163c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -4,12 +4,21 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" +#include "video_core/textures/decoders.h" namespace Tegra::Engines { -KeplerCompute::KeplerCompute(MemoryManager& memory_manager) : memory_manager{memory_manager} {} +KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager) + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ + memory_manager, + regs.upload} {} KeplerCompute::~KeplerCompute() = default; @@ -20,14 +29,34 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; switch (method_call.method) { + case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case KEPLER_COMPUTE_REG_INDEX(data_upload): { + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } + break; + } case KEPLER_COMPUTE_REG_INDEX(launch): - // Abort execution since compute shaders can be used to alter game memory (e.g. CUDA - // kernels) - UNREACHABLE_MSG("Compute shaders are not implemented"); + ProcessLaunch(); break; default: break; } } +void KeplerCompute::ProcessLaunch() { + + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, + LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); + + const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; + LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index fb6cdf4329..5250b8d9ba 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -6,22 +6,40 @@ #include <array> #include <cstddef> +#include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" +namespace Core { +class System; +} + namespace Tegra { class MemoryManager; } +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { +/** + * This Engine is known as GK104_Compute. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_compute.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h + */ + #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) class KeplerCompute final { public: - explicit KeplerCompute(MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + MemoryManager& memory_manager); ~KeplerCompute(); static constexpr std::size_t NumConstBuffers = 8; @@ -31,30 +49,181 @@ public: union { struct { - INSERT_PADDING_WORDS(0xAF); + INSERT_PADDING_WORDS(0x60); + + Upload::Registers upload; + + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x3F); + + struct { + u32 address; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); + } + } launch_desc_loc; + + INSERT_PADDING_WORDS(0x1); u32 launch; - INSERT_PADDING_WORDS(0xC48); + INSERT_PADDING_WORDS(0x4A7); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } tsc; + + INSERT_PADDING_WORDS(0x3); + + struct { + u32 address_high; + u32 address_low; + u32 limit; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } tic; + + INSERT_PADDING_WORDS(0x22); + + struct { + u32 address_high; + u32 address_low; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | + address_low); + } + } code_loc; + + INSERT_PADDING_WORDS(0x3FE); + + u32 texture_const_buffer_index; + + INSERT_PADDING_WORDS(0x374); }; std::array<u32, NUM_REGS> reg_array; }; } regs{}; + + struct LaunchParams { + static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; + + INSERT_PADDING_WORDS(0x8); + + u32 program_start; + + INSERT_PADDING_WORDS(0x2); + + BitField<30, 1, u32> linked_tsc; + + BitField<0, 31, u32> grid_dim_x; + union { + BitField<0, 16, u32> grid_dim_y; + BitField<16, 16, u32> grid_dim_z; + }; + + INSERT_PADDING_WORDS(0x3); + + BitField<0, 16, u32> shared_alloc; + + BitField<0, 31, u32> block_dim_x; + union { + BitField<0, 16, u32> block_dim_y; + BitField<16, 16, u32> block_dim_z; + }; + + union { + BitField<0, 8, u32> const_buffer_enable_mask; + BitField<29, 2, u32> cache_layout; + } memory_config; + + INSERT_PADDING_WORDS(0x8); + + struct { + u32 address_low; + union { + BitField<0, 8, u32> address_high; + BitField<15, 17, u32> size; + }; + GPUVAddr Address() const { + return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | + address_low); + } + } const_buffer_config[8]; + + union { + BitField<0, 20, u32> local_pos_alloc; + BitField<27, 5, u32> barrier_alloc; + }; + + union { + BitField<0, 20, u32> local_neg_alloc; + BitField<24, 5, u32> gpr_alloc; + }; + + INSERT_PADDING_WORDS(0x11); + } launch_description; + + struct { + u32 write_offset = 0; + u32 copy_size = 0; + std::vector<u8> inner_buffer; + } state{}; + static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "KeplerCompute Regs has wrong size"); + static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), + "KeplerCompute LaunchParams has wrong size"); + /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); private: + Core::System& system; + VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; + Upload::State upload_state; + + void ProcessLaunch(); }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") +#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ + static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(launch, 0xAF); +ASSERT_REG_POSITION(tsc, 0x557); +ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(code_loc, 0x582); +ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); +ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); +ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); +ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); +ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); +ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); +ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 7387886a3f..0561f676cb 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -14,9 +14,8 @@ namespace Tegra::Engines { -KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} +KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) + : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} KeplerMemory::~KeplerMemory() = default; @@ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { switch (method_call.method) { case KEPLERMEMORY_REG_INDEX(exec): { - ProcessExec(); + upload_state.ProcessExec(regs.exec.linear != 0); break; } case KEPLERMEMORY_REG_INDEX(data): { - ProcessData(method_call.argument, method_call.IsLastCall()); + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); + } break; } } } -void KeplerMemory::ProcessExec() { - state.write_offset = 0; - state.copy_size = regs.line_length_in * regs.line_count; - state.inner_buffer.resize(state.copy_size); -} - -void KeplerMemory::ProcessData(u32 data, bool is_last_call) { - const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); - std::memcpy(&state.inner_buffer[state.write_offset], ®s.data, sub_copy_size); - state.write_offset += sub_copy_size; - if (is_last_call) { - const GPUVAddr address{regs.dest.Address()}; - if (regs.exec.linear != 0) { - memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); - } else { - UNIMPLEMENTED_IF(regs.dest.z != 0); - UNIMPLEMENTED_IF(regs.dest.depth != 1); - UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); - UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); - const std::size_t dst_size = Tegra::Texture::CalculateSize( - true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); - std::vector<u8> tmp_buffer(dst_size); - memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); - Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, - regs.dest.y, regs.dest.BlockHeight(), state.copy_size, - state.inner_buffer.data(), tmp_buffer.data()); - memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); - } - system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); - } -} - } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 5f892ddad1..f3bc675a9d 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -10,6 +10,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" namespace Core { @@ -20,19 +21,20 @@ namespace Tegra { class MemoryManager; } -namespace VideoCore { -class RasterizerInterface; -} - namespace Tegra::Engines { +/** + * This Engine is known as P2MF. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gk104_p2mf.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h + */ + #define KEPLERMEMORY_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerMemory::Regs, field_name) / sizeof(u32)) class KeplerMemory final { public: - KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + KeplerMemory(Core::System& system, MemoryManager& memory_manager); ~KeplerMemory(); /// Write the value to the register identified by method. @@ -45,42 +47,7 @@ public: struct { INSERT_PADDING_WORDS(0x60); - u32 line_length_in; - u32 line_count; - - struct { - u32 address_high; - u32 address_low; - u32 pitch; - union { - BitField<0, 4, u32> block_width; - BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_depth; - }; - u32 width; - u32 height; - u32 depth; - u32 z; - u32 x; - u32 y; - - GPUVAddr Address() const { - return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | - address_low); - } - - u32 BlockWidth() const { - return 1U << block_width.Value(); - } - - u32 BlockHeight() const { - return 1U << block_height.Value(); - } - - u32 BlockDepth() const { - return 1U << block_depth.Value(); - } - } dest; + Upload::Registers upload; struct { union { @@ -96,28 +63,17 @@ public: }; } regs{}; - struct { - u32 write_offset = 0; - u32 copy_size = 0; - std::vector<u8> inner_buffer; - } state{}; - private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; MemoryManager& memory_manager; - - void ProcessExec(); - void ProcessData(u32 data, bool is_last_call); + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4, \ "Field " #field_name " has invalid position") -ASSERT_REG_POSITION(line_length_in, 0x60); -ASSERT_REG_POSITION(line_count, 0x61); -ASSERT_REG_POSITION(dest, 0x62); +ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec, 0x6C); ASSERT_REG_POSITION(data, 0x6D); #undef ASSERT_REG_POSITION diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9780417f20..d7b586db93 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -20,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ - *this} { + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, + macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { InitializeRegisterDefaults(); } @@ -253,6 +253,18 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessSyncPoint(); break; } + case MAXWELL3D_REG_INDEX(exec_upload): { + upload_state.ProcessExec(regs.exec_upload.linear != 0); + break; + } + case MAXWELL3D_REG_INDEX(data_upload): { + const bool is_last_call = method_call.IsLastCall(); + upload_state.ProcessData(method_call.argument, is_last_call); + if (is_last_call) { + dirty_flags.OnMemoryWrite(); + } + break; + } default: break; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 85d309d9ba..4883b582a3 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -14,6 +14,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/math_util.h" +#include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" #include "video_core/textures/texture.h" @@ -32,6 +33,12 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GF100_3D. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/graph/gf100_3d.xml + * https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h + */ + #define MAXWELL3D_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) @@ -580,7 +587,18 @@ public: u32 bind; } macros; - INSERT_PADDING_WORDS(0x69); + INSERT_PADDING_WORDS(0x17); + + Upload::Registers upload; + struct { + union { + BitField<0, 1, u32> linear; + }; + } exec_upload; + + u32 data_upload; + + INSERT_PADDING_WORDS(0x44); struct { union { @@ -1176,6 +1194,8 @@ private: /// Interpreter for the macro codes uploaded to the GPU. MacroInterpreter macro_interpreter; + Upload::State upload_state; + /// Retrieves information about a specific TIC entry from the TIC buffer. Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1219,6 +1239,9 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(upload, 0x60); +ASSERT_REG_POSITION(exec_upload, 0x6C); +ASSERT_REG_POSITION(data_upload, 0x6D); ASSERT_REG_POSITION(sync_info, 0xB2); ASSERT_REG_POSITION(tfb_enabled, 0x1D1); ASSERT_REG_POSITION(rt, 0x200); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 2426d00678..3a5dfef0c6 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -83,57 +83,66 @@ void MaxwellDMA::HandleCopy() { ASSERT(regs.exec.enable_2d == 1); - const std::size_t copy_size = regs.x_count * regs.y_count; + if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { + ASSERT(regs.src_params.size_z == 1); + // If the input is tiled and the output is linear, deswizzle the input and copy it over. + const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const std::size_t src_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, + regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); - auto source_ptr{memory_manager.GetPointer(source)}; - auto dst_ptr{memory_manager.GetPointer(dest)}; + const std::size_t dst_size = regs.dst_pitch * regs.y_count; - if (!source_ptr) { - LOG_ERROR(HW_GPU, "source_ptr is invalid"); - return; - } + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } - if (!dst_ptr) { - LOG_ERROR(HW_GPU, "dst_ptr is invalid"); - return; - } + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } - const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { - // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated - // copying. - rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size); + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); - // We have to invalidate the destination region to evict any outdated surfaces from the - // cache. We do this before actually writing the new data because the destination address - // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size); - }; + Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, + regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), + write_buffer.data(), regs.src_params.BlockHeight(), + regs.src_params.pos_x, regs.src_params.pos_y); - if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { - ASSERT(regs.src_params.size_z == 1); - // If the input is tiled and the output is linear, deswizzle the input and copy it over. + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); + } else { + ASSERT(regs.dst_params.BlockDepth() == 1); - const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; + const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; - FlushAndInvalidate(regs.src_pitch * regs.src_params.size_y, - copy_size * src_bytes_per_pixel); + const std::size_t dst_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, + regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, - regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr, - regs.src_params.BlockHeight(), regs.src_params.pos_x, - regs.src_params.pos_y); - } else { - ASSERT(regs.dst_params.size_z == 1); - ASSERT(regs.src_pitch == regs.x_count); + const std::size_t dst_layer_size = Texture::CalculateSize( + true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, + regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); - const u32 src_bpp = regs.src_pitch / regs.x_count; + const std::size_t src_size = regs.src_pitch * regs.y_count; - FlushAndInvalidate(regs.src_pitch * regs.y_count, - regs.dst_params.size_x * regs.dst_params.size_y * src_bpp); + if (read_buffer.size() < src_size) { + read_buffer.resize(src_size); + } + + if (write_buffer.size() < dst_size) { + write_buffer.resize(dst_size); + } + + memory_manager.ReadBlock(source, read_buffer.data(), src_size); + memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); // If the input is linear and the output is tiled, swizzle the input and copy it over. Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, - src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight()); + src_bytes_per_pixel, + write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, + read_buffer.data(), regs.dst_params.BlockHeight()); + + memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index c6b6498420..e5942f671c 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <vector> #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -25,6 +26,11 @@ class RasterizerInterface; namespace Tegra::Engines { +/** + * This Engine is known as GK104_Copy. Documentation can be found in: + * https://github.com/envytools/envytools/blob/master/rnndb/fifo/gk104_copy.xml + */ + class MaxwellDMA final { public: explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, @@ -63,6 +69,16 @@ public: static_assert(sizeof(Parameters) == 24, "Parameters has wrong size"); + enum class ComponentMode : u32 { + Src0 = 0, + Src1 = 1, + Src2 = 2, + Src3 = 3, + Const0 = 4, + Const1 = 5, + Zero = 6, + }; + enum class CopyMode : u32 { None = 0, Unk1 = 1, @@ -128,7 +144,26 @@ public: u32 x_count; u32 y_count; - INSERT_PADDING_WORDS(0xBB); + INSERT_PADDING_WORDS(0xB8); + + u32 const0; + u32 const1; + union { + BitField<0, 4, ComponentMode> component0; + BitField<4, 4, ComponentMode> component1; + BitField<8, 4, ComponentMode> component2; + BitField<12, 4, ComponentMode> component3; + BitField<16, 2, u32> component_size; + BitField<20, 3, u32> src_num_components; + BitField<24, 3, u32> dst_num_components; + + u32 SrcBytePerPixel() const { + return src_num_components.Value() * component_size.Value(); + } + u32 DstBytePerPixel() const { + return dst_num_components.Value() * component_size.Value(); + } + } swizzle_config; Parameters dst_params; @@ -149,6 +184,9 @@ private: MemoryManager& memory_manager; + std::vector<u8> read_buffer; + std::vector<u8> write_buffer; + /// Performs the copy from the source buffer to the destination buffer as configured in the /// registers. void HandleCopy(); @@ -165,6 +203,9 @@ ASSERT_REG_POSITION(src_pitch, 0x104); ASSERT_REG_POSITION(dst_pitch, 0x105); ASSERT_REG_POSITION(x_count, 0x106); ASSERT_REG_POSITION(y_count, 0x107); +ASSERT_REG_POSITION(const0, 0x1C0); +ASSERT_REG_POSITION(const1, 0x1C1); +ASSERT_REG_POSITION(swizzle_config, 0x1C2); ASSERT_REG_POSITION(dst_params, 0x1C3); ASSERT_REG_POSITION(src_params, 0x1CA); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 4461083ffe..52706505b0 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -35,9 +35,9 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); - kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); + kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); + kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); } GPU::~GPU() = default; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c9a2077de4..03856013fb 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -44,7 +44,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p renderer.Rasterizer().FlushRegion(data->addr, data->size); } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { renderer.Rasterizer().InvalidateRegion(data->addr, data->size); - } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) { + } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { return; } else { UNREACHABLE(); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 6c98c67012..5d8d126c18 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -25,6 +25,8 @@ MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : raste UpdatePageTableForVMA(initial_vma); } +MemoryManager::~MemoryManager() = default; + GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; @@ -199,11 +201,11 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } -bool MemoryManager::IsBlockContinous(const GPUVAddr start, const std::size_t size) { +bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { const GPUVAddr end = start + size; const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); - const std::size_t range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); + const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); return range == size; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index e4f0c4bd6e..113f9d8f3e 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -47,7 +47,8 @@ struct VirtualMemoryArea { class MemoryManager final { public: - MemoryManager(VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); + ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); GPUVAddr AllocateSpace(GPUVAddr addr, u64 size, u64 align); @@ -65,18 +66,18 @@ public: u8* GetPointer(GPUVAddr addr); const u8* GetPointer(GPUVAddr addr) const; - // Returns true if the block is continous in host memory, false otherwise - bool IsBlockContinous(const GPUVAddr start, const std::size_t size); + /// Returns true if the block is continuous in host memory, false otherwise + bool IsBlockContinuous(GPUVAddr start, std::size_t size) const; /** * ReadBlock and WriteBlock are full read and write operations over virtual - * GPU Memory. It's important to use these when GPU memory may not be continous + * GPU Memory. It's important to use these when GPU memory may not be continuous * in the Host Memory counterpart. Note: This functions cause Host GPU Memory * Flushes and Invalidations, respectively to each operation. */ - void ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; - void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); - void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); + void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); + void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); /** * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and @@ -88,9 +89,9 @@ public: * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture * being flushed. */ - void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const; - void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size); - void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size); + void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; + void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); + void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; @@ -111,10 +112,10 @@ private: /** * Maps an unmanaged host memory pointer at a given address. * - * @param target The guest address to start the mapping at. - * @param memory The memory to be mapped. - * @param size Size of the mapping. - * @param state MemoryState tag to attach to the VMA. + * @param target The guest address to start the mapping at. + * @param memory The memory to be mapped. + * @param size Size of the mapping in bytes. + * @param backing_addr The base address of the range to back this mapping. */ VMAHandle MapBackingMemory(GPUVAddr target, u8* memory, u64 size, VAddr backing_addr); @@ -124,7 +125,7 @@ private: /// Converts a VMAHandle to a mutable VMAIter. VMAIter StripIterConstness(const VMAHandle& iter); - /// Marks as the specfied VMA as allocated. + /// Marks as the specified VMA as allocated. VMAIter Allocate(VMAIter vma); /** diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 291772186d..f820f3ed94 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -37,9 +37,6 @@ public: /// Gets the size of the shader in guest memory, required for cache management virtual std::size_t GetSizeInBytes() const = 0; - /// Wriets any cached resources back to memory - virtual void Flush() = 0; - /// Sets whether the cached object should be considered registered void SetIsRegistered(bool registered) { is_registered = registered; @@ -158,6 +155,8 @@ protected: return ++modified_ticks; } + virtual void FlushObjectInner(const T& object) = 0; + /// Flushes the specified object, updating appropriate cache state as needed void FlushObject(const T& object) { std::lock_guard lock{mutex}; @@ -165,7 +164,7 @@ protected: if (!object->IsDirty()) { return; } - object->Flush(); + FlushObjectInner(object); object->MarkAsModified(false, *this); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fc33aa4332..f9247a40e4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -42,9 +42,6 @@ public: return alignment; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - private: VAddr cpu_addr{}; std::size_t size{}; @@ -75,6 +72,9 @@ public: protected: void AlignBuffer(std::size_t alignment); + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + private: OGLStreamBuffer stream_buffer; diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 196e6e278f..2d467a2401 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h @@ -46,7 +46,7 @@ public: /// Reloads the global region from guest memory void Reload(u32 size_); - void Flush() override; + void Flush(); private: VAddr cpu_addr{}; @@ -65,6 +65,11 @@ public: GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); +protected: + void FlushObjectInner(const GlobalRegion& object) override { + object->Flush(); + } + private: GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index db73e746c5..3cc9452354 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -922,8 +922,8 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { viewport.y = viewport_rect.bottom; viewport.width = viewport_rect.GetWidth(); viewport.height = viewport_rect.GetHeight(); - viewport.depth_range_far = regs.viewports[i].depth_range_far; - viewport.depth_range_near = regs.viewports[i].depth_range_near; + viewport.depth_range_far = src.depth_range_far; + viewport.depth_range_near = src.depth_range_near; } state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5a25f5b379..a7681902ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -628,9 +628,11 @@ CachedSurface::CachedSurface(const SurfaceParams& params) } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer() { +void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - gl_buffer.resize(params.max_mip_level); + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; + if (gl_buffer.size() < params.max_mip_level) + gl_buffer.resize(params.max_mip_level); for (u32 i = 0; i < params.max_mip_level; i++) gl_buffer[i].resize(params.GetMipmapSizeGL(i)); if (params.is_tiled) { @@ -671,13 +673,13 @@ void CachedSurface::LoadGLBuffer() { } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer() { +void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented"); + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; // OpenGL temporary buffer needs to be big enough to store raw texture size - gl_buffer.resize(1); gl_buffer[0].resize(GetSizeInBytes()); const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); @@ -713,10 +715,12 @@ void CachedSurface::FlushGLBuffer() { } } -void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, - GLuint draw_fb_handle) { +void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, + GLuint read_fb_handle, GLuint draw_fb_handle) { const auto& rect{params.GetRect(mip_map)}; + auto& gl_buffer = res_cache_tmp_mem.gl_buffer; + // Load data from memory to the surface const auto x0 = static_cast<GLint>(rect.left); const auto y0 = static_cast<GLint>(rect.bottom); @@ -801,7 +805,6 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, tuple.type, &gl_buffer[mip_map][buffer_offset]); break; case SurfaceTarget::TextureCubemap: { - std::size_t start = buffer_offset; for (std::size_t face = 0; face < params.depth; ++face) { glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face), static_cast<GLsizei>(rect.GetWidth()), @@ -845,11 +848,12 @@ void CachedSurface::EnsureTextureDiscrepantView() { } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { +void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, + GLuint read_fb_handle, GLuint draw_fb_handle) { MICROPROFILE_SCOPE(OpenGL_TextureUL); for (u32 i = 0; i < params.max_mip_level; i++) - UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); + UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); } void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, @@ -929,8 +933,8 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre } void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { - surface->LoadGLBuffer(); - surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->LoadGLBuffer(temporal_memory); + surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle); surface->MarkAsModified(false, *this); surface->MarkForReload(false); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index db280dbb38..6263ef3e75 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -355,6 +355,12 @@ namespace OpenGL { class RasterizerOpenGL; +// This is used to store temporary big buffers, +// instead of creating/destroying all the time +struct RasterizerTemporaryMemory { + std::vector<std::vector<u8>> gl_buffer; +}; + class CachedSurface final : public RasterizerCacheObject { public: explicit CachedSurface(const SurfaceParams& params); @@ -371,10 +377,6 @@ public: return memory_size; } - void Flush() override { - FlushGLBuffer(); - } - const OGLTexture& Texture() const { return texture; } @@ -397,11 +399,12 @@ public: } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(); - void FlushGLBuffer(); + void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); + void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); // Upload data in gl_buffer to this surface's texture - void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, + GLuint draw_fb_handle); void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, Tegra::Texture::SwizzleSource swizzle_y, @@ -429,13 +432,13 @@ public: } private: - void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); + void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, + GLuint read_fb_handle, GLuint draw_fb_handle); void EnsureTextureDiscrepantView(); OGLTexture texture; OGLTexture discrepant_view; - std::vector<std::vector<u8>> gl_buffer; SurfaceParams params{}; GLenum gl_target{}; GLenum gl_internal_format{}; @@ -473,6 +476,11 @@ public: void SignalPreDrawCall(); void SignalPostDrawCall(); +protected: + void FlushObjectInner(const Surface& object) override { + object->FlushGLBuffer(temporal_memory); + } + private: void LoadSurface(const Surface& surface); Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); @@ -519,6 +527,8 @@ private: std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; Surface last_depth_buffer; + RasterizerTemporaryMemory temporal_memory; + using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; using SurfaceInterval = typename SurfaceIntervalCache::interval_type; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b1c8f7c358..f700dc89a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -345,7 +345,7 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, const Device& device) - : RasterizerCache{rasterizer}, disk_cache{system}, device{device} {} + : RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index a332087f83..31b9799870 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -57,9 +57,6 @@ public: return shader_length; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - /// Gets the shader entries for the shader const GLShader::ShaderEntries& GetShaderEntries() const { return entries; @@ -123,6 +120,10 @@ public: /// Gets the current specified shader stage program Shader GetStageProgram(Maxwell::ShaderProgram program); +protected: + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const Shader& object) override {} + private: std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ef1a1995f4..1a62795e1e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -871,17 +871,6 @@ private: return {}; } - std::string Composite(Operation operation) { - std::string value = "vec4("; - for (std::size_t i = 0; i < 4; ++i) { - value += Visit(operation[i]); - if (i < 3) - value += ", "; - } - value += ')'; - return value; - } - template <Type type> std::string Add(Operation operation) { return GenerateBinaryInfix(operation, "+", type, type, type); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ed7afc4a00..254c0d4996 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -475,7 +475,10 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); auto& usages{it->second}; - ASSERT(usages.find(usage) == usages.end()); + if (usages.find(usage) != usages.end()) { + // Skip this variant since the shader is already stored. + return; + } usages.insert(usage); FileUtil::IOFile file = AppendTransferableFile(); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 08b786aade..3edf460df7 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -49,9 +49,6 @@ public: return alignment; } - // We do not have to flush this cache as things in it are never modified by us. - void Flush() override {} - private: VAddr cpu_addr{}; std::size_t size{}; @@ -87,6 +84,10 @@ public: return buffer_handle; } +protected: + // We do not have to flush this cache as things in it are never modified by us. + void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + private: void AlignBuffer(std::size_t alignment); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 23d9b10db7..a11000f6b3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -315,7 +315,6 @@ private: constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { - const auto flag_code = static_cast<InternalFlag>(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 819cc61319..5b033126d8 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -540,8 +540,6 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, bool is_array, bool is_aoffi) { const std::size_t coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); // If enabled arrays index is always stored in the gpr8 field const u64 array_register = instr.gpr8.Value(); diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index b508d64e92..a9b8f69af1 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -25,8 +25,8 @@ class InputBitStream { public: - explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0) - : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} + explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) + : m_CurByte(ptr), m_NextBit(start_offset % 8) {} ~InputBitStream() = default; @@ -55,12 +55,9 @@ public: } private: - const int m_NumBits; const unsigned char* m_CurByte; int m_NextBit = 0; int m_BitsRead = 0; - - bool done = false; }; class OutputBitStream { @@ -114,7 +111,6 @@ private: const int m_NumBits; unsigned char* m_CurByte; int m_NextBit = 0; - int m_BitsRead = 0; bool done = false; }; @@ -1616,6 +1612,7 @@ namespace Tegra::Texture::ASTC { std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height) { uint32_t blockIdx = 0; + std::size_t depth_offset = 0; std::vector<uint8_t> outData(height * width * depth * 4); for (uint32_t k = 0; k < depth; k++) { for (uint32_t j = 0; j < height; j += block_height) { @@ -1630,7 +1627,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he uint32_t decompWidth = std::min(block_width, width - i); uint32_t decompHeight = std::min(block_height, height - j); - uint8_t* outRow = outData.data() + (j * width + i) * 4; + uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; for (uint32_t jj = 0; jj < decompHeight; jj++) { memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); } @@ -1638,6 +1635,7 @@ std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t he blockIdx++; } } + depth_offset += height * width * 4; } return outData; diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index c8b0a5ec08..5477f050ca 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp @@ -58,7 +58,7 @@ void CompatDB::Submit() { button(NextButton)->setEnabled(false); button(NextButton)->setText(tr("Submitting")); - button(QWizard::CancelButton)->setVisible(false); + button(CancelButton)->setVisible(false); testcase_watcher.setFuture(QtConcurrent::run( [] { return Core::System::GetInstance().TelemetrySession().SubmitTestcase(); })); @@ -74,12 +74,12 @@ void CompatDB::OnTestcaseSubmitted() { tr("An error occured while sending the Testcase")); button(NextButton)->setEnabled(true); button(NextButton)->setText(tr("Next")); - button(QWizard::CancelButton)->setVisible(true); + button(CancelButton)->setVisible(true); } else { next(); // older versions of QT don't support the "NoCancelButtonOnLastPage" option, this is a // workaround - button(QWizard::CancelButton)->setVisible(false); + button(CancelButton)->setVisible(false); } } diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index a5218b0517..32c05b7976 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -17,8 +17,12 @@ ConfigureDialog::ConfigureDialog(QWidget* parent, HotkeyRegistry& registry) ui->hotkeysTab->Populate(registry); this->setConfiguration(); this->PopulateSelectionList(); + + setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint); + connect(ui->selectorList, &QListWidget::itemSelectionChanged, this, &ConfigureDialog::UpdateVisibleTabs); + adjustSize(); ui->selectorList->setCurrentRow(0); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index c299c0b5ba..08ea41b0ff 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -69,16 +69,20 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent) ConfigureGraphics::~ConfigureGraphics() = default; void ConfigureGraphics::setConfiguration() { + const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); + ui->resolution_factor_combobox->setCurrentIndex( static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); ui->frame_limit->setValue(Settings::values.frame_limit); + ui->use_compatibility_profile->setEnabled(runtime_lock); ui->use_compatibility_profile->setChecked(Settings::values.use_compatibility_profile); + ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); - ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); - ui->force_30fps_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->force_30fps_mode->setEnabled(runtime_lock); ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); diff --git a/src/yuzu/hotkeys.h b/src/yuzu/hotkeys.h index 4f526dc7e8..248fadaf38 100644 --- a/src/yuzu/hotkeys.h +++ b/src/yuzu/hotkeys.h @@ -67,8 +67,6 @@ public: private: struct Hotkey { - Hotkey() : shortcut(nullptr), context(Qt::WindowShortcut) {} - QKeySequence keyseq; QShortcut* shortcut = nullptr; Qt::ShortcutContext context = Qt::WindowShortcut; diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp index 68a1760327..8f104062d0 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2.cpp @@ -176,9 +176,13 @@ EmuWindow_SDL2::EmuWindow_SDL2(bool fullscreen) { SDL_SetMainReady(); + const SDL_GLprofile profile = Settings::values.use_compatibility_profile + ? SDL_GL_CONTEXT_PROFILE_COMPATIBILITY + : SDL_GL_CONTEXT_PROFILE_CORE; + SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4); SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3); - SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE); + SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, profile); SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1); SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8); SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8); diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index a1d7879b1c..d3734927b9 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp @@ -222,6 +222,7 @@ int main(int argc, char** argv) { system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); + emu_window->MakeCurrent(); system.Renderer().Rasterizer().LoadDiskResources(); while (emu_window->IsOpen()) { |