Diffstat (limited to 'src')
40 files changed, 412 insertions, 198 deletions
diff --git a/src/core/frontend/applets/controller.cpp b/src/core/frontend/applets/controller.cpp
index 6dbd38ffae..e1033b6345 100644
--- a/src/core/frontend/applets/controller.cpp
+++ b/src/core/frontend/applets/controller.cpp
@@ -45,26 +45,26 @@ void DefaultControllerApplet::ReconfigureControllers(std::function<void()> callb
         // Pro Controller -> Dual Joycons -> Left Joycon/Right Joycon -> Handheld
         if (parameters.allow_pro_controller) {
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::ProController);
-            controller->Connect();
+            controller->Connect(true);
         } else if (parameters.allow_dual_joycons) {
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconDual);
-            controller->Connect();
+            controller->Connect(true);
         } else if (parameters.allow_left_joycon && parameters.allow_right_joycon) {
             // Assign left joycons to even player indices and right joycons to odd player indices.
             // We do this since Captain Toad Treasure Tracker expects a left joycon for Player 1 and
             // a right Joycon for Player 2 in 2 Player Assist mode.
             if (index % 2 == 0) {
                 controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconLeft);
-                controller->Connect();
+                controller->Connect(true);
             } else {
                 controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::JoyconRight);
-                controller->Connect();
+                controller->Connect(true);
             }
         } else if (index == 0 && parameters.enable_single_mode && parameters.allow_handheld &&
                    !Settings::values.use_docked_mode.GetValue()) {
             // We should *never* reach here under any normal circumstances.
             controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::Handheld);
-            controller->Connect();
+            controller->Connect(true);
         } else {
             UNREACHABLE_MSG("Unable to add a new controller based on the given parameters!");
         }
diff --git a/src/core/hid/emulated_controller.cpp b/src/core/hid/emulated_controller.cpp
index ff9d7a7e36..2d3fce2761 100644
--- a/src/core/hid/emulated_controller.cpp
+++ b/src/core/hid/emulated_controller.cpp
@@ -886,8 +886,9 @@ void EmulatedController::SetSupportedNpadStyleTag(NpadStyleTag supported_styles)
     }
 }
 
-bool EmulatedController::IsControllerSupported() const {
-    switch (npad_type) {
+bool EmulatedController::IsControllerSupported(bool use_temporary_value) const {
+    const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
+    switch (type) {
     case NpadStyleIndex::ProController:
         return supported_style_tag.fullkey;
     case NpadStyleIndex::Handheld:
@@ -915,9 +916,10 @@ bool EmulatedController::IsControllerSupported() const {
     }
 }
 
-void EmulatedController::Connect() {
-    if (!IsControllerSupported()) {
-        LOG_ERROR(Service_HID, "Controller type {} is not supported", npad_type);
+void EmulatedController::Connect(bool use_temporary_value) {
+    if (!IsControllerSupported(use_temporary_value)) {
+        const auto type = is_configuring && use_temporary_value ? tmp_npad_type : npad_type;
+        LOG_ERROR(Service_HID, "Controller type {} is not supported", type);
         return;
     }
     {
diff --git a/src/core/hid/emulated_controller.h b/src/core/hid/emulated_controller.h
index e42aafebc4..d887eca870 100644
--- a/src/core/hid/emulated_controller.h
+++ b/src/core/hid/emulated_controller.h
@@ -167,8 +167,11 @@ public:
      */
     void SetSupportedNpadStyleTag(NpadStyleTag supported_styles);
 
-    /// Sets the connected status to true
-    void Connect();
+    /**
+     * Sets the connected status to true
+     * @param use_temporary_value If true tmp_npad_type will be used
+     */
+    void Connect(bool use_temporary_value = false);
 
     /// Sets the connected status to false
     void Disconnect();
@@ -319,9 +322,10 @@ private:
     /**
      * Checks the current controller type against the supported_style_tag
+     * @param use_temporary_value If true tmp_npad_type will be used
      * @return true if the controller is supported
      */
-    bool IsControllerSupported() const;
+    bool IsControllerSupported(bool use_temporary_value = false) const;
 
     /**
      * Updates the button status of the controller
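Note on the API change above: while a configuration dialog is open (is_configuring is true), passing true makes Connect() and IsControllerSupported() validate the temporary, not-yet-saved style (tmp_npad_type) instead of the committed npad_type. A minimal sketch of the calling pattern, assuming a controller fetched from the HID core (the setup lines are illustrative, not from the commit):

    // Sketch only; mirrors how the frontends in this commit drive the new parameter.
    Core::HID::EmulatedController* controller =
        hid_core.GetEmulatedController(Core::HID::NpadIdType::Player1);
    controller->SetNpadStyleIndex(Core::HID::NpadStyleIndex::ProController);
    controller->Connect(true); // while configuring: validate tmp_npad_type
    controller->Connect();     // elsewhere: the default (false) keeps the old behavior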
diff --git a/src/core/hle/kernel/k_memory_block.h b/src/core/hle/kernel/k_memory_block.h
index fd491146fe..9e51c33ce0 100644
--- a/src/core/hle/kernel/k_memory_block.h
+++ b/src/core/hle/kernel/k_memory_block.h
@@ -120,7 +120,7 @@ static_assert(static_cast<u32>(KMemoryState::CodeOut) == 0x00402015);
 
 enum class KMemoryPermission : u8 {
     None = 0,
-    Mask = static_cast<u8>(~None),
+    All = static_cast<u8>(~None),
 
     Read = 1 << 0,
     Write = 1 << 1,
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index 99982e5a3b..4da509224d 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -264,9 +264,9 @@ ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_
     ASSERT(heap_last < stack_start || stack_last < heap_start);
     ASSERT(heap_last < kmap_start || kmap_last < heap_start);
 
-    current_heap_addr = heap_region_start;
-    heap_capacity = 0;
-    physical_memory_usage = 0;
+    current_heap_end = heap_region_start;
+    max_heap_size = 0;
+    mapped_physical_memory_size = 0;
     memory_pool = pool;
 
     page_table_impl.Resize(address_space_width, PageBits);
@@ -306,7 +306,7 @@ ResultCode KPageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std:
     KMemoryState state{};
     KMemoryPermission perm{};
     CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, src_addr, size, KMemoryState::All,
-                                  KMemoryState::Normal, KMemoryPermission::Mask,
+                                  KMemoryState::Normal, KMemoryPermission::All,
                                   KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask,
                                   KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
 
@@ -465,7 +465,7 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) {
 
     MapPhysicalMemory(page_linked_list, addr, end_addr);
 
-    physical_memory_usage += remaining_size;
+    mapped_physical_memory_size += remaining_size;
 
     const std::size_t num_pages{size / PageSize};
     block_manager->Update(addr, num_pages, KMemoryState::Free, KMemoryPermission::None,
@@ -507,7 +507,7 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) {
     auto process{system.Kernel().CurrentProcess()};
     process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, mapped_size);
-    physical_memory_usage -= mapped_size;
+    mapped_physical_memory_size -= mapped_size;
 
     return ResultSuccess;
 }
@@ -554,7 +554,7 @@ ResultCode KPageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     KMemoryState src_state{};
     CASCADE_CODE(CheckMemoryState(
         &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
-        KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::ReadAndWrite,
+        KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
         KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
 
     if (IsRegionMapped(dst_addr, size)) {
@@ -593,7 +593,7 @@ ResultCode KPageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) {
     KMemoryState src_state{};
     CASCADE_CODE(CheckMemoryState(
         &src_state, nullptr, nullptr, src_addr, size, KMemoryState::FlagCanAlias,
-        KMemoryState::FlagCanAlias, KMemoryPermission::Mask, KMemoryPermission::None,
+        KMemoryState::FlagCanAlias, KMemoryPermission::All, KMemoryPermission::None,
         KMemoryAttribute::Mask, KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
 
     KMemoryPermission dst_perm{};
@@ -784,7 +784,7 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo
     CASCADE_CODE(CheckMemoryState(
         &state, nullptr, &attribute, addr, size,
         KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted,
-        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::Mask,
+        KMemoryState::FlagCanTransfer | KMemoryState::FlagReferenceCounted, KMemoryPermission::All,
         KMemoryPermission::ReadAndWrite, KMemoryAttribute::Mask, KMemoryAttribute::None,
         KMemoryAttribute::IpcAndDeviceMapped));
 
@@ -806,6 +806,33 @@ ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) {
                                   KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
 
     block_manager->Update(addr, size / PageSize, state, KMemoryPermission::ReadAndWrite);
+    return ResultSuccess;
+}
+
+ResultCode KPageTable::SetMemoryPermission(VAddr addr, std::size_t size,
+                                           Svc::MemoryPermission svc_perm) {
+    const size_t num_pages = size / PageSize;
+
+    // Lock the table.
+    std::lock_guard lock{page_table_lock};
+
+    // Verify we can change the memory permission.
+    KMemoryState old_state;
+    KMemoryPermission old_perm;
+    R_TRY(this->CheckMemoryState(
+        std::addressof(old_state), std::addressof(old_perm), nullptr, addr, size,
+        KMemoryState::FlagCanReprotect, KMemoryState::FlagCanReprotect, KMemoryPermission::None,
+        KMemoryPermission::None, KMemoryAttribute::All, KMemoryAttribute::None));
+
+    // Determine new perm.
+    const KMemoryPermission new_perm = ConvertToKMemoryPermission(svc_perm);
+    R_SUCCEED_IF(old_perm == new_perm);
+
+    // Perform mapping operation.
+    R_TRY(Operate(addr, num_pages, new_perm, OperationType::ChangePermissions));
+
+    // Update the blocks.
+    block_manager->Update(addr, num_pages, old_state, new_perm, KMemoryAttribute::None);
+
     return ResultSuccess;
 }
 
@@ -832,61 +859,125 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryA
     return ResultSuccess;
 }
 
-ResultCode KPageTable::SetHeapCapacity(std::size_t new_heap_capacity) {
+ResultCode KPageTable::SetMaxHeapSize(std::size_t size) {
+    // Lock the table.
     std::lock_guard lock{page_table_lock};
-    heap_capacity = new_heap_capacity;
-    return ResultSuccess;
-}
 
-ResultVal<VAddr> KPageTable::SetHeapSize(std::size_t size) {
+    // Only process page tables are allowed to set heap size.
+    ASSERT(!this->IsKernel());
 
-    if (size > heap_region_end - heap_region_start) {
-        return ResultOutOfMemory;
-    }
+    max_heap_size = size;
 
-    const u64 previous_heap_size{GetHeapSize()};
-
-    UNIMPLEMENTED_IF_MSG(previous_heap_size > size, "Heap shrink is unimplemented");
+    return ResultSuccess;
+}
 
-    // Increase the heap size
+ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) {
+    // Try to perform a reduction in heap, instead of an extension.
+    VAddr cur_address{};
+    std::size_t allocation_size{};
     {
-        std::lock_guard lock{page_table_lock};
-
-        const u64 delta{size - previous_heap_size};
-
-        // Reserve memory for the heap extension.
-        KScopedResourceReservation memory_reservation(
-            system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
-            delta);
-
-        if (!memory_reservation.Succeeded()) {
-            LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta);
-            return ResultLimitReached;
+        // Lock the table.
+        std::lock_guard lk(page_table_lock);
+
+        // Validate that setting heap size is possible at all.
+        R_UNLESS(!is_kernel, ResultOutOfMemory);
+        R_UNLESS(size <= static_cast<std::size_t>(heap_region_end - heap_region_start),
+                 ResultOutOfMemory);
+        R_UNLESS(size <= max_heap_size, ResultOutOfMemory);
+
+        if (size < GetHeapSize()) {
+            // The size being requested is less than the current size, so we need to free the end of
+            // the heap.
+
+            // Validate memory state.
+            std::size_t num_allocator_blocks;
+            R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks),
+                                         heap_region_start + size, GetHeapSize() - size,
+                                         KMemoryState::All, KMemoryState::Normal,
+                                         KMemoryPermission::All, KMemoryPermission::ReadAndWrite,
+                                         KMemoryAttribute::All, KMemoryAttribute::None));
+
+            // Unmap the end of the heap.
+            const auto num_pages = (GetHeapSize() - size) / PageSize;
+            R_TRY(Operate(heap_region_start + size, num_pages, KMemoryPermission::None,
+                          OperationType::Unmap));
+
+            // Release the memory from the resource limit.
+            system.Kernel().CurrentProcess()->GetResourceLimit()->Release(
+                LimitableResource::PhysicalMemory, num_pages * PageSize);
+
+            // Apply the memory block update.
+            block_manager->Update(heap_region_start + size, num_pages, KMemoryState::Free,
+                                  KMemoryPermission::None, KMemoryAttribute::None);
+
+            // Update the current heap end.
+            current_heap_end = heap_region_start + size;
+
+            // Set the output.
+            *out = heap_region_start;
+            return ResultSuccess;
+        } else if (size == GetHeapSize()) {
+            // The size requested is exactly the current size.
+            *out = heap_region_start;
+            return ResultSuccess;
+        } else {
+            // We have to allocate memory. Determine how much to allocate and where while the table
+            // is locked.
+            cur_address = current_heap_end;
+            allocation_size = size - GetHeapSize();
         }
+    }
 
-        KPageLinkedList page_linked_list;
-        const std::size_t num_pages{delta / PageSize};
+    // Reserve memory for the heap extension.
+    KScopedResourceReservation memory_reservation(
+        system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory,
+        allocation_size);
+    R_UNLESS(memory_reservation.Succeeded(), ResultLimitReached);
 
-        CASCADE_CODE(
-            system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool));
+    // Allocate pages for the heap extension.
+    KPageLinkedList page_linked_list;
+    R_TRY(system.Kernel().MemoryManager().Allocate(page_linked_list, allocation_size / PageSize,
+                                                   memory_pool));
 
-        if (IsRegionMapped(current_heap_addr, delta)) {
-            return ResultInvalidCurrentMemory;
+    // Map the pages.
+    {
+        // Lock the table.
+        std::lock_guard lk(page_table_lock);
+
+        // Ensure that the heap hasn't changed since we began executing.
+        ASSERT(cur_address == current_heap_end);
+
+        // Check the memory state.
+        std::size_t num_allocator_blocks{};
+        R_TRY(this->CheckMemoryState(std::addressof(num_allocator_blocks), current_heap_end,
+                                     allocation_size, KMemoryState::All, KMemoryState::Free,
+                                     KMemoryPermission::None, KMemoryPermission::None,
+                                     KMemoryAttribute::None, KMemoryAttribute::None));
+
+        // Map the pages.
+        const auto num_pages = allocation_size / PageSize;
+        R_TRY(Operate(current_heap_end, num_pages, page_linked_list, OperationType::MapGroup));
+
+        // Clear all the newly allocated pages.
+        for (std::size_t cur_page = 0; cur_page < num_pages; ++cur_page) {
+            std::memset(system.Memory().GetPointer(current_heap_end + (cur_page * PageSize)), 0,
+                        PageSize);
         }
 
-        CASCADE_CODE(
-            Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup));
-
-        // Succeeded in allocation, commit the resource reservation
+        // We succeeded, so commit our memory reservation.
         memory_reservation.Commit();
 
-        block_manager->Update(current_heap_addr, num_pages, KMemoryState::Normal,
-                              KMemoryPermission::ReadAndWrite);
+        // Apply the memory block update.
+        block_manager->Update(current_heap_end, num_pages, KMemoryState::Normal,
+                              KMemoryPermission::ReadAndWrite, KMemoryAttribute::None);
 
-        current_heap_addr = heap_region_start + size;
-    }
+        // Update the current heap end.
+        current_heap_end = heap_region_start + size;
 
-    return heap_region_start;
+        // Set the output.
+        *out = heap_region_start;
+        return ResultSuccess;
+    }
 }
 
 ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
@@ -978,7 +1069,7 @@ ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) {
 
     if (const ResultCode result{CheckMemoryState(
             nullptr, &old_perm, nullptr, addr, size, KMemoryState::FlagCanCodeMemory,
-            KMemoryState::FlagCanCodeMemory, KMemoryPermission::Mask,
+            KMemoryState::FlagCanCodeMemory, KMemoryPermission::All,
             KMemoryPermission::UserReadWrite, KMemoryAttribute::All, KMemoryAttribute::None)};
         result.IsError()) {
         return result;
@@ -1031,9 +1122,8 @@ ResultCode KPageTable::InitializeMemoryLayout(VAddr start, VAddr end) {
 
 bool KPageTable::IsRegionMapped(VAddr address, u64 size) {
     return CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free,
-                            KMemoryPermission::Mask, KMemoryPermission::None,
-                            KMemoryAttribute::Mask, KMemoryAttribute::None,
-                            KMemoryAttribute::IpcAndDeviceMapped)
+                            KMemoryPermission::All, KMemoryPermission::None, KMemoryAttribute::Mask,
+                            KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped)
         .IsError();
 }
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index d784aa67e3..564410dca2 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -47,10 +47,11 @@ public:
     KMemoryInfo QueryInfo(VAddr addr);
     ResultCode ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm);
     ResultCode ResetTransferMemory(VAddr addr, std::size_t size);
+    ResultCode SetMemoryPermission(VAddr addr, std::size_t size, Svc::MemoryPermission perm);
     ResultCode SetMemoryAttribute(VAddr addr, std::size_t size, KMemoryAttribute mask,
                                   KMemoryAttribute value);
-    ResultCode SetHeapCapacity(std::size_t new_heap_capacity);
-    ResultVal<VAddr> SetHeapSize(std::size_t size);
+    ResultCode SetMaxHeapSize(std::size_t size);
+    ResultCode SetHeapSize(VAddr* out, std::size_t size);
     ResultVal<VAddr> AllocateAndMapMemory(std::size_t needed_num_pages, std::size_t align,
                                           bool is_map_only, VAddr region_start,
                                           std::size_t region_num_pages, KMemoryState state,
@@ -182,14 +183,15 @@ public:
     constexpr VAddr GetAliasCodeRegionSize() const {
         return alias_code_region_end - alias_code_region_start;
     }
+    size_t GetNormalMemorySize() {
+        std::lock_guard lk(page_table_lock);
+        return GetHeapSize() + mapped_physical_memory_size;
+    }
     constexpr std::size_t GetAddressSpaceWidth() const {
         return address_space_width;
     }
-    constexpr std::size_t GetHeapSize() {
-        return current_heap_addr - heap_region_start;
-    }
-    constexpr std::size_t GetTotalHeapSize() {
-        return GetHeapSize() + physical_memory_usage;
+    constexpr std::size_t GetHeapSize() const {
+        return current_heap_end - heap_region_start;
     }
     constexpr bool IsInsideAddressSpace(VAddr address, std::size_t size) const {
         return address_space_start <= address && address + size - 1 <= address_space_end - 1;
@@ -269,10 +271,8 @@ private:
     VAddr code_region_end{};
     VAddr alias_code_region_start{};
    VAddr alias_code_region_end{};
-    VAddr current_heap_addr{};
 
-    std::size_t heap_capacity{};
-    std::size_t physical_memory_usage{};
+    std::size_t mapped_physical_memory_size{};
     std::size_t max_heap_size{};
     std::size_t max_physical_memory_size{};
     std::size_t address_space_width{};
diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp
index aee3139957..73f8bc4fe7 100644
--- a/src/core/hle/kernel/k_process.cpp
+++ b/src/core/hle/kernel/k_process.cpp
@@ -172,7 +172,7 @@ void KProcess::DecrementThreadCount() {
 u64 KProcess::GetTotalPhysicalMemoryAvailable() const {
     const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) +
-                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
+                       page_table->GetNormalMemorySize() + GetSystemResourceSize() + image_size +
                        main_thread_stack_size};
     if (const auto pool_size = kernel.MemoryManager().GetSize(KMemoryManager::Pool::Application);
         capacity != pool_size) {
@@ -189,7 +189,7 @@ u64 KProcess::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
 }
 
 u64 KProcess::GetTotalPhysicalMemoryUsed() const {
-    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
+    return image_size + main_thread_stack_size + page_table->GetNormalMemorySize() +
            GetSystemResourceSize();
 }
 
@@ -410,8 +410,8 @@ void KProcess::Run(s32 main_thread_priority, u64 stack_size) {
     resource_limit->Reserve(LimitableResource::Threads, 1);
     resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size);
 
-    const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size};
-    ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError());
+    const std::size_t heap_capacity{memory_usage_capacity - (main_thread_stack_size + image_size)};
+    ASSERT(!page_table->SetMaxHeapSize(heap_capacity).IsError());
 
     ChangeStatus(ProcessStatus::Running);
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 752592e2ef..b8c993748f 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -26,6 +26,7 @@
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
+#include "core/hle/kernel/k_system_control.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/k_thread_queue.h"
 #include "core/hle/kernel/kernel.h"
@@ -50,6 +51,7 @@ static void ResetThreadContext64(Core::ARM_Interface::ThreadContext64& context,
                                  VAddr entry_point, u64 arg) {
     context = {};
     context.cpu_registers[0] = arg;
+    context.cpu_registers[18] = Kernel::KSystemControl::GenerateRandomU64() | 1;
     context.pc = entry_point;
     context.sp = stack_top;
     // TODO(merry): Perform a hardware test to determine the below value.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 37d67b72e8..63e2dff19f 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -135,24 +135,15 @@ enum class ResourceLimitValueType {
 } // Anonymous namespace
 
 /// Set the process heap to a given Size. It can both extend and shrink the heap.
-static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_size) {
-    LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", heap_size);
+static ResultCode SetHeapSize(Core::System& system, VAddr* out_address, u64 size) {
+    LOG_TRACE(Kernel_SVC, "called, heap_size=0x{:X}", size);
 
-    // Size must be a multiple of 0x200000 (2MB) and be equal to or less than 8GB.
-    if ((heap_size % 0x200000) != 0) {
-        LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}",
-                  heap_size);
-        return ResultInvalidSize;
-    }
-
-    if (heap_size >= 0x200000000) {
-        LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size);
-        return ResultInvalidSize;
-    }
-
-    auto& page_table{system.Kernel().CurrentProcess()->PageTable()};
+    // Validate size.
+    R_UNLESS(Common::IsAligned(size, HeapSizeAlignment), ResultInvalidSize);
+    R_UNLESS(size < MainMemorySizeMax, ResultInvalidSize);
 
-    CASCADE_RESULT(*heap_addr, page_table.SetHeapSize(heap_size));
+    // Set the heap size.
+    R_TRY(system.Kernel().CurrentProcess()->PageTable().SetHeapSize(out_address, size));
 
     return ResultSuccess;
 }
@@ -164,6 +155,36 @@ static ResultCode SetHeapSize32(Core::System& system, u32* heap_addr, u32 heap_s
     return result;
 }
 
+constexpr bool IsValidSetMemoryPermission(MemoryPermission perm) {
+    switch (perm) {
+    case MemoryPermission::None:
+    case MemoryPermission::Read:
+    case MemoryPermission::ReadWrite:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static ResultCode SetMemoryPermission(Core::System& system, VAddr address, u64 size,
+                                      MemoryPermission perm) {
+    // Validate address / size.
+    R_UNLESS(Common::IsAligned(address, PageSize), ResultInvalidAddress);
+    R_UNLESS(Common::IsAligned(size, PageSize), ResultInvalidSize);
+    R_UNLESS(size > 0, ResultInvalidSize);
+    R_UNLESS((address < address + size), ResultInvalidCurrentMemory);
+
+    // Validate the permission.
+    R_UNLESS(IsValidSetMemoryPermission(perm), ResultInvalidNewMemoryPermission);
+
+    // Validate that the region is in range for the current process.
+    auto& page_table = system.Kernel().CurrentProcess()->PageTable();
+    R_UNLESS(page_table.Contains(address, size), ResultInvalidCurrentMemory);
+
+    // Set the memory attribute.
+    return page_table.SetMemoryPermission(address, size, perm);
+}
+
 static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 size, u32 mask,
                                      u32 attribute) {
     LOG_DEBUG(Kernel_SVC,
@@ -2724,7 +2745,7 @@ static const FunctionDef SVC_Table_32[] = {
 static const FunctionDef SVC_Table_64[] = {
     {0x00, nullptr, "Unknown"},
     {0x01, SvcWrap64<SetHeapSize>, "SetHeapSize"},
-    {0x02, nullptr, "SetMemoryPermission"},
+    {0x02, SvcWrap64<SetMemoryPermission>, "SetMemoryPermission"},
     {0x03, SvcWrap64<SetMemoryAttribute>, "SetMemoryAttribute"},
     {0x04, SvcWrap64<MapMemory>, "MapMemory"},
     {0x05, SvcWrap64<UnmapMemory>, "UnmapMemory"},
diff --git a/src/core/hle/kernel/svc_common.h b/src/core/hle/kernel/svc_common.h
index 60ea2c4055..25de6e4378 100644
--- a/src/core/hle/kernel/svc_common.h
+++ b/src/core/hle/kernel/svc_common.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "common/common_types.h"
+#include "common/literals.h"
 
 namespace Kernel {
 using Handle = u32;
@@ -12,9 +13,13 @@ using Handle = u32;
 
 namespace Kernel::Svc {
 
+using namespace Common::Literals;
+
 constexpr s32 ArgumentHandleCountMax = 0x40;
 constexpr u32 HandleWaitMask{1u << 30};
 
+constexpr inline std::size_t HeapSizeAlignment = 2_MiB;
+
 constexpr inline Handle InvalidHandle = Handle(0);
 
 enum PseudoHandle : Handle {
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 86255fe6d3..a60adfcabf 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -249,6 +249,14 @@ void SvcWrap64(Core::System& system) {
         func(system, Param(system, 0), Param(system, 1), static_cast<u32>(Param(system, 2))).raw);
 }
 
+// Used by SetMemoryPermission
+template <ResultCode func(Core::System&, u64, u64, Svc::MemoryPermission)>
+void SvcWrap64(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1),
+                            static_cast<Svc::MemoryPermission>(Param(system, 2)))
+                           .raw);
+}
+
 // Used by MapSharedMemory
 template <ResultCode func(Core::System&, Handle, u64, u64, Svc::MemoryPermission)>
 void SvcWrap64(Core::System& system) {
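Taken together, the kernel changes above give SetHeapSize guest-accurate semantics: the SVC now accepts any 2 MiB-aligned size below 8 GiB, and KPageTable can shrink as well as grow the heap, always reporting the heap base through the out parameter. A hedged sketch of the new call pattern (a fragment, assuming a function returning ResultCode with Common::Literals in scope; the sizes are hypothetical):

    VAddr heap_base{};
    // Grow the heap to 512 MiB; new pages are mapped and zero-filled.
    R_TRY(page_table.SetHeapSize(std::addressof(heap_base), 512_MiB));
    // Shrink back to 256 MiB; the tail is unmapped and its resource-limit
    // reservation released. The old code hit "Heap shrink is unimplemented" here.
    R_TRY(page_table.SetHeapSize(std::addressof(heap_base), 256_MiB));
    // heap_base holds heap_region_start after both calls.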
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
index 081b2c8e03..7434a1f92f 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -86,7 +86,7 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
     }
     switch (attr) {
     case IR::Attribute::PrimitiveId:
-        ctx.Add("MOV.S {}.x,primitive.id;", inst);
+        ctx.Add("MOV.F {}.x,primitive.id;", inst);
         break;
     case IR::Attribute::PositionX:
     case IR::Attribute::PositionY:
@@ -113,19 +113,35 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, Scal
         ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
         break;
     case IR::Attribute::InstanceId:
-        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+        ctx.Add("MOV.F {}.x,{}.instance;", inst, ctx.attrib_name);
         break;
     case IR::Attribute::VertexId:
-        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+        ctx.Add("MOV.F {}.x,{}.id;", inst, ctx.attrib_name);
         break;
     case IR::Attribute::FrontFace:
-        ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
+        ctx.Add("CMP.F {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
         break;
     default:
         throw NotImplementedException("Get attribute {}", attr);
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        ctx.Add("MOV.S {}.x,primitive.id;", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
+        break;
+    default:
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
                       [[maybe_unused]] ScalarU32 vertex) {
     const u32 element{static_cast<u32>(attr) % 4};
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
index 1f343bff5e..b480078563 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
@@ -50,6 +50,7 @@ void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
index 0f2668d9ef..e0ead7a539 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp
@@ -7,6 +7,7 @@
 #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
 #include "shader_recompiler/backend/glsl/glsl_emit_context.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLSL {
 namespace {
@@ -30,8 +31,9 @@ void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
     inst.DestructiveAddUsage(1);
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)};
     const auto input{ctx.var_alloc.Consume(value)};
+    const auto suffix{ctx.profile.has_gl_bool_ref_bug ? "?true:false" : ""};
     if (ret != input) {
-        ctx.Add("{}={};", ret, input);
+        ctx.Add("{}={}{};", ret, input, suffix);
     }
 }
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 6477bd1928..0c1fbc7b1f 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
 
 void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    const IR::Value& offset) {
-    GetCbuf8(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf8(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
-    GetCbuf16(ctx, inst, binding, offset, "ftoi");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
+    GetCbuf16(ctx, inst, binding, offset, cast);
 }
 
 void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
-    GetCbuf(ctx, ret, binding, offset, 32, "ftou");
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                     const IR::Value& offset) {
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
-    GetCbuf(ctx, ret, binding, offset, 32);
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
+    GetCbuf(ctx, ret, binding, offset, 32, cast);
 }
 
 void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                       const IR::Value& offset) {
     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
+    const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
     if (offset.IsImmediate()) {
         static constexpr u32 cbuf_size{0x10000};
         const u32 u32_offset{offset.U32()};
@@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
             return;
         }
         if (u32_offset % 2 == 0) {
-            ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16,
+            ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
                          OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
         } else {
-            ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16,
-                         OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16,
-                         OffsetSwizzle(u32_offset + 4));
+            ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
+                         u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
+                         (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
         }
         return;
     }
     const auto offset_var{ctx.var_alloc.Consume(offset)};
     if (!ctx.profile.has_gl_component_indexing_bug) {
-        ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));",
-                     inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var);
+        ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
+                     cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
         return;
     }
     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
     const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
     for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
-        ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset,
-                swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var,
+        ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
+                swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
                 "xyzw"[(swizzle + 1) % 4]);
     }
 }
@@ -221,6 +228,22 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
     }
 }
 
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, std::string_view) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        ctx.AddU32("{}=uint(gl_PrimitiveID);", inst);
+        break;
+    case IR::Attribute::InstanceId:
+        ctx.AddU32("{}=uint(gl_InstanceID);", inst);
+        break;
+    case IR::Attribute::VertexId:
+        ctx.AddU32("{}=uint(gl_VertexID);", inst);
+        break;
+    default:
+        throw NotImplementedException("Get U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       [[maybe_unused]] std::string_view vertex) {
     if (IR::IsGeneric(attr)) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
index b765a251b1..474189d875 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp
@@ -125,11 +125,11 @@ void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& i
 }
 
 void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF32("{}=-({});", inst, value);
+    ctx.AddF32("{}=0.f-({});", inst, value);
 }
 
 void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddF64("{}=-({});", inst, value);
+    ctx.AddF64("{}=double(0.)-({});", inst, value);
 }
 
 void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
index f86502e4c8..6cabbc717d 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
@@ -60,6 +60,8 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
                       const IR::Value& offset);
 void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
                       std::string_view vertex);
+void EmitGetAttributeU32(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
+                         std::string_view vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value,
                       std::string_view vertex);
 void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
index 44060df334..b0d85be995 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp
@@ -87,11 +87,11 @@ void EmitUDiv32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::strin
 }
 
 void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU32("{}=uint(-({}));", inst, value);
+    ctx.AddU32("{}=uint(int(0)-int({}));", inst, value);
 }
 
 void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
-    ctx.AddU64("{}=-({});", inst, value);
+    ctx.AddU64("{}=uint64_t(int64_t(0)-int64_t({}));", inst, value);
 }
 
 void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
index b8ddafe48f..fcf620b791 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
@@ -90,7 +90,9 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value&
     if (phi_reg == val_reg) {
         return;
     }
-    ctx.Add("{}={};", phi_reg, val_reg);
+    const bool needs_workaround{ctx.profile.has_gl_bool_ref_bug && phi_type == IR::Type::U1};
+    const auto suffix{needs_workaround ? "?true:false" : ""};
+    ctx.Add("{}={}{};", phi_reg, val_reg, suffix);
 }
 
 void EmitPrologue(EmitContext& ctx) {
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index bc9d2a904a..bb7f1a0fd3 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
         return;
     }
     for (const auto& desc : info.constant_buffer_descriptors) {
-        header += fmt::format(
-            "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};",
-            bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024);
+        const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
+        header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
+                              bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
+                              stage_name, desc.index, 4 * 1024);
         bindings.uniform_buffer += desc.count;
     }
 }
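To see what the has_gl_cbuf_ftou_bug path changes in the emitted shader, here is a standalone sketch (not emitter code; the buffer names follow the {stage}_cbuf{index} scheme above but are otherwise hypothetical). Declaring the cbuf array as uvec4 lets unsigned loads skip floatBitsToUint entirely, which is exactly the cast the affected drivers miscompile:

    // Illustrative only: prints the cbuf declaration and a 32-bit load
    // with the workaround off and on.
    #include <fmt/core.h>

    int main() {
        for (const bool bug : {false, true}) {
            const auto cbuf_type = bug ? "uvec4" : "vec4";
            const auto cast = bug ? "" : "ftou";
            fmt::print("layout(std140,binding=0) uniform vs_cbuf_0{{{} vs_cbuf0[{}];}};\n",
                       cbuf_type, 4 * 1024);
            fmt::print("u32_value={}(vs_cbuf0[0].x);\n\n", cast);
        }
    }

With the flag set, the load becomes u32_value=(vs_cbuf0[0].x); — the source is already unsigned, so no bit-cast is needed, while float reads take the opposite utof path.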
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 6ce7ed12a8..50918317f9 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -30,11 +30,20 @@ struct FuncTraits<ReturnType_ (*)(Args...)> {
     using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
 };
 
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4702) // Ignore unreachable code warning
+#endif
+
 template <auto func, typename... Args>
 void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) {
     inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...));
 }
 
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 template <typename ArgType>
 ArgType Arg(EmitContext& ctx, const IR::Value& arg) {
     if constexpr (std::is_same_v<ArgType, Id>) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 14f470812e..8ea730c807 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -355,6 +355,31 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     }
 }
 
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
+    switch (attr) {
+    case IR::Attribute::PrimitiveId:
+        return ctx.OpLoad(ctx.U32[1], ctx.primitive_id);
+    case IR::Attribute::InstanceId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.instance_id);
+        } else {
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    case IR::Attribute::VertexId:
+        if (ctx.profile.support_vertex_instance_id) {
+            return ctx.OpLoad(ctx.U32[1], ctx.vertex_id);
+        } else {
+            const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)};
+            const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
+            return ctx.OpISub(ctx.U32[1], index, base);
+        }
+    default:
+        throw NotImplementedException("Read U32 attribute {}", attr);
+    }
+}
+
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) {
     const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)};
     if (!output) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 6cd22dd3ef..887112deb4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -53,6 +53,7 @@ Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
 Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
 Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
+Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
 void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex);
 Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex);
 void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 6929919df1..b94ce74061 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -40,6 +40,7 @@ OPCODE(GetCbufU32,            U32,     U32,        U32,                )
 OPCODE(GetCbufF32,            F32,     U32,        U32,                )
 OPCODE(GetCbufU32x2,          U32x2,   U32,        U32,                )
 OPCODE(GetAttribute,          F32,     Attribute,  U32,                )
+OPCODE(GetAttributeU32,       U32,     Attribute,  U32,                )
 OPCODE(SetAttribute,          Void,    Attribute,  F32,     U32,       )
 OPCODE(GetAttributeIndexed,   F32,     U32,        U32,                )
 OPCODE(SetAttributeIndexed,   Void,    U32,        F32,     U32,       )
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 1e476d83d9..a78c469be1 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -389,6 +389,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
         info.uses_demote_to_helper_invocation = true;
         break;
     case IR::Opcode::GetAttribute:
+    case IR::Opcode::GetAttributeU32:
         info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true;
         break;
     case IR::Opcode::SetAttribute:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index d089fdd12f..c134a12bc6 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -505,6 +505,29 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
             return;
         }
     }
+    if constexpr (op == IR::Opcode::BitCastU32F32) {
+        // Workaround for new NVIDIA driver bug, where:
+        //   uint attr = ftou(itof(gl_InstanceID));
+        // always returned 0.
+        // We can instead manually optimize this and work around the driver bug:
+        //   uint attr = uint(gl_InstanceID);
+        if (arg_inst->GetOpcode() == IR::Opcode::GetAttribute) {
+            const IR::Attribute attr{arg_inst->Arg(0).Attribute()};
+            switch (attr) {
+            case IR::Attribute::PrimitiveId:
+            case IR::Attribute::InstanceId:
+            case IR::Attribute::VertexId:
+                break;
+            default:
+                return;
+            }
+            // Replace the bitcasts with an integer attribute get
+            inst.ReplaceOpcode(IR::Opcode::GetAttributeU32);
+            inst.SetArg(0, arg_inst->Arg(0));
+            inst.SetArg(1, arg_inst->Arg(1));
+            return;
+        }
+    }
 }
 
 void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0c3b3b172..dc4c806ff4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -65,6 +65,10 @@ struct Profile {
     bool has_gl_component_indexing_bug{};
     /// The precise type qualifier is broken in the fragment stage of some drivers
     bool has_gl_precise_bug{};
+    /// Some drivers do not properly support floatBitsToUint when used on cbufs
+    bool has_gl_cbuf_ftou_bug{};
+    /// Some drivers poorly optimize boolean variable references
+    bool has_gl_bool_ref_bug{};
     /// Ignores SPIR-V ordered vs unordered using GLSL semantics
     bool ignore_nan_fp_comparisons{};
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e01..e62912a22b 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,13 @@ Device::Device() {
         shader_backend = Settings::ShaderBackend::GLSL;
     }
 
-    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia &&
-        !Settings::values.renderer_debug) {
+    if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
         const std::string_view driver_version = version.substr(13);
         const int version_major =
             std::atoi(driver_version.substr(0, driver_version.find(".")).data());
         if (version_major >= 495) {
-            LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems "
-                                       "with yuzu. Forcing GLASM as a mitigation.");
-            shader_backend = Settings::ShaderBackend::GLASM;
-            use_assembly_shaders = true;
+            has_cbuf_ftou_bug = true;
+            has_bool_ref_bug = true;
         }
     }
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659d..95c2e8d382 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,14 @@ public:
         return need_fastmath_off;
     }
 
+    bool HasCbufFtouBug() const {
+        return has_cbuf_ftou_bug;
+    }
+
+    bool HasBoolRefBug() const {
+        return has_bool_ref_bug;
+    }
+
     Settings::ShaderBackend GetShaderBackend() const {
         return shader_backend;
     }
@@ -200,6 +208,8 @@ private:
     bool has_sparse_texture_2{};
     bool warp_size_potentially_larger_than_guest{};
     bool need_fastmath_off{};
+    bool has_cbuf_ftou_bug{};
+    bool has_bool_ref_bug{};
     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f3..ec558a9afa 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,8 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
           .has_broken_fp16_float_controls = false,
           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
           .has_gl_precise_bug = device.HasPreciseBug(),
+          .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
+          .has_gl_bool_ref_bug = device.HasBoolRefBug(),
          .ignore_nan_fp_comparisons = true,
          .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
      },
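The detection in gl_device.cpp keys off the driver version embedded in GL_VERSION. A standalone sketch of that parsing, using a hypothetical version string of the shape NVIDIA reports:

    #include <cstdio>
    #include <cstdlib>
    #include <string_view>

    int main() {
        const std::string_view version{"4.6.0 NVIDIA 496.13"};      // hypothetical
        const std::string_view driver_version = version.substr(13); // "496.13"
        const int version_major =
            std::atoi(driver_version.substr(0, driver_version.find(".")).data());
        const bool needs_workarounds = version_major >= 495;
        std::printf("major=%d workarounds=%d\n", version_major, needs_workarounds);
    }

Rather than forcing the GLASM backend as before, the device now records the two bug flags and lets the GLSL backend emit its own workarounds.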
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 14e6522f2e..3c1f79a271 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1047,7 +1047,7 @@ bool Image::ScaleDown(bool ignore) {
 }
 
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
-                     ImageId image_id_, Image& image)
+                     ImageId image_id_, Image& image, const SlotVector<Image>&)
     : VideoCommon::ImageViewBase{info, image.info, image_id_}, views{runtime.null_image_views} {
     const Device& device = runtime.device;
     if (True(image.flags & ImageFlagBits::Converted)) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index dbf1df79cd..7f425631f5 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -36,6 +36,7 @@ using VideoCommon::ImageViewType;
 using VideoCommon::NUM_RT;
 using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
+using VideoCommon::SlotVector;
 
 struct ImageBufferMap {
     ~ImageBufferMap();
@@ -234,7 +235,8 @@ class ImageView : public VideoCommon::ImageViewBase {
     friend Image;
 
 public:
-    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
+                       const SlotVector<Image>&);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
                        const VideoCommon::ImageViewInfo&, GPUVAddr);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index cd59958974..2c39144592 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -518,53 +518,6 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
     scheduler.InvalidateState();
 }
 
-void BlitImageHelper::ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
-                                   ImageView& src_image_view, u32 up_scale, u32 down_shift) {
-    const VkPipelineLayout layout = *one_texture_pipeline_layout;
-    const VkImageView src_view = src_image_view.ColorView();
-    const VkSampler sampler = *nearest_sampler;
-    const VkExtent2D extent{
-        .width = std::max((src_image_view.size.width * up_scale) >> down_shift, 1U),
-        .height = std::max((src_image_view.size.height * up_scale) >> down_shift, 1U),
-    };
-    scheduler.RequestRenderpass(dst_framebuffer);
-    scheduler.Record([pipeline, layout, sampler, src_view, extent, up_scale, down_shift,
-                      this](vk::CommandBuffer cmdbuf) {
-        const VkOffset2D offset{
-            .x = 0,
-            .y = 0,
-        };
-        const VkViewport viewport{
-            .x = 0.0f,
-            .y = 0.0f,
-            .width = static_cast<float>(extent.width),
-            .height = static_cast<float>(extent.height),
-            .minDepth = 0.0f,
-            .maxDepth = 0.0f,
-        };
-        const VkRect2D scissor{
-            .offset = offset,
-            .extent = extent,
-        };
-        const PushConstants push_constants{
-            .tex_scale = {viewport.width, viewport.height},
-            .tex_offset = {0.0f, 0.0f},
-        };
-        const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
-        UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
-
-        // TODO: Barriers
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
-                                  nullptr);
-        cmdbuf.SetViewport(0, viewport);
-        cmdbuf.SetScissor(0, scissor);
-        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
-        cmdbuf.Draw(3, 1, 0, 0);
-    });
-    scheduler.InvalidateState();
-}
-
 void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                                           ImageView& src_image_view) {
     const VkPipelineLayout layout = *two_textures_pipeline_layout;
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 1d9f61a52a..85e7dca5bc 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -60,9 +60,6 @@ private:
     void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                  const ImageView& src_image_view);
 
-    void ConvertColor(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
-                      ImageView& src_image_view, u32 up_scale, u32 down_shift);
-
     void ConvertDepthStencil(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
                              ImageView& src_image_view);
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index a4cce61423..0ba56ff1e1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1476,8 +1476,7 @@ bool Image::NeedsScaleHelper() const {
 ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
                      ImageId image_id_, Image& image)
     : VideoCommon::ImageViewBase{info, image.info, image_id_}, device{&runtime.device},
-      src_image{&image}, image_handle{image.Handle()},
-      samples(ConvertSampleCount(image.info.num_samples)) {
+      image_handle{image.Handle()}, samples(ConvertSampleCount(image.info.num_samples)) {
     using Shader::TextureType;
     const VkImageAspectFlags aspect_mask = ImageViewAspectMask(info);
@@ -1560,6 +1559,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
     }
 }
 
+ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info,
+                     ImageId image_id_, Image& image, const SlotVector<Image>& slot_imgs)
+    : ImageView{runtime, info, image_id_, image} {
+    slot_images = &slot_imgs;
+}
+
 ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
                      const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
     : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
@@ -1616,10 +1621,12 @@ VkImageView ImageView::StorageView(Shader::TextureType texture_type,
 }
 
 bool ImageView::IsRescaled() const noexcept {
-    if (!src_image) {
+    if (!slot_images) {
         return false;
     }
-    return src_image->IsRescaled();
+    const auto& slots = *slot_images;
+    const auto& src_image = slots[image_id];
+    return src_image.IsRescaled();
 }
 
 vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index cad033af79..c81130dd20 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -23,6 +23,7 @@ using VideoCommon::ImageId;
 using VideoCommon::NUM_RT;
 using VideoCommon::Region2D;
 using VideoCommon::RenderTargets;
+using VideoCommon::SlotVector;
 using VideoCore::Surface::PixelFormat;
 
 class ASTCDecoderPass;
@@ -172,6 +173,8 @@ private:
 class ImageView : public VideoCommon::ImageViewBase {
 public:
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
+    explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&,
+                       const SlotVector<Image>&);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
                        const VideoCommon::ImageViewInfo&, GPUVAddr);
     explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageViewParams&);
@@ -228,7 +231,7 @@ private:
     [[nodiscard]] vk::ImageView MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask);
 
     const Device* device = nullptr;
-    const Image* src_image{};
+    const SlotVector<Image>* slot_images = nullptr;
 
     std::array<vk::ImageView, Shader::NUM_TEXTURE_TYPES> image_views;
     std::unique_ptr<StorageViews> storage_views;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2e19fced24..b494152b83 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1397,7 +1397,8 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
     if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
         return image_view_id;
     }
-    const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
+    const ImageViewId image_view_id =
+        slot_image_views.insert(runtime, info, image_id, image, slot_images);
     image.InsertView(info, image_view_id);
     return image_view_id;
 }
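A note on the ownership change above (an illustrative analogy using std::vector rather than yuzu's SlotVector): a cached element pointer can dangle when the container grows, while a (container, id) pair stays valid. That is why ImageView now stores slot_images alongside its existing image_id instead of a raw src_image pointer:

    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> slots{10, 20, 30};
        const int* cached = &slots[1];  // like the removed src_image pointer
        const std::size_t id = 1;       // like ImageId + the slot container
        slots.resize(4096);             // growth may reallocate; 'cached' dangles
        std::printf("%d\n", slots[id]); // the (container, id) lookup stays valid
    }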
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 7bd31b2111..d8e19cb2fe 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -364,14 +364,14 @@ template <u32 GOB_EXTENT>
 [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress2D(
     const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
-    const u32 layer_stride = new_info.layer_stride;
-    const s32 new_size = layer_stride * new_info.resources.layers;
-    const s32 diff = static_cast<s32>(overlap.gpu_addr - gpu_addr);
+    const u64 layer_stride = new_info.layer_stride;
+    const u64 new_size = layer_stride * new_info.resources.layers;
+    const u64 diff = overlap.gpu_addr - gpu_addr;
     if (diff > new_size) {
         return std::nullopt;
     }
-    const s32 base_layer = diff / layer_stride;
-    const s32 mip_offset = diff % layer_stride;
+    const s32 base_layer = static_cast<s32>(diff / layer_stride);
+    const s32 mip_offset = static_cast<s32>(diff % layer_stride);
     const std::array offsets = CalculateMipLevelOffsets(new_info);
     const auto end = offsets.begin() + new_info.resources.levels;
     const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset));
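The widening above matters because the old 32-bit math could wrap before the range check ever ran. A standalone sketch with hypothetical values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t layer_stride = 0x20000000; // 512 MiB per layer (hypothetical)
        const std::uint32_t layers = 8;                // 8 layers -> 4 GiB total
        const std::uint32_t bad = layer_stride * layers;                 // wraps to 0
        const std::uint64_t good = std::uint64_t{layer_stride} * layers; // 0x100000000
        std::printf("u32=%u u64=%llu\n", bad, static_cast<unsigned long long>(good));
    }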
diff --git a/src/yuzu/applets/qt_controller.cpp b/src/yuzu/applets/qt_controller.cpp
index c6222b571f..d63193131d 100644
--- a/src/yuzu/applets/qt_controller.cpp
+++ b/src/yuzu/applets/qt_controller.cpp
@@ -33,7 +33,7 @@ void UpdateController(Core::HID::EmulatedController* controller,
     }
     controller->SetNpadStyleIndex(controller_type);
     if (connected) {
-        controller->Connect();
+        controller->Connect(true);
     }
 }
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 8a8be8e406..cb61637020 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -599,11 +599,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
     if (is_connected) {
         if (type == Core::HID::NpadStyleIndex::Handheld) {
             emulated_controller_p1->Disconnect();
-            emulated_controller_handheld->Connect();
+            emulated_controller_handheld->Connect(true);
             emulated_controller = emulated_controller_handheld;
         } else {
             emulated_controller_handheld->Disconnect();
-            emulated_controller_p1->Connect();
+            emulated_controller_p1->Connect(true);
             emulated_controller = emulated_controller_p1;
         }
     }
@@ -718,7 +718,7 @@ void ConfigureInputPlayer::LoadConfiguration() {
 void ConfigureInputPlayer::ConnectPlayer(bool connected) {
     ui->groupConnectedController->setChecked(connected);
     if (connected) {
-        emulated_controller->Connect();
+        emulated_controller->Connect(true);
     } else {
         emulated_controller->Disconnect();
     }