Diffstat (limited to 'src')
66 files changed, 1110 insertions, 275 deletions
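The dominant change in this diff is mechanical: the NonCopyable base class deleted from common_types.h below is replaced by the YUZU_NON_COPYABLE and YUZU_NON_MOVEABLE macros pulled in via "common/common_funcs.h". The macro definitions themselves are not part of this diff; judging by the deleted class and by how the call sites use them, they presumably expand to deleted special members along these lines:

    // Hedged sketch, inferred from the deleted NonCopyable class below; the real
    // definitions live in src/common/common_funcs.h, which this diff does not show.
    #define YUZU_NON_COPYABLE(cls)                                                                 \
        cls(const cls&) = delete;                                                                  \
        cls& operator=(const cls&) = delete

    #define YUZU_NON_MOVEABLE(cls)                                                                 \
        cls(cls&&) = delete;                                                                       \
        cls& operator=(cls&&) = delete

Because the macros declare the deleted members directly in each class, several hunks below also move them under the public: label, so that misuse produces a "deleted function" diagnostic rather than a misleading access error.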
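In k_page_table.cpp, the std::recursive_mutex/std::lock_guard pair is replaced with the kernel's own KLightLock guarded by KScopedLightLock, which is what lets IsLockedByCurrentThread() in k_page_table.h return a real answer instead of a hardcoded true. A minimal sketch of the guard's shape, assuming the Lock()/Unlock() interface implied by the new "core/hle/kernel/k_light_lock.h" include (the actual header is not shown here):

    #include "core/hle/kernel/k_light_lock.h" // provides KLightLock (not shown in this diff)

    // Hypothetical reduction of the RAII guard used throughout the page-table hunks.
    class KScopedLightLock {
    public:
        explicit KScopedLightLock(KLightLock& lock_) : lock{lock_} {
            lock.Lock(); // held for the enclosing scope
        }
        ~KScopedLightLock() {
            lock.Unlock(); // released on every exit path, including R_TRY early returns
        }
    private:
        KLightLock& lock;
    };

At each call site the guard replaces std::lock_guard one-for-one, e.g. KScopedLightLock lk(general_lock);.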
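The shader-recompiler half of the diff adds 32x2 variants of the 64-bit atomics for drivers without Int64 atomic support. As the LOG_WARNING strings state, the GLSL and SPIR-V backends lower these to plain two-word load/store sequences, so atomicity is lost across the pair. Restated as standalone C++ (the names here are illustrative, not from the codebase):

    #include <cstdint>

    struct U32x2 {
        std::uint32_t x, y;
    };

    // A 64-bit "exchange" emulated on two 32-bit words: read the old pair, then
    // store the new one. The loads and stores are separate operations, so another
    // thread may observe a half-updated pair; that is the tradeoff being logged.
    U32x2 ExchangeNonAtomic(U32x2* word_pair, U32x2 value) {
        const U32x2 old = *word_pair;
        word_pair->x = value.x;
        word_pair->y = value.y;
        return old;
    }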
diff --git a/src/common/common_types.h b/src/common/common_types.h index 4cec89fbd9..99bffc460a 100644 --- a/src/common/common_types.h +++ b/src/common/common_types.h @@ -46,13 +46,3 @@ using GPUVAddr = u64; ///< Represents a pointer in the GPU virtual address space using u128 = std::array<std::uint64_t, 2>; static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide"); - -// An inheritable class to disallow the copy constructor and operator= functions -class NonCopyable { -protected: - constexpr NonCopyable() = default; - ~NonCopyable() = default; - - NonCopyable(const NonCopyable&) = delete; - NonCopyable& operator=(const NonCopyable&) = delete; -}; diff --git a/src/common/fs/file.h b/src/common/fs/file.h index 2c4ab43320..a4f7944cd1 100644 --- a/src/common/fs/file.h +++ b/src/common/fs/file.h @@ -188,9 +188,8 @@ public: #ifdef _WIN32 template <typename Path> - [[nodiscard]] void Open(const Path& path, FileAccessMode mode, - FileType type = FileType::BinaryFile, - FileShareFlag flag = FileShareFlag::ShareReadOnly) { + void Open(const Path& path, FileAccessMode mode, FileType type = FileType::BinaryFile, + FileShareFlag flag = FileShareFlag::ShareReadOnly) { using ValueType = typename Path::value_type; if constexpr (IsChar<ValueType>) { Open(ToU8String(path), mode, type, flag); diff --git a/src/common/input.h b/src/common/input.h index f4f9eb30a7..54fcb24b04 100644 --- a/src/common/input.h +++ b/src/common/input.h @@ -209,6 +209,8 @@ enum class ButtonNames { Triangle, Share, Options, + Home, + Touch, // Mouse buttons ButtonMouseWheel, diff --git a/src/common/telemetry.h b/src/common/telemetry.h index 49186e848a..d38aeac999 100644 --- a/src/common/telemetry.h +++ b/src/common/telemetry.h @@ -8,6 +8,7 @@ #include <map> #include <memory> #include <string> +#include "common/common_funcs.h" #include "common/common_types.h" namespace Common::Telemetry { @@ -28,7 +29,7 @@ struct VisitorInterface; /** * Interface class for telemetry data fields. */ -class FieldInterface : NonCopyable { +class FieldInterface { public: virtual ~FieldInterface() = default; @@ -52,14 +53,15 @@ public: template <typename T> class Field : public FieldInterface { public: + YUZU_NON_COPYABLE(Field); + Field(FieldType type_, std::string name_, T value_) : name(std::move(name_)), type(type_), value(std::move(value_)) {} - Field(const Field&) = default; - Field& operator=(const Field&) = default; + ~Field() override = default; - Field(Field&&) = default; - Field& operator=(Field&& other) = default; + Field(Field&&) noexcept = default; + Field& operator=(Field&& other) noexcept = default; void Accept(VisitorInterface& visitor) const override; @@ -98,9 +100,15 @@ private: /** * Collection of data fields that have been logged. */ -class FieldCollection final : NonCopyable { +class FieldCollection final { public: + YUZU_NON_COPYABLE(FieldCollection); + FieldCollection() = default; + ~FieldCollection() = default; + + FieldCollection(FieldCollection&&) noexcept = default; + FieldCollection& operator=(FieldCollection&&) noexcept = default; /** * Accept method for the visitor pattern, visits each field in the collection. @@ -133,7 +141,7 @@ private: * Telemetry fields visitor interface class. A backend to log to a web service should implement * this interface. 
*/ -struct VisitorInterface : NonCopyable { +struct VisitorInterface { virtual ~VisitorInterface() = default; virtual void Visit(const Field<bool>& field) = 0; @@ -160,8 +168,8 @@ struct VisitorInterface : NonCopyable { * Empty implementation of VisitorInterface that drops all fields. Used when a functional * backend implementation is not available. */ -struct NullVisitor : public VisitorInterface { - ~NullVisitor() = default; +struct NullVisitor final : public VisitorInterface { + YUZU_NON_COPYABLE(NullVisitor); void Visit(const Field<bool>& /*field*/) override {} void Visit(const Field<double>& /*field*/) override {} diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ffa282e888..9acf7551ef 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -65,16 +65,20 @@ private: #ifdef ARCHITECTURE_x86_64 -std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, - u32 emulated_clock_frequency) { +std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); u64 rtsc_frequency = 0; if (caps.invariant_tsc) { rtsc_frequency = EstimateRDTSCFrequency(); } - // Fallback to StandardWallClock if rtsc period is higher than a nano second - if (rtsc_frequency <= 1000000000) { + // Fall back to StandardWallClock if the hardware TSC does not have precision greater than: + // - A nanosecond + // - The emulated CPU frequency + // - The emulated clock counter frequency (CNTFRQ) + if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || + rtsc_frequency <= emulated_clock_frequency) { return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); } else { @@ -85,8 +89,8 @@ std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, #else -std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, - u32 emulated_clock_frequency) { +std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency) { return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency); } diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index cef3e9499f..874448c27c 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -13,6 +13,10 @@ namespace Common { class WallClock { public: + static constexpr u64 NS_RATIO = 1'000'000'000; + static constexpr u64 US_RATIO = 1'000'000; + static constexpr u64 MS_RATIO = 1'000; + virtual ~WallClock() = default; /// Returns current wall time in nanoseconds @@ -49,7 +53,7 @@ private: bool is_native; }; -[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency, - u32 emulated_clock_frequency); +[[nodiscard]] std::unique_ptr<WallClock> CreateBestMatchingClock(u64 emulated_cpu_frequency, + u64 emulated_clock_frequency); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 82ee2c8a13..91b842829b 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -47,9 +47,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen _mm_mfence(); time_point.inner.last_measure = __rdtsc(); time_point.inner.accumulated_ticks = 0U; - ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency); + 
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); + us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); + ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); } diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 689e3ceb56..c603224425 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -6,6 +6,7 @@ #include <array> #include <vector> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hardware_properties.h" @@ -24,8 +25,11 @@ class CPUInterruptHandler; using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>; /// Generic ARMv8 CPU interface -class ARM_Interface : NonCopyable { +class ARM_Interface { public: + YUZU_NON_COPYABLE(ARM_Interface); + YUZU_NON_MOVEABLE(ARM_Interface); + explicit ARM_Interface(System& system_, CPUInterrupts& interrupt_handlers_, bool uses_wall_clock_) : system{system_}, interrupt_handlers{interrupt_handlers_}, uses_wall_clock{ diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h index 3e625fad66..1b93658531 100644 --- a/src/core/file_sys/vfs.h +++ b/src/core/file_sys/vfs.h @@ -12,6 +12,7 @@ #include <type_traits> #include <vector> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/file_sys/vfs_types.h" @@ -29,8 +30,11 @@ enum class VfsEntryType { // A class representing an abstract filesystem. A default implementation given the root VirtualDir // is provided for convenience, but if the Vfs implementation has any additional state or // functionality, they will need to override. -class VfsFilesystem : NonCopyable { +class VfsFilesystem { public: + YUZU_NON_COPYABLE(VfsFilesystem); + YUZU_NON_MOVEABLE(VfsFilesystem); + explicit VfsFilesystem(VirtualDir root); virtual ~VfsFilesystem(); @@ -77,8 +81,12 @@ protected: }; // A class representing a file in an abstract filesystem. -class VfsFile : NonCopyable { +class VfsFile { public: + YUZU_NON_COPYABLE(VfsFile); + YUZU_NON_MOVEABLE(VfsFile); + + VfsFile() = default; virtual ~VfsFile(); // Retrieves the file name. @@ -176,8 +184,12 @@ public: }; // A class representing a directory in an abstract filesystem. -class VfsDirectory : NonCopyable { +class VfsDirectory { public: + YUZU_NON_COPYABLE(VfsDirectory); + YUZU_NON_MOVEABLE(VfsDirectory); + + VfsDirectory() = default; virtual ~VfsDirectory(); // Retrieves the file located at path as if the current directory was root. 
Returns nullptr if diff --git a/src/core/hid/emulated_console.h b/src/core/hid/emulated_console.h index 7074191022..5eb170823b 100644 --- a/src/core/hid/emulated_console.h +++ b/src/core/hid/emulated_console.h @@ -10,6 +10,7 @@ #include <mutex> #include <unordered_map> +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/input.h" #include "common/param_package.h" diff --git a/src/core/hid/emulated_controller.h b/src/core/hid/emulated_controller.h index a63a83cce6..d8642c5b3a 100644 --- a/src/core/hid/emulated_controller.h +++ b/src/core/hid/emulated_controller.h @@ -13,8 +13,6 @@ #include "common/common_types.h" #include "common/input.h" #include "common/param_package.h" -#include "common/point.h" -#include "common/quaternion.h" #include "common/settings.h" #include "common/vector_math.h" #include "core/hid/hid_types.h" diff --git a/src/core/hid/hid_core.h b/src/core/hid/hid_core.h index 837f7de49c..717f605e7b 100644 --- a/src/core/hid/hid_core.h +++ b/src/core/hid/hid_core.h @@ -6,6 +6,7 @@ #include <memory> +#include "common/common_funcs.h" #include "core/hid/hid_types.h" namespace Core::HID { diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h index 165b767471..05779f2d55 100644 --- a/src/core/hle/kernel/k_auto_object.h +++ b/src/core/hle/kernel/k_auto_object.h @@ -20,8 +20,6 @@ class KernelCore; class KProcess; #define KERNEL_AUTOOBJECT_TRAITS(CLASS, BASE_CLASS) \ - YUZU_NON_COPYABLE(CLASS); \ - YUZU_NON_MOVEABLE(CLASS); \ \ private: \ friend class ::Kernel::KClassTokenGenerator; \ @@ -32,6 +30,9 @@ private: } \ \ public: \ + YUZU_NON_COPYABLE(CLASS); \ + YUZU_NON_MOVEABLE(CLASS); \ + \ using BaseClass = BASE_CLASS; \ static constexpr TypeObj GetStaticTypeObj() { \ constexpr ClassTokenType Token = ClassToken(); \ @@ -224,9 +225,9 @@ private: template <typename T> class KScopedAutoObject { +public: YUZU_NON_COPYABLE(KScopedAutoObject); -public: constexpr KScopedAutoObject() = default; constexpr KScopedAutoObject(T* o) : m_obj(o) { diff --git a/src/core/hle/kernel/k_auto_object_container.h b/src/core/hle/kernel/k_auto_object_container.h index 4eadfe99de..697cc4289c 100644 --- a/src/core/hle/kernel/k_auto_object_container.h +++ b/src/core/hle/kernel/k_auto_object_container.h @@ -16,13 +16,12 @@ class KernelCore; class KProcess; class KAutoObjectWithListContainer { +public: YUZU_NON_COPYABLE(KAutoObjectWithListContainer); YUZU_NON_MOVEABLE(KAutoObjectWithListContainer); -public: using ListType = boost::intrusive::rbtree<KAutoObjectWithList>; -public: class ListAccessor : public KScopedLightLock { public: explicit ListAccessor(KAutoObjectWithListContainer* container) @@ -48,7 +47,6 @@ public: friend class ListAccessor; -public: KAutoObjectWithListContainer(KernelCore& kernel) : m_lock(kernel), m_object_list() {} void Initialize() {} diff --git a/src/core/hle/kernel/k_handle_table.h b/src/core/hle/kernel/k_handle_table.h index 4b114ec2fd..87004a0f91 100644 --- a/src/core/hle/kernel/k_handle_table.h +++ b/src/core/hle/kernel/k_handle_table.h @@ -22,13 +22,12 @@ namespace Kernel { class KernelCore; class KHandleTable { +public: YUZU_NON_COPYABLE(KHandleTable); YUZU_NON_MOVEABLE(KHandleTable); -public: static constexpr size_t MaxTableSize = 1024; -public: explicit KHandleTable(KernelCore& kernel_); ~KHandleTable(); diff --git a/src/core/hle/kernel/k_memory_manager.h b/src/core/hle/kernel/k_memory_manager.h index abd6c8ace2..17c7690f1a 100644 --- a/src/core/hle/kernel/k_memory_manager.h +++ b/src/core/hle/kernel/k_memory_manager.h 
@@ -8,6 +8,7 @@ #include <mutex> #include <tuple> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hle/kernel/k_page_heap.h" #include "core/hle/result.h" @@ -20,8 +21,11 @@ namespace Kernel { class KPageLinkedList; -class KMemoryManager final : NonCopyable { +class KMemoryManager final { public: + YUZU_NON_COPYABLE(KMemoryManager); + YUZU_NON_MOVEABLE(KMemoryManager); + enum class Pool : u32 { Application = 0, Applet = 1, @@ -88,26 +92,13 @@ public: } private: - class Impl final : NonCopyable { - private: - using RefCount = u16; - - private: - KPageHeap heap; - Pool pool{}; - + class Impl final { public: - static std::size_t CalculateManagementOverheadSize(std::size_t region_size); - - static constexpr std::size_t CalculateOptimizedProcessOverheadSize( - std::size_t region_size) { - return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / - Common::BitSize<u64>()) * - sizeof(u64); - } + YUZU_NON_COPYABLE(Impl); + YUZU_NON_MOVEABLE(Impl); - public: Impl() = default; + ~Impl() = default; std::size_t Initialize(Pool new_pool, u64 start_address, u64 end_address); @@ -130,6 +121,21 @@ private: constexpr VAddr GetEndAddress() const { return heap.GetEndAddress(); } + + static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + + static constexpr std::size_t CalculateOptimizedProcessOverheadSize( + std::size_t region_size) { + return (Common::AlignUp((region_size / PageSize), Common::BitSize<u64>()) / + Common::BitSize<u64>()) * + sizeof(u64); + } + + private: + using RefCount = u16; + + KPageHeap heap; + Pool pool{}; }; private: diff --git a/src/core/hle/kernel/k_memory_region.h b/src/core/hle/kernel/k_memory_region.h index 90ab8fd625..e9bdf4e595 100644 --- a/src/core/hle/kernel/k_memory_region.h +++ b/src/core/hle/kernel/k_memory_region.h @@ -5,6 +5,7 @@ #pragma once #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/intrusive_red_black_tree.h" #include "core/hle/kernel/k_memory_region_type.h" @@ -13,11 +14,13 @@ namespace Kernel { class KMemoryRegionAllocator; -class KMemoryRegion final : public Common::IntrusiveRedBlackTreeBaseNode<KMemoryRegion>, - NonCopyable { +class KMemoryRegion final : public Common::IntrusiveRedBlackTreeBaseNode<KMemoryRegion> { friend class KMemoryRegionTree; public: + YUZU_NON_COPYABLE(KMemoryRegion); + YUZU_NON_MOVEABLE(KMemoryRegion); + constexpr KMemoryRegion() = default; constexpr KMemoryRegion(u64 address_, u64 last_address_) : address{address_}, last_address{last_address_} {} @@ -29,6 +32,8 @@ public: : KMemoryRegion(address_, last_address_, std::numeric_limits<u64>::max(), attributes_, type_id_) {} + ~KMemoryRegion() = default; + static constexpr int Compare(const KMemoryRegion& lhs, const KMemoryRegion& rhs) { if (lhs.GetAddress() < rhs.GetAddress()) { return -1; @@ -39,16 +44,6 @@ public: } } -private: - constexpr void Reset(u64 a, u64 la, u64 p, u32 r, u32 t) { - address = a; - pair_address = p; - last_address = la; - attributes = r; - type_id = t; - } - -public: constexpr u64 GetAddress() const { return address; } @@ -108,6 +103,14 @@ public: } private: + constexpr void Reset(u64 a, u64 la, u64 p, u32 r, u32 t) { + address = a; + pair_address = p; + last_address = la; + attributes = r; + type_id = t; + } + u64 address{}; u64 last_address{}; u64 pair_address{}; @@ -115,8 +118,25 @@ private: u32 type_id{}; }; -class KMemoryRegionTree final : NonCopyable { +class KMemoryRegionTree final { +private: + using TreeType = + 
Common::IntrusiveRedBlackTreeBaseTraits<KMemoryRegion>::TreeType<KMemoryRegion>; + public: + YUZU_NON_COPYABLE(KMemoryRegionTree); + YUZU_NON_MOVEABLE(KMemoryRegionTree); + + using value_type = TreeType::value_type; + using size_type = TreeType::size_type; + using difference_type = TreeType::difference_type; + using pointer = TreeType::pointer; + using const_pointer = TreeType::const_pointer; + using reference = TreeType::reference; + using const_reference = TreeType::const_reference; + using iterator = TreeType::iterator; + using const_iterator = TreeType::const_iterator; + struct DerivedRegionExtents { const KMemoryRegion* first_region{}; const KMemoryRegion* last_region{}; @@ -140,29 +160,9 @@ public: } }; -private: - using TreeType = - Common::IntrusiveRedBlackTreeBaseTraits<KMemoryRegion>::TreeType<KMemoryRegion>; - -public: - using value_type = TreeType::value_type; - using size_type = TreeType::size_type; - using difference_type = TreeType::difference_type; - using pointer = TreeType::pointer; - using const_pointer = TreeType::const_pointer; - using reference = TreeType::reference; - using const_reference = TreeType::const_reference; - using iterator = TreeType::iterator; - using const_iterator = TreeType::const_iterator; - -private: - TreeType m_tree{}; - KMemoryRegionAllocator& memory_region_allocator; - -public: explicit KMemoryRegionTree(KMemoryRegionAllocator& memory_region_allocator_); + ~KMemoryRegionTree() = default; -public: KMemoryRegion* FindModifiable(u64 address) { if (auto it = this->find(KMemoryRegion(address, address, 0, 0)); it != this->end()) { return std::addressof(*it); @@ -241,7 +241,6 @@ public: return GetDerivedRegionExtents(static_cast<KMemoryRegionType>(type_id)); } -public: void InsertDirectly(u64 address, u64 last_address, u32 attr = 0, u32 type_id = 0); bool Insert(u64 address, size_t size, u32 type_id, u32 new_attr = 0, u32 old_attr = 0); @@ -252,7 +251,6 @@ public: return this->GetRandomAlignedRegion(size + 2 * guard_size, alignment, type_id) + guard_size; } -public: // Iterator accessors. iterator begin() { return m_tree.begin(); @@ -322,13 +320,21 @@ public: iterator nfind(const_reference ref) const { return m_tree.nfind(ref); } + +private: + TreeType m_tree{}; + KMemoryRegionAllocator& memory_region_allocator; }; -class KMemoryRegionAllocator final : NonCopyable { +class KMemoryRegionAllocator final { public: + YUZU_NON_COPYABLE(KMemoryRegionAllocator); + YUZU_NON_MOVEABLE(KMemoryRegionAllocator); + static constexpr size_t MaxMemoryRegions = 200; constexpr KMemoryRegionAllocator() = default; + constexpr ~KMemoryRegionAllocator() = default; template <typename... Args> KMemoryRegion* Allocate(Args&&... 
args) { diff --git a/src/core/hle/kernel/k_page_heap.h b/src/core/hle/kernel/k_page_heap.h index 8d9f305230..a65aa28a04 100644 --- a/src/core/hle/kernel/k_page_heap.h +++ b/src/core/hle/kernel/k_page_heap.h @@ -8,14 +8,44 @@ #include <vector> #include "common/alignment.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/hle/kernel/k_page_bitmap.h" #include "core/hle/kernel/memory_types.h" namespace Kernel { -class KPageHeap final : NonCopyable { +class KPageHeap final { public: + YUZU_NON_COPYABLE(KPageHeap); + YUZU_NON_MOVEABLE(KPageHeap); + + KPageHeap() = default; + ~KPageHeap() = default; + + constexpr VAddr GetAddress() const { + return heap_address; + } + constexpr std::size_t GetSize() const { + return heap_size; + } + constexpr VAddr GetEndAddress() const { + return GetAddress() + GetSize(); + } + constexpr std::size_t GetPageOffset(VAddr block) const { + return (block - GetAddress()) / PageSize; + } + + void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); + VAddr AllocateBlock(s32 index, bool random); + void Free(VAddr addr, std::size_t num_pages); + + void UpdateUsedSize() { + used_size = heap_size - (GetNumFreePages() * PageSize); + } + + static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + static constexpr s32 GetAlignedBlockIndex(std::size_t num_pages, std::size_t align_pages) { const auto target_pages{std::max(num_pages, align_pages)}; for (std::size_t i = 0; i < NumMemoryBlockPageShifts; i++) { @@ -45,21 +75,13 @@ public: } private: - static constexpr std::size_t NumMemoryBlockPageShifts{7}; - static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ - 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, - }; - - class Block final : NonCopyable { - private: - KPageBitmap bitmap; - VAddr heap_address{}; - uintptr_t end_offset{}; - std::size_t block_shift{}; - std::size_t next_block_shift{}; - + class Block final { public: + YUZU_NON_COPYABLE(Block); + YUZU_NON_MOVEABLE(Block); + Block() = default; + ~Block() = default; constexpr std::size_t GetShift() const { return block_shift; @@ -129,7 +151,6 @@ private: return heap_address + (offset << GetShift()); } - public: static constexpr std::size_t CalculateManagementOverheadSize(std::size_t region_size, std::size_t cur_block_shift, std::size_t next_block_shift) { @@ -139,35 +160,15 @@ private: return KPageBitmap::CalculateManagementOverheadSize( (align * 2 + Common::AlignUp(region_size, align)) / cur_block_size); } - }; - -public: - KPageHeap() = default; - - constexpr VAddr GetAddress() const { - return heap_address; - } - constexpr std::size_t GetSize() const { - return heap_size; - } - constexpr VAddr GetEndAddress() const { - return GetAddress() + GetSize(); - } - constexpr std::size_t GetPageOffset(VAddr block) const { - return (block - GetAddress()) / PageSize; - } - void Initialize(VAddr heap_address, std::size_t heap_size, std::size_t metadata_size); - VAddr AllocateBlock(s32 index, bool random); - void Free(VAddr addr, std::size_t num_pages); - - void UpdateUsedSize() { - used_size = heap_size - (GetNumFreePages() * PageSize); - } - - static std::size_t CalculateManagementOverheadSize(std::size_t region_size); + private: + KPageBitmap bitmap; + VAddr heap_address{}; + uintptr_t end_offset{}; + std::size_t block_shift{}; + std::size_t next_block_shift{}; + }; -private: constexpr std::size_t GetNumFreePages() const { std::size_t num_free{}; @@ -180,6 +181,11 @@ private: void FreeBlock(VAddr block, s32 index); + 
static constexpr std::size_t NumMemoryBlockPageShifts{7}; + static constexpr std::array<std::size_t, NumMemoryBlockPageShifts> MemoryBlockPageShifts{ + 0xC, 0x10, 0x15, 0x16, 0x19, 0x1D, 0x1E, + }; + VAddr heap_address{}; std::size_t heap_size{}; std::size_t used_size{}; diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp index 2ebbc08197..912853e5c2 100644 --- a/src/core/hle/kernel/k_page_table.cpp +++ b/src/core/hle/kernel/k_page_table.cpp @@ -61,7 +61,10 @@ constexpr std::size_t GetSizeInRange(const KMemoryInfo& info, VAddr start, VAddr } // namespace -KPageTable::KPageTable(Core::System& system_) : system{system_} {} +KPageTable::KPageTable(Core::System& system_) + : general_lock{system_.Kernel()}, map_physical_memory_lock{system_.Kernel()}, system{system_} {} + +KPageTable::~KPageTable() = default; ResultCode KPageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, VAddr code_addr, @@ -282,7 +285,7 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory R_UNLESS(this->CanContain(addr, size, state), ResultInvalidCurrentMemory); // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Verify that the destination memory is unmapped. R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, KMemoryState::Free, @@ -300,7 +303,7 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory } ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); const std::size_t num_pages{size / PageSize}; @@ -337,7 +340,7 @@ ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t } ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); if (!size) { return ResultSuccess; @@ -371,7 +374,7 @@ ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table, VAddr src_addr) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); const std::size_t num_pages{size / PageSize}; @@ -399,10 +402,10 @@ ResultCode KPageTable::UnmapProcessMemory(VAddr dst_addr, std::size_t size, ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { // Lock the physical memory lock. - std::lock_guard phys_lk(map_physical_memory_lock); + KScopedLightLock map_phys_mem_lk(map_physical_memory_lock); // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); std::size_t mapped_size{}; const VAddr end_addr{addr + size}; @@ -478,7 +481,11 @@ ResultCode KPageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { } ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + // Lock the physical memory lock. + KScopedLightLock map_phys_mem_lk(map_physical_memory_lock); + + // Lock the table. 
+ KScopedLightLock lk(general_lock); const VAddr end_addr{addr + size}; ResultCode result{ResultSuccess}; @@ -540,7 +547,7 @@ ResultCode KPageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { } ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryState src_state{}; CASCADE_CODE(CheckMemoryState( @@ -579,7 +586,7 @@ ResultCode KPageTable::MapMemory(VAddr dst_addr, VAddr src_addr, std::size_t siz } ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryState src_state{}; CASCADE_CODE(CheckMemoryState( @@ -622,6 +629,8 @@ ResultCode KPageTable::UnmapMemory(VAddr dst_addr, VAddr src_addr, std::size_t s ResultCode KPageTable::MapPages(VAddr addr, const KPageLinkedList& page_linked_list, KMemoryPermission perm) { + ASSERT(this->IsLockedByCurrentThread()); + VAddr cur_addr{addr}; for (const auto& node : page_linked_list.Nodes()) { @@ -650,7 +659,7 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list R_UNLESS(this->CanContain(address, size, state), ResultInvalidCurrentMemory); // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Check the memory state. R_TRY(this->CheckMemoryState(address, size, KMemoryState::All, KMemoryState::Free, @@ -667,6 +676,8 @@ ResultCode KPageTable::MapPages(VAddr address, KPageLinkedList& page_linked_list } ResultCode KPageTable::UnmapPages(VAddr addr, const KPageLinkedList& page_linked_list) { + ASSERT(this->IsLockedByCurrentThread()); + VAddr cur_addr{addr}; for (const auto& node : page_linked_list.Nodes()) { @@ -691,7 +702,7 @@ ResultCode KPageTable::UnmapPages(VAddr addr, KPageLinkedList& page_linked_list, R_UNLESS(this->Contains(addr, size), ResultInvalidCurrentMemory); // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Check the memory state. R_TRY(this->CheckMemoryState(addr, size, KMemoryState::All, state, KMemoryPermission::None, @@ -712,7 +723,7 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, const size_t num_pages = size / PageSize; // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Verify we can change the memory permission. KMemoryState old_state; @@ -766,7 +777,7 @@ ResultCode KPageTable::SetProcessMemoryPermission(VAddr addr, std::size_t size, } KMemoryInfo KPageTable::QueryInfoImpl(VAddr addr) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); return block_manager->FindBlock(addr).GetMemoryInfo(); } @@ -781,7 +792,7 @@ KMemoryInfo KPageTable::QueryInfo(VAddr addr) { } ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemoryPermission perm) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryState state{}; KMemoryAttribute attribute{}; @@ -799,7 +810,7 @@ ResultCode KPageTable::ReserveTransferMemory(VAddr addr, std::size_t size, KMemo } ResultCode KPageTable::ResetTransferMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryState state{}; @@ -818,7 +829,7 @@ ResultCode KPageTable::SetMemoryPermission(VAddr addr, std::size_t size, const size_t num_pages = size / PageSize; // Lock the table. 
- std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Verify we can change the memory permission. KMemoryState old_state; @@ -847,7 +858,7 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask KMemoryAttribute::SetMask); // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Verify we can change the memory attribute. KMemoryState old_state; @@ -878,7 +889,7 @@ ResultCode KPageTable::SetMemoryAttribute(VAddr addr, std::size_t size, u32 mask ResultCode KPageTable::SetMaxHeapSize(std::size_t size) { // Lock the table. - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); // Only process page tables are allowed to set heap size. ASSERT(!this->IsKernel()); @@ -889,15 +900,15 @@ ResultCode KPageTable::SetMaxHeapSize(std::size_t size) { } ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { - // Lock the physical memory lock. - std::lock_guard phys_lk(map_physical_memory_lock); + // Lock the physical memory mutex. + KScopedLightLock map_phys_mem_lk(map_physical_memory_lock); // Try to perform a reduction in heap, instead of an extension. VAddr cur_address{}; std::size_t allocation_size{}; { // Lock the table. - std::lock_guard lk(page_table_lock); + KScopedLightLock lk(general_lock); // Validate that setting heap size is possible at all. R_UNLESS(!is_kernel, ResultOutOfMemory); @@ -962,7 +973,7 @@ ResultCode KPageTable::SetHeapSize(VAddr* out, std::size_t size) { // Map the pages. { // Lock the table. - std::lock_guard lk(page_table_lock); + KScopedLightLock lk(general_lock); // Ensure that the heap hasn't changed since we began executing. ASSERT(cur_address == current_heap_end); @@ -1004,7 +1015,7 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, bool is_map_only, VAddr region_start, std::size_t region_num_pages, KMemoryState state, KMemoryPermission perm, PAddr map_addr) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); if (!CanContain(region_start, region_num_pages * PageSize, state)) { return ResultInvalidCurrentMemory; @@ -1035,7 +1046,7 @@ ResultVal<VAddr> KPageTable::AllocateAndMapMemory(std::size_t needed_num_pages, } ResultCode KPageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryPermission perm{}; if (const ResultCode result{CheckMemoryState( @@ -1058,7 +1069,7 @@ ResultCode KPageTable::LockForDeviceAddressSpace(VAddr addr, std::size_t size) { } ResultCode KPageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryPermission perm{}; if (const ResultCode result{CheckMemoryState( @@ -1081,7 +1092,7 @@ ResultCode KPageTable::UnlockForDeviceAddressSpace(VAddr addr, std::size_t size) } ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryPermission new_perm = KMemoryPermission::NotMapped | KMemoryPermission::KernelReadWrite; @@ -1108,7 +1119,7 @@ ResultCode KPageTable::LockForCodeMemory(VAddr addr, std::size_t size) { } ResultCode KPageTable::UnlockForCodeMemory(VAddr addr, std::size_t size) { - std::lock_guard lock{page_table_lock}; + KScopedLightLock lk(general_lock); KMemoryPermission new_perm = KMemoryPermission::UserReadWrite; diff --git a/src/core/hle/kernel/k_page_table.h 
b/src/core/hle/kernel/k_page_table.h index 60ae9b9e8f..c98887d34c 100644 --- a/src/core/hle/kernel/k_page_table.h +++ b/src/core/hle/kernel/k_page_table.h @@ -5,11 +5,12 @@ #pragma once #include <memory> -#include <mutex> +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/page_table.h" #include "core/file_sys/program_metadata.h" +#include "core/hle/kernel/k_light_lock.h" #include "core/hle/kernel/k_memory_block.h" #include "core/hle/kernel/k_memory_manager.h" #include "core/hle/result.h" @@ -22,9 +23,13 @@ namespace Kernel { class KMemoryBlockManager; -class KPageTable final : NonCopyable { +class KPageTable final { public: + YUZU_NON_COPYABLE(KPageTable); + YUZU_NON_MOVEABLE(KPageTable); + explicit KPageTable(Core::System& system_); + ~KPageTable(); ResultCode InitializeForProcess(FileSys::ProgramAddressSpaceType as_type, bool enable_aslr, VAddr code_addr, std::size_t code_size, @@ -142,11 +147,12 @@ private: } bool IsLockedByCurrentThread() const { - return true; + return general_lock.IsLockedByCurrentThread(); } - std::recursive_mutex page_table_lock; - std::mutex map_physical_memory_lock; + mutable KLightLock general_lock; + mutable KLightLock map_physical_memory_lock; + std::unique_ptr<KMemoryBlockManager> block_manager; public: @@ -205,7 +211,7 @@ public: return alias_code_region_end - alias_code_region_start; } size_t GetNormalMemorySize() { - std::lock_guard lk(page_table_lock); + KScopedLightLock lk(general_lock); return GetHeapSize() + mapped_physical_memory_size; } constexpr std::size_t GetAddressSpaceWidth() const { @@ -247,7 +253,9 @@ public: constexpr bool IsInsideASLRRegion(VAddr address, std::size_t size) const { return !IsOutsideASLRRegion(address, size); } - constexpr PAddr GetPhysicalAddr(VAddr addr) { + + PAddr GetPhysicalAddr(VAddr addr) { + ASSERT(IsLockedByCurrentThread()); const auto backing_addr = page_table_impl.backing_addr[addr >> PageBits]; ASSERT(backing_addr); return backing_addr + addr; diff --git a/src/core/hle/kernel/k_slab_heap.h b/src/core/hle/kernel/k_slab_heap.h index 0ad74b0a06..05c0bec9cb 100644 --- a/src/core/hle/kernel/k_slab_heap.h +++ b/src/core/hle/kernel/k_slab_heap.h @@ -7,6 +7,7 @@ #include <atomic> #include "common/assert.h" +#include "common/common_funcs.h" #include "common/common_types.h" namespace Kernel { @@ -15,13 +16,17 @@ class KernelCore; namespace impl { -class KSlabHeapImpl final : NonCopyable { +class KSlabHeapImpl final { public: + YUZU_NON_COPYABLE(KSlabHeapImpl); + YUZU_NON_MOVEABLE(KSlabHeapImpl); + struct Node { Node* next{}; }; constexpr KSlabHeapImpl() = default; + constexpr ~KSlabHeapImpl() = default; void Initialize(std::size_t size) { ASSERT(head == nullptr); @@ -64,9 +69,13 @@ private: } // namespace impl -class KSlabHeapBase : NonCopyable { +class KSlabHeapBase { public: + YUZU_NON_COPYABLE(KSlabHeapBase); + YUZU_NON_MOVEABLE(KSlabHeapBase); + constexpr KSlabHeapBase() = default; + constexpr ~KSlabHeapBase() = default; constexpr bool Contains(uintptr_t addr) const { return start <= addr && addr < end; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 40bb893acd..4f7aebf3fd 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -2613,7 +2613,7 @@ static const FunctionDef SVC_Table_32[] = { {0x33, SvcWrap32<GetThreadContext32>, "GetThreadContext32"}, {0x34, SvcWrap32<WaitForAddress32>, "WaitForAddress32"}, {0x35, SvcWrap32<SignalToAddress32>, "SignalToAddress32"}, - {0x36, nullptr, "Unknown"}, + {0x36, 
SvcWrap32<SynchronizePreemptionState>, "SynchronizePreemptionState32"}, {0x37, nullptr, "Unknown"}, {0x38, nullptr, "Unknown"}, {0x39, nullptr, "Unknown"}, diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 0979fc421f..329f4ba865 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -28,10 +28,10 @@ class Layer; /// Represents a single display type class Display { +public: YUZU_NON_COPYABLE(Display); YUZU_NON_MOVEABLE(Display); -public: /// Constructs a display with a given unique ID and name. /// /// @param id The unique ID for this display. diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 7b1bac3f76..8b6b3b68f1 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -11,6 +11,7 @@ #include <utility> #include <vector> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/file_sys/control_metadata.h" #include "core/file_sys/vfs.h" @@ -139,8 +140,11 @@ std::string GetResultStatusString(ResultStatus status); std::ostream& operator<<(std::ostream& os, ResultStatus status); /// Interface for loading an application -class AppLoader : NonCopyable { +class AppLoader { public: + YUZU_NON_COPYABLE(AppLoader); + YUZU_NON_MOVEABLE(AppLoader); + struct LoadParameters { s32 main_thread_priority; u64 main_thread_stack_size; diff --git a/src/input_common/drivers/udp_client.cpp b/src/input_common/drivers/udp_client.cpp index d1cdb1ab2b..333173e3df 100644 --- a/src/input_common/drivers/udp_client.cpp +++ b/src/input_common/drivers/udp_client.cpp @@ -271,7 +271,7 @@ void UDPClient::OnPadData(Response::PadData data, std::size_t client) { const auto touch_axis_y_id = static_cast<int>(id == 0 ? PadAxes::Touch1Y : PadAxes::Touch2Y); const auto touch_button_id = - static_cast<int>(id == 0 ? PadButton::Touch1 : PadButton::touch2); + static_cast<int>(id == 0 ? 
PadButton::Touch1 : PadButton::Touch2); // TODO: Use custom calibration per device const Common::ParamPackage touch_param(Settings::values.touch_device.GetValue()); @@ -319,6 +319,9 @@ void UDPClient::OnPadData(Response::PadData data, std::size_t client) { SetButton(identifier, button, button_status); } + SetButton(identifier, static_cast<int>(PadButton::Home), data.home != 0); + SetButton(identifier, static_cast<int>(PadButton::TouchHardPress), data.touch_hard_press != 0); + SetBattery(identifier, GetBatteryLevel(data.info.battery)); } @@ -393,7 +396,7 @@ std::vector<Common::ParamPackage> UDPClient::GetInputDevices() const { ButtonMapping UDPClient::GetButtonMappingForDevice(const Common::ParamPackage& params) { // This list excludes any button that can't be really mapped - static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 18> + static constexpr std::array<std::pair<Settings::NativeButton::Values, PadButton>, 20> switch_to_dsu_button = { std::pair{Settings::NativeButton::A, PadButton::Circle}, {Settings::NativeButton::B, PadButton::Cross}, @@ -413,6 +416,8 @@ ButtonMapping UDPClient::GetButtonMappingForDevice(const Common::ParamPackage& p {Settings::NativeButton::SR, PadButton::R2}, {Settings::NativeButton::LStick, PadButton::L3}, {Settings::NativeButton::RStick, PadButton::R3}, + {Settings::NativeButton::Home, PadButton::Home}, + {Settings::NativeButton::Screenshot, PadButton::TouchHardPress}, }; if (!params.Has("guid") || !params.Has("port") || !params.Has("pad")) { return {}; @@ -517,6 +522,12 @@ Common::Input::ButtonNames UDPClient::GetUIButtonName(const Common::ParamPackage return Common::Input::ButtonNames::Share; case PadButton::Options: return Common::Input::ButtonNames::Options; + case PadButton::Home: + return Common::Input::ButtonNames::Home; + case PadButton::Touch1: + case PadButton::Touch2: + case PadButton::TouchHardPress: + return Common::Input::ButtonNames::Touch; default: return Common::Input::ButtonNames::Undefined; } diff --git a/src/input_common/drivers/udp_client.h b/src/input_common/drivers/udp_client.h index 30d7c26821..e9c1781397 100644 --- a/src/input_common/drivers/udp_client.h +++ b/src/input_common/drivers/udp_client.h @@ -84,7 +84,9 @@ private: Cross = 0x4000, Square = 0x8000, Touch1 = 0x10000, - touch2 = 0x20000, + Touch2 = 0x20000, + Home = 0x40000, + TouchHardPress = 0x80000, }; enum class PadAxes : u8 { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h index b480078563..5efbe4e6fe 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h @@ -372,6 +372,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin ScalarU32 value); void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, Register value); +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, + Register value); void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarU32 value); void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -412,6 +414,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b ScalarU32 offset, Register value); void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, Register value); +void 
EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + ScalarU32 offset, Register value); void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarF32 value); void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -448,6 +468,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx); void EmitGlobalAtomicOr64(EmitContext& ctx); void EmitGlobalAtomicXor64(EmitContext& ctx); void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicIAdd32x2(EmitContext& ctx); +void EmitGlobalAtomicSMin32x2(EmitContext& ctx); +void EmitGlobalAtomicUMin32x2(EmitContext& ctx); +void EmitGlobalAtomicSMax32x2(EmitContext& ctx); +void EmitGlobalAtomicUMax32x2(EmitContext& ctx); +void EmitGlobalAtomicInc32x2(EmitContext& ctx); +void EmitGlobalAtomicDec32x2(EmitContext& ctx); +void EmitGlobalAtomicAnd32x2(EmitContext& ctx); +void EmitGlobalAtomicOr32x2(EmitContext& ctx); +void EmitGlobalAtomicXor32x2(EmitContext& ctx); +void EmitGlobalAtomicExchange32x2(EmitContext& ctx); void EmitGlobalAtomicAddF32(EmitContext& ctx); void EmitGlobalAtomicAddF16x2(EmitContext& ctx); void EmitGlobalAtomicAddF32x2(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index f135b67f53..f0fd94a288 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp @@ -311,6 +311,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 poin ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); } +void EmitSharedAtomicExchange32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] ScalarU32 pointer_offset, + [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarU32 value) { Atom(ctx, inst, binding, offset, value, "ADD", "U32"); @@ -411,6 +418,62 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); } +void EmitStorageAtomicIAdd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register 
value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicSMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicUMin32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicSMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicUMax32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicAnd32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicOr32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicXor32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitStorageAtomicExchange32x2([[maybe_unused]] EmitContext& ctx, + [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, + [[maybe_unused]] ScalarU32 offset, + [[maybe_unused]] Register value) { + throw NotImplementedException("GLASM instruction"); +} + void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, ScalarF32 value) { Atom(ctx, inst, binding, offset, value, "ADD", "F32"); @@ -537,6 +600,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) { throw NotImplementedException("GLASM instruction"); } +void EmitGlobalAtomicIAdd32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMin32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMin32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicSMax32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicUMax32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicInc32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicDec32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicAnd32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicOr32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void 
EmitGlobalAtomicXor32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + +void EmitGlobalAtomicExchange32x2(EmitContext&) { + throw NotImplementedException("GLASM instruction"); +} + void EmitGlobalAtomicAddF32(EmitContext&) { throw NotImplementedException("GLASM instruction"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index dc377b053a..a409a7ab37 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -105,6 +105,13 @@ void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_vi pointer_offset, value, pointer_offset, value); } +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, pointer_offset, pointer_offset); + ctx.Add("smem[{}>>2]={}.x;smem[({}+4)>>2]={}.y;", pointer_offset, value, pointer_offset, value); +} + void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), @@ -265,6 +272,97 @@ void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Val ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); } +void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("{}_ssbo{}[{}>>2]+={}.x;{}_ssbo{}[({}>>2)+1]+={}.y;", ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value, ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{ " + "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + 
+void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int " + "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),int({}[i])));}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); + ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]);", inst, ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, binding.U32(), + ctx.var_alloc.Consume(offset)); + ctx.Add("for(int i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],{}[i]);}}", + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, + binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicAnd({}_ssbo{}[{}>>2],{}.x),atomicAnd({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicOr({}_ssbo{}[{}>>2],{}.x),atomicOr({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicXor({}_ssbo{}[{}>>2],{}.x),atomicXor({}_ssbo{}[({}>>2)+1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to 32x2"); + ctx.AddU32x2("{}=uvec2(atomicExchange({}_ssbo{}[{}>>2],{}.x),atomicExchange({}_ssbo{}[({}>>2)+" + "1],{}.y));", + inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, + ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); +} + void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value) { SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); @@ -388,6 +486,50 @@ void EmitGlobalAtomicExchange64(EmitContext&) { throw 
NotImplementedException("GLSL Instrucion"); } +void EmitGlobalAtomicIAdd32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMin32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMin32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicSMax32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicUMax32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicInc32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicDec32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicAnd32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicOr32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicXor32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + +void EmitGlobalAtomicExchange32x2(EmitContext&) { + throw NotImplementedException("GLSL Instrucion"); +} + void EmitGlobalAtomicAddF32(EmitContext&) { throw NotImplementedException("GLSL Instrucion"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 6cabbc717d..704baddc9a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -442,6 +442,8 @@ void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_vi std::string_view value); void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, std::string_view value); +void EmitSharedAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, + std::string_view value); void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -482,6 +484,24 @@ void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& b const IR::Value& offset, std::string_view value); void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); +void EmitStorageAtomicIAdd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMin32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicSMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicUMax32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicAnd32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicOr32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicXor32x2(EmitContext& ctx, IR::Inst& 
inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); +void EmitStorageAtomicExchange32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value); void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, std::string_view value); void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -518,6 +538,17 @@ void EmitGlobalAtomicAnd64(EmitContext& ctx); void EmitGlobalAtomicOr64(EmitContext& ctx); void EmitGlobalAtomicXor64(EmitContext& ctx); void EmitGlobalAtomicExchange64(EmitContext& ctx); +void EmitGlobalAtomicIAdd32x2(EmitContext& ctx); +void EmitGlobalAtomicSMin32x2(EmitContext& ctx); +void EmitGlobalAtomicUMin32x2(EmitContext& ctx); +void EmitGlobalAtomicSMax32x2(EmitContext& ctx); +void EmitGlobalAtomicUMax32x2(EmitContext& ctx); +void EmitGlobalAtomicInc32x2(EmitContext& ctx); +void EmitGlobalAtomicDec32x2(EmitContext& ctx); +void EmitGlobalAtomicAnd32x2(EmitContext& ctx); +void EmitGlobalAtomicOr32x2(EmitContext& ctx); +void EmitGlobalAtomicXor32x2(EmitContext& ctx); +void EmitGlobalAtomicExchange32x2(EmitContext& ctx); void EmitGlobalAtomicAddF32(EmitContext& ctx); void EmitGlobalAtomicAddF16x2(EmitContext& ctx); void EmitGlobalAtomicAddF32x2(EmitContext& ctx); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp index 46ba52a254..d3cbb14a9a 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -82,6 +82,17 @@ Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); return original_value; } + +Id StorageAtomicU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, + Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original_value{ctx.OpLoad(ctx.U32[2], pointer)}; + const Id result{(ctx.*non_atomic_func)(ctx.U32[2], value, original_value)}; + ctx.OpStore(pointer, result); + return original_value; +} } // Anonymous namespace Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { @@ -141,7 +152,7 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { const auto [scope, semantics]{AtomicArgs(ctx)}; return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); } - LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); const Id pointer_1{SharedPointer(ctx, offset, 0)}; const Id pointer_2{SharedPointer(ctx, offset, 1)}; const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; @@ -152,6 +163,18 @@ Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); } +Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id offset, Id value) { + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer_1{SharedPointer(ctx, offset, 0)}; + const Id pointer_2{SharedPointer(ctx, offset, 1)}; + const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; + const Id 
value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; + const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; + ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); + ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); + return ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2); +} + Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); @@ -275,6 +298,56 @@ Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const return original; } +Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpIAdd); +} + +Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMin); +} + +Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMin); +} + +Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpSMax); +} + +Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpUMax); +} + +Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseAnd); +} + +Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseOr); +} + +Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value) { + return StorageAtomicU32x2(ctx, binding, offset, value, &Sirit::Module::OpBitwiseXor); +} + +Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, Id value) { + LOG_WARNING(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); + const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, + binding, offset, sizeof(u32[2]))}; + const Id original{ctx.OpLoad(ctx.U32[2], pointer)}; + ctx.OpStore(pointer, value); + return original; +} + Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { const Id ssbo{ctx.ssbos[binding.U32()].U32}; @@ -418,6 +491,50 @@ Id EmitGlobalAtomicExchange64(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } +Id EmitGlobalAtomicIAdd32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMin32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMin32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicSMax32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicUMax32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicInc32x2(EmitContext&) { + throw 
NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicDec32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicAnd32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicOr32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicXor32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + +Id EmitGlobalAtomicExchange32x2(EmitContext&) { + throw NotImplementedException("SPIR-V Instruction"); +} + Id EmitGlobalAtomicAddF32(EmitContext&) { throw NotImplementedException("SPIR-V Instruction"); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index 887112deb4..f263b41b01 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -335,6 +335,7 @@ Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); +Id EmitSharedAtomicExchange32x2(EmitContext& ctx, Id pointer_offset, Id value); Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, @@ -375,6 +376,24 @@ Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR:: Id value); Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); +Id EmitStorageAtomicIAdd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMin32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicSMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicUMax32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicAnd32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicOr32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicXor32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value); +Id EmitStorageAtomicExchange32x2(EmitContext& ctx, const IR::Value& binding, + const IR::Value& offset, Id value); Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value); Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, @@ -411,6 +430,17 @@ Id EmitGlobalAtomicAnd64(EmitContext& ctx); Id EmitGlobalAtomicOr64(EmitContext& ctx); Id EmitGlobalAtomicXor64(EmitContext& ctx); Id EmitGlobalAtomicExchange64(EmitContext& ctx); +Id EmitGlobalAtomicIAdd32x2(EmitContext& ctx); +Id EmitGlobalAtomicSMin32x2(EmitContext& ctx); +Id EmitGlobalAtomicUMin32x2(EmitContext& ctx); +Id EmitGlobalAtomicSMax32x2(EmitContext& ctx); +Id EmitGlobalAtomicUMax32x2(EmitContext& ctx); +Id EmitGlobalAtomicInc32x2(EmitContext& ctx); +Id EmitGlobalAtomicDec32x2(EmitContext& 
ctx); +Id EmitGlobalAtomicAnd32x2(EmitContext& ctx); +Id EmitGlobalAtomicOr32x2(EmitContext& ctx); +Id EmitGlobalAtomicXor32x2(EmitContext& ctx); +Id EmitGlobalAtomicExchange32x2(EmitContext& ctx); Id EmitGlobalAtomicAddF32(EmitContext& ctx); Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 97e2bf6af4..631446cf7d 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp @@ -118,6 +118,7 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::SharedAtomicXor32: case Opcode::SharedAtomicExchange32: case Opcode::SharedAtomicExchange64: + case Opcode::SharedAtomicExchange32x2: case Opcode::GlobalAtomicIAdd32: case Opcode::GlobalAtomicSMin32: case Opcode::GlobalAtomicUMin32: @@ -138,6 +139,15 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::GlobalAtomicOr64: case Opcode::GlobalAtomicXor64: case Opcode::GlobalAtomicExchange64: + case Opcode::GlobalAtomicIAdd32x2: + case Opcode::GlobalAtomicSMin32x2: + case Opcode::GlobalAtomicUMin32x2: + case Opcode::GlobalAtomicSMax32x2: + case Opcode::GlobalAtomicUMax32x2: + case Opcode::GlobalAtomicAnd32x2: + case Opcode::GlobalAtomicOr32x2: + case Opcode::GlobalAtomicXor32x2: + case Opcode::GlobalAtomicExchange32x2: case Opcode::GlobalAtomicAddF32: case Opcode::GlobalAtomicAddF16x2: case Opcode::GlobalAtomicAddF32x2: @@ -165,6 +175,15 @@ bool Inst::MayHaveSideEffects() const noexcept { case Opcode::StorageAtomicOr64: case Opcode::StorageAtomicXor64: case Opcode::StorageAtomicExchange64: + case Opcode::StorageAtomicIAdd32x2: + case Opcode::StorageAtomicSMin32x2: + case Opcode::StorageAtomicUMin32x2: + case Opcode::StorageAtomicSMax32x2: + case Opcode::StorageAtomicUMax32x2: + case Opcode::StorageAtomicAnd32x2: + case Opcode::StorageAtomicOr32x2: + case Opcode::StorageAtomicXor32x2: + case Opcode::StorageAtomicExchange32x2: case Opcode::StorageAtomicAddF32: case Opcode::StorageAtomicAddF16x2: case Opcode::StorageAtomicAddF32x2: diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index b94ce74061..efb6bfac32 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc @@ -341,6 +341,7 @@ OPCODE(SharedAtomicOr32, U32, U32, OPCODE(SharedAtomicXor32, U32, U32, U32, ) OPCODE(SharedAtomicExchange32, U32, U32, U32, ) OPCODE(SharedAtomicExchange64, U64, U32, U64, ) +OPCODE(SharedAtomicExchange32x2, U32x2, U32, U32x2, ) OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) @@ -362,6 +363,15 @@ OPCODE(GlobalAtomicAnd64, U64, U64, OPCODE(GlobalAtomicOr64, U64, U64, U64, ) OPCODE(GlobalAtomicXor64, U64, U64, U64, ) OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) +OPCODE(GlobalAtomicIAdd32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicSMin32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicUMin32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicSMax32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicUMax32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicAnd32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicOr32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicXor32x2, U32x2, U32x2, U32x2, ) +OPCODE(GlobalAtomicExchange32x2, U32x2, U32x2, U32x2, ) OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) 
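The 32x2 opcodes added above encode a 64-bit operation over a pair of 32-bit words when true Int64 atomics are unavailable. A minimal standalone C++ sketch of the fallback semantics the backends emit (hypothetical ssbo/word_offset names, not part of this patch; note the read-modify-write is not atomic across the pair):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Sketch of the non-atomic 32x2 fallback: a 64-bit exchange emulated as two
    // independent 32-bit stores; a racing thread may observe a torn pair.
    std::array<std::uint32_t, 2> Exchange32x2(std::uint32_t* ssbo, std::size_t word_offset,
                                              std::array<std::uint32_t, 2> value) {
        const std::array<std::uint32_t, 2> original{ssbo[word_offset], ssbo[word_offset + 1]};
        ssbo[word_offset] = value[0];     // low word
        ssbo[word_offset + 1] = value[1]; // high word
        return original; // the opcode yields the previous U32x2 value
    }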
@@ -390,6 +400,15 @@ OPCODE(StorageAtomicAnd64, U64, U32, OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) +OPCODE(StorageAtomicIAdd32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicSMin32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicUMin32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicSMax32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicUMax32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicAnd32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicOr32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicXor32x2, U32x2, U32, U32, U32x2, ) +OPCODE(StorageAtomicExchange32x2, U32x2, U32, U32, U32x2, ) OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index b6a20f904e..bfd2ae650e 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -360,6 +360,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -597,6 +606,15 @@ void VisitUsages(Info& info, IR::Inst& inst) { break; case IR::Opcode::LoadStorage64: case IR::Opcode::WriteStorage64: + case IR::Opcode::StorageAtomicIAdd32x2: + case IR::Opcode::StorageAtomicSMin32x2: + case IR::Opcode::StorageAtomicUMin32x2: + case IR::Opcode::StorageAtomicSMax32x2: + case IR::Opcode::StorageAtomicUMax32x2: + case IR::Opcode::StorageAtomicAnd32x2: + case IR::Opcode::StorageAtomicOr32x2: + case IR::Opcode::StorageAtomicXor32x2: + case IR::Opcode::StorageAtomicExchange32x2: info.used_storage_buffer_types |= IR::Type::U32x2; break; case IR::Opcode::LoadStorage128: diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 4197b0095d..38592afd0f 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -92,6 +92,15 @@ bool IsGlobalMemory(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -135,6 +144,15 @@ bool IsGlobalMemoryWrite(const IR::Inst& inst) { case IR::Opcode::GlobalAtomicOr64: 
case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: @@ -199,6 +217,8 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr32; case IR::Opcode::GlobalAtomicXor32: return IR::Opcode::StorageAtomicXor32; + case IR::Opcode::GlobalAtomicExchange32: + return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicIAdd64: return IR::Opcode::StorageAtomicIAdd64; case IR::Opcode::GlobalAtomicSMin64: @@ -215,10 +235,26 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { return IR::Opcode::StorageAtomicOr64; case IR::Opcode::GlobalAtomicXor64: return IR::Opcode::StorageAtomicXor64; - case IR::Opcode::GlobalAtomicExchange32: - return IR::Opcode::StorageAtomicExchange32; case IR::Opcode::GlobalAtomicExchange64: return IR::Opcode::StorageAtomicExchange64; + case IR::Opcode::GlobalAtomicIAdd32x2: + return IR::Opcode::StorageAtomicIAdd32x2; + case IR::Opcode::GlobalAtomicSMin32x2: + return IR::Opcode::StorageAtomicSMin32x2; + case IR::Opcode::GlobalAtomicUMin32x2: + return IR::Opcode::StorageAtomicUMin32x2; + case IR::Opcode::GlobalAtomicSMax32x2: + return IR::Opcode::StorageAtomicSMax32x2; + case IR::Opcode::GlobalAtomicUMax32x2: + return IR::Opcode::StorageAtomicUMax32x2; + case IR::Opcode::GlobalAtomicAnd32x2: + return IR::Opcode::StorageAtomicAnd32x2; + case IR::Opcode::GlobalAtomicOr32x2: + return IR::Opcode::StorageAtomicOr32x2; + case IR::Opcode::GlobalAtomicXor32x2: + return IR::Opcode::StorageAtomicXor32x2; + case IR::Opcode::GlobalAtomicExchange32x2: + return IR::Opcode::StorageAtomicExchange32x2; case IR::Opcode::GlobalAtomicAddF32: return IR::Opcode::StorageAtomicAddF32; case IR::Opcode::GlobalAtomicAddF16x2: @@ -454,6 +490,15 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, case IR::Opcode::GlobalAtomicOr64: case IR::Opcode::GlobalAtomicXor64: case IR::Opcode::GlobalAtomicExchange64: + case IR::Opcode::GlobalAtomicIAdd32x2: + case IR::Opcode::GlobalAtomicSMin32x2: + case IR::Opcode::GlobalAtomicUMin32x2: + case IR::Opcode::GlobalAtomicSMax32x2: + case IR::Opcode::GlobalAtomicUMax32x2: + case IR::Opcode::GlobalAtomicAnd32x2: + case IR::Opcode::GlobalAtomicOr32x2: + case IR::Opcode::GlobalAtomicXor32x2: + case IR::Opcode::GlobalAtomicExchange32x2: case IR::Opcode::GlobalAtomicAddF32: case IR::Opcode::GlobalAtomicAddF16x2: case IR::Opcode::GlobalAtomicAddF32x2: diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp index e80d3d1d94..c2654cd9b6 100644 --- a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp @@ -199,6 +199,26 @@ void Lower(IR::Block& block, IR::Inst& inst) { return ShiftRightLogical64To32(block, inst); case IR::Opcode::ShiftRightArithmetic64: return ShiftRightArithmetic64To32(block, inst); + case IR::Opcode::SharedAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::SharedAtomicExchange32x2); + case IR::Opcode::GlobalAtomicIAdd64: + return 
inst.ReplaceOpcode(IR::Opcode::GlobalAtomicIAdd32x2); + case IR::Opcode::GlobalAtomicSMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMin32x2); + case IR::Opcode::GlobalAtomicUMin64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMin32x2); + case IR::Opcode::GlobalAtomicSMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicSMax32x2); + case IR::Opcode::GlobalAtomicUMax64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicUMax32x2); + case IR::Opcode::GlobalAtomicAnd64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicAnd32x2); + case IR::Opcode::GlobalAtomicOr64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicOr32x2); + case IR::Opcode::GlobalAtomicXor64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicXor32x2); + case IR::Opcode::GlobalAtomicExchange64: + return inst.ReplaceOpcode(IR::Opcode::GlobalAtomicExchange32x2); default: break; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 048dba4f33..fa26eb8b0e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -131,6 +131,8 @@ public: void DownloadMemory(VAddr cpu_addr, u64 size); + bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<u8> inlined_buffer); + void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); void DisableGraphicsUniformBuffer(size_t stage, u32 index); @@ -808,6 +810,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { return; } MICROPROFILE_SCOPE(GPU_DownloadMemory); + const bool is_accuracy_normal = + Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; u64 total_size_bytes = 0; @@ -819,6 +823,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { buffer.ForEachDownloadRangeAndClear( cpu_addr, size, [&](u64 range_offset, u64 range_size) { + if (is_accuracy_normal) { + return; + } const VAddr buffer_addr = buffer.CpuAddr(); const auto add_download = [&](VAddr start, VAddr end) { const u64 new_offset = start - buffer_addr; @@ -1417,10 +1424,8 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s const IntervalType base_interval{cpu_addr, cpu_addr + size}; common_ranges.add(base_interval); - const bool is_accuracy_high = - Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); - if (!is_async && !is_accuracy_high) { + if (!is_async) { return; } uncommitted_ranges.add(base_interval); @@ -1644,6 +1649,42 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, } template <class P> +bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, + std::span<u8> inlined_buffer) { + const bool is_dirty = IsRegionRegistered(dest_address, copy_size); + if (!is_dirty) { + return false; + } + if (!IsRegionGpuModified(dest_address, copy_size)) { + return false; + } + + const IntervalType subtract_interval{dest_address, dest_address + copy_size}; + ClearDownload(subtract_interval); + common_ranges.subtract(subtract_interval); + + BufferId buffer_id = FindBuffer(dest_address, static_cast<u32>(copy_size)); + auto& buffer = slot_buffers[buffer_id]; + SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); + + if constexpr (USE_MEMORY_MAPS) { + std::array copies{BufferCopy{ + .src_offset = 0, + 
.dst_offset = buffer.Offset(dest_address), + .size = copy_size, + }}; + auto upload_staging = runtime.UploadStagingBuffer(copy_size); + u8* const src_pointer = upload_staging.mapped_span.data(); + std::memcpy(src_pointer, inlined_buffer.data(), copy_size); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies); + } else { + buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); + } + + return true; +} + +template <class P> void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) { DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); } diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 71d7e1473f..351b110feb 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "video_core/engines/engine_upload.h" #include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" #include "video_core/textures/decoders.h" namespace Tegra::Engines::Upload { @@ -16,6 +17,10 @@ State::State(MemoryManager& memory_manager_, Registers& regs_) State::~State() = default; +void State::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + rasterizer = rasterizer_; +} + void State::ProcessExec(const bool is_linear_) { write_offset = 0; copy_size = regs.line_length_in * regs.line_count; @@ -32,7 +37,7 @@ void State::ProcessData(const u32 data, const bool is_last_call) { } const GPUVAddr address{regs.dest.Address()}; if (is_linear) { - memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); + rasterizer->AccelerateInlineToMemory(address, copy_size, inner_buffer); } else { UNIMPLEMENTED_IF(regs.dest.z != 0); UNIMPLEMENTED_IF(regs.dest.depth != 1); diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 1c7f1effac..c9c5ec8c3d 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -12,6 +12,10 @@ namespace Tegra { class MemoryManager; } +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines::Upload { struct Registers { @@ -60,6 +64,9 @@ public: void ProcessExec(bool is_linear_); void ProcessData(u32 data, bool is_last_call); + /// Binds a rasterizer to this engine. 
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + private: u32 write_offset = 0; u32 copy_size = 0; @@ -68,6 +75,7 @@ private: bool is_linear = false; Registers& regs; MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; }; } // namespace Tegra::Engines::Upload diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 492b4c5a38..5a1c120765 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -22,6 +22,7 @@ KeplerCompute::~KeplerCompute() = default; void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; + upload_state.BindRasterizer(rasterizer); } void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 5605511575..8aed16caa9 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -19,6 +19,10 @@ KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) KeplerMemory::~KeplerMemory() = default; +void KeplerMemory::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { + upload_state.BindRasterizer(rasterizer_); +} + void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerMemory register, increase the size of the Regs structure"); diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 0d8ea09a9f..949e2fae1e 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -22,6 +22,10 @@ namespace Tegra { class MemoryManager; } +namespace VideoCore { +class RasterizerInterface; +} + namespace Tegra::Engines { /** @@ -38,6 +42,9 @@ public: explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager); ~KeplerMemory() override; + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + /// Write the value to the register identified by method. 
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c38ebd670a..5d6d217bbd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -31,6 +31,7 @@ Maxwell3D::~Maxwell3D() = default; void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; + upload_state.BindRasterizer(rasterizer_); } void Maxwell3D::InitializeRegisterDefaults() { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f22342dfb8..dc9df6c8bc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1557,7 +1557,8 @@ private: static constexpr u32 null_cb_data = 0xFFFFFFFF; struct CBDataState { - std::array<std::array<u32, 0x4000>, 16> buffer; + static constexpr size_t inline_size = 0x4000; + std::array<std::array<u32, inline_size>, 16> buffer; u32 current{null_cb_data}; u32 id{null_cb_data}; u32 start_pos{}; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 705765c998..ba9ba082f2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -59,6 +59,7 @@ struct GPU::Impl { maxwell_3d->BindRasterizer(rasterizer); fermi_2d->BindRasterizer(rasterizer); kepler_compute->BindRasterizer(rasterizer); + kepler_memory->BindRasterizer(rasterizer); maxwell_dma->BindRasterizer(rasterizer); } @@ -502,8 +503,13 @@ struct GPU::Impl { case BufferMethods::SemaphoreAddressHigh: case BufferMethods::SemaphoreAddressLow: case BufferMethods::SemaphoreSequence: + break; case BufferMethods::UnkCacheFlush: + rasterizer->SyncGuestHost(); + break; case BufferMethods::WrcacheFlush: + rasterizer->SignalReference(); + break; case BufferMethods::FenceValue: break; case BufferMethods::RefCnt: @@ -513,7 +519,7 @@ struct GPU::Impl { ProcessFenceActionMethod(); break; case BufferMethods::WaitForInterrupt: - ProcessWaitForInterruptMethod(); + rasterizer->WaitForIdle(); break; case BufferMethods::SemaphoreTrigger: { ProcessSemaphoreTriggerMethod(); diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 38d8d9d746..61bfe47c7e 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -143,6 +143,8 @@ public: [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); void Unmap(GPUVAddr gpu_addr, std::size_t size); + void FlushRegion(GPUVAddr gpu_addr, size_t size) const; + private: [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); @@ -153,8 +155,6 @@ private: void TryLockPage(PageEntry page_entry, std::size_t size); void TryUnlockPage(PageEntry page_entry, std::size_t size); - void FlushRegion(GPUVAddr gpu_addr, size_t size) const; - void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, bool is_safe) const; void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size, diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index b094fc064c..1f1f122910 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -123,6 +123,9 @@ public: [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; + virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, + std::span<u8> memory) = 0; + /// Attempt to use a faster method to display 
the framebuffer to screen [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index bb204454e4..c5f9740805 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -5,9 +5,10 @@ #pragma once #include <atomic> +#include <functional> #include <memory> -#include <optional> +#include "common/common_funcs.h" #include "common/common_types.h" #include "core/frontend/emu_window.h" #include "video_core/gpu.h" @@ -28,8 +29,11 @@ struct RendererSettings { Layout::FramebufferLayout screenshot_framebuffer_layout; }; -class RendererBase : NonCopyable { +class RendererBase { public: + YUZU_NON_COPYABLE(RendererBase); + YUZU_NON_MOVEABLE(RendererBase); + explicit RendererBase(Core::Frontend::EmuWindow& window, std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 9b516c64ff..142412a8e3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -484,6 +484,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() return accelerate_dma; } +void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, + std::span<u8> memory) { + auto cpu_addr = gpu_memory.GpuToCpuAddress(address); + if (!cpu_addr) [[unlikely]] { + gpu_memory.WriteBlock(address, memory.data(), copy_size); + return; + } + gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); + { + std::unique_lock<std::mutex> lock{buffer_cache.mutex}; + if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { + buffer_cache.WriteMemory(*cpu_addr, copy_size); + } + } + { + std::scoped_lock lock_texture{texture_cache.mutex}; + texture_cache.WriteMemory(*cpu_addr, copy_size); + } + shader_cache.InvalidateRegion(*cpu_addr, copy_size); + query_cache.InvalidateRegion(*cpu_addr, copy_size); +} + bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { if (framebuffer_addr == 0) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d0397b7454..98f6fd3429 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -106,6 +106,8 @@ public: const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; + void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, + std::span<u8> memory) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index b2d5bfd3ba..84e07f8bd4 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -7,12 +7,14 @@ #include <string_view> #include <utility> #include <glad/glad.h> -#include "common/common_types.h" +#include "common/common_funcs.h" namespace OpenGL { -class OGLRenderbuffer : private NonCopyable { +class OGLRenderbuffer final { public: + 
YUZU_NON_COPYABLE(OGLRenderbuffer); + OGLRenderbuffer() = default; OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -36,8 +38,10 @@ public: GLuint handle = 0; }; -class OGLTexture : private NonCopyable { +class OGLTexture final { public: + YUZU_NON_COPYABLE(OGLTexture); + OGLTexture() = default; OGLTexture(OGLTexture&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -61,8 +65,10 @@ public: GLuint handle = 0; }; -class OGLTextureView : private NonCopyable { +class OGLTextureView final { public: + YUZU_NON_COPYABLE(OGLTextureView); + OGLTextureView() = default; OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -86,8 +92,10 @@ public: GLuint handle = 0; }; -class OGLSampler : private NonCopyable { +class OGLSampler final { public: + YUZU_NON_COPYABLE(OGLSampler); + OGLSampler() = default; OGLSampler(OGLSampler&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -111,8 +119,10 @@ public: GLuint handle = 0; }; -class OGLShader : private NonCopyable { +class OGLShader final { public: + YUZU_NON_COPYABLE(OGLShader); + OGLShader() = default; OGLShader(OGLShader&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -132,8 +142,10 @@ public: GLuint handle = 0; }; -class OGLProgram : private NonCopyable { +class OGLProgram final { public: + YUZU_NON_COPYABLE(OGLProgram); + OGLProgram() = default; OGLProgram(OGLProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -154,8 +166,10 @@ public: GLuint handle = 0; }; -class OGLAssemblyProgram : private NonCopyable { +class OGLAssemblyProgram final { public: + YUZU_NON_COPYABLE(OGLAssemblyProgram); + OGLAssemblyProgram() = default; OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -176,8 +190,10 @@ public: GLuint handle = 0; }; -class OGLPipeline : private NonCopyable { +class OGLPipeline final { public: + YUZU_NON_COPYABLE(OGLPipeline); + OGLPipeline() = default; OGLPipeline(OGLPipeline&& o) noexcept : handle{std::exchange<GLuint>(o.handle, 0)} {} @@ -198,8 +214,10 @@ public: GLuint handle = 0; }; -class OGLBuffer : private NonCopyable { +class OGLBuffer final { public: + YUZU_NON_COPYABLE(OGLBuffer); + OGLBuffer() = default; OGLBuffer(OGLBuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -223,8 +241,10 @@ public: GLuint handle = 0; }; -class OGLSync : private NonCopyable { +class OGLSync final { public: + YUZU_NON_COPYABLE(OGLSync); + OGLSync() = default; OGLSync(OGLSync&& o) noexcept : handle(std::exchange(o.handle, nullptr)) {} @@ -247,8 +267,10 @@ public: GLsync handle = 0; }; -class OGLFramebuffer : private NonCopyable { +class OGLFramebuffer final { public: + YUZU_NON_COPYABLE(OGLFramebuffer); + OGLFramebuffer() = default; OGLFramebuffer(OGLFramebuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} @@ -272,8 +294,10 @@ public: GLuint handle = 0; }; -class OGLQuery : private NonCopyable { +class OGLQuery final { public: + YUZU_NON_COPYABLE(OGLQuery); + OGLQuery() = default; OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {} diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fd334a1462..2227d91978 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -548,6 +548,28 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() return accelerate_dma; } +void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, 
size_t copy_size, + std::span<u8> memory) { + auto cpu_addr = gpu_memory.GpuToCpuAddress(address); + if (!cpu_addr) [[unlikely]] { + gpu_memory.WriteBlock(address, memory.data(), copy_size); + return; + } + gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size); + { + std::unique_lock<std::mutex> lock{buffer_cache.mutex}; + if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) { + buffer_cache.WriteMemory(*cpu_addr, copy_size); + } + } + { + std::scoped_lock lock_texture{texture_cache.mutex}; + texture_cache.WriteMemory(*cpu_addr, copy_size); + } + pipeline_cache.InvalidateRegion(*cpu_addr, copy_size); + query_cache.InvalidateRegion(*cpu_addr, copy_size); +} + bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { if (!framebuffer_addr) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8668272477..5af2e275b0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -99,6 +99,8 @@ public: const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; + void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, + std::span<u8> memory) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 3bfdf41ba7..7d9d4f7ba2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -140,12 +140,12 @@ bool VKScheduler::UpdateRescaling(bool is_rescaling) { void VKScheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { - if (work_queue.empty()) { - wait_cv.notify_all(); - } std::unique_ptr<CommandChunk> work; { std::unique_lock lock{work_mutex}; + if (work_queue.empty()) { + wait_cv.notify_all(); + } work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); if (stop_token.stop_requested()) { continue; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 1b06c92967..e69aa136bd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -146,6 +146,7 @@ private: using FuncType = TypedCommand<T>; static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + recorded_counts++; command_offset = Common::AlignUp(command_offset, alignof(FuncType)); if (command_offset > sizeof(data) - sizeof(FuncType)) { return false; @@ -167,7 +168,7 @@ private: } bool Empty() const { - return command_offset == 0; + return recorded_counts == 0; } bool HasSubmit() const { @@ -178,6 +179,7 @@ private: Command* first = nullptr; Command* last = nullptr; + size_t recorded_counts = 0; size_t command_offset = 0; bool submit = false; alignas(std::max_align_t) std::array<u8, 0x8000> data{}; diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index 78bf90c48f..87636857d9 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp @@ -170,7 +170,7 @@ void ShaderCache::RemovePendingShaders() { marked_for_removal.clear(); if (!removed_shaders.empty()) { - 
RemoveShadersFromStorage(std::move(removed_shaders)); + RemoveShadersFromStorage(removed_shaders); } } @@ -213,7 +213,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) { rasterizer.UpdatePagesCachedCount(addr, size, -1); } -void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { +void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) { // Remove them from the cache std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 136fe294cb..8836bc8c6e 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,7 +4,6 @@ #pragma once -#include <algorithm> #include <array> #include <memory> #include <mutex> @@ -138,7 +137,7 @@ private: /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); + void RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked diff --git a/src/yuzu/configuration/configure_dialog.cpp b/src/yuzu/configuration/configure_dialog.cpp index 464e7a4893..19133ccf5c 100644 --- a/src/yuzu/configuration/configure_dialog.cpp +++ b/src/yuzu/configuration/configure_dialog.cpp @@ -109,7 +109,7 @@ void ConfigureDialog::ApplyConfiguration() { ui_tab->ApplyConfiguration(); system_tab->ApplyConfiguration(); profile_tab->ApplyConfiguration(); - filesystem_tab->applyConfiguration(); + filesystem_tab->ApplyConfiguration(); input_tab->ApplyConfiguration(); hotkeys_tab->ApplyConfiguration(registry); cpu_tab->ApplyConfiguration(); diff --git a/src/yuzu/configuration/configure_filesystem.cpp b/src/yuzu/configuration/configure_filesystem.cpp index 9cb3178229..d6fb43f8b8 100644 --- a/src/yuzu/configuration/configure_filesystem.cpp +++ b/src/yuzu/configuration/configure_filesystem.cpp @@ -14,7 +14,7 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent) : QWidget(parent), ui(std::make_unique<Ui::ConfigureFilesystem>()) { ui->setupUi(this); - this->setConfiguration(); + SetConfiguration(); connect(ui->nand_directory_button, &QToolButton::pressed, this, [this] { SetDirectory(DirectoryTarget::NAND, ui->nand_directory_edit); }); @@ -38,7 +38,15 @@ ConfigureFilesystem::ConfigureFilesystem(QWidget* parent) ConfigureFilesystem::~ConfigureFilesystem() = default; -void ConfigureFilesystem::setConfiguration() { +void ConfigureFilesystem::changeEvent(QEvent* event) { + if (event->type() == QEvent::LanguageChange) { + RetranslateUI(); + } + + QWidget::changeEvent(event); +} + +void ConfigureFilesystem::SetConfiguration() { ui->nand_directory_edit->setText( QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::NANDDir))); ui->sdmc_directory_edit->setText( @@ -60,7 +68,7 @@ void ConfigureFilesystem::setConfiguration() { UpdateEnabledControls(); } -void ConfigureFilesystem::applyConfiguration() { +void ConfigureFilesystem::ApplyConfiguration() { Common::FS::SetYuzuPath(Common::FS::YuzuPath::NANDDir, ui->nand_directory_edit->text().toStdString()); Common::FS::SetYuzuPath(Common::FS::YuzuPath::SDMCDir, @@ -143,6 +151,6 @@ void ConfigureFilesystem::UpdateEnabledControls() { !ui->gamecard_current_game->isChecked()); } -void 
ConfigureFilesystem::retranslateUi() { +void ConfigureFilesystem::RetranslateUI() { ui->retranslateUi(this); } diff --git a/src/yuzu/configuration/configure_filesystem.h b/src/yuzu/configuration/configure_filesystem.h index 2147cd4050..b4f9355eb5 100644 --- a/src/yuzu/configuration/configure_filesystem.h +++ b/src/yuzu/configuration/configure_filesystem.h @@ -20,11 +20,13 @@ public: explicit ConfigureFilesystem(QWidget* parent = nullptr); ~ConfigureFilesystem() override; - void applyConfiguration(); - void retranslateUi(); + void ApplyConfiguration(); private: - void setConfiguration(); + void changeEvent(QEvent* event) override; + + void RetranslateUI(); + void SetConfiguration(); enum class DirectoryTarget { NAND, diff --git a/src/yuzu/configuration/configure_hotkeys.cpp b/src/yuzu/configuration/configure_hotkeys.cpp index be10e0a31e..53e629a5eb 100644 --- a/src/yuzu/configuration/configure_hotkeys.cpp +++ b/src/yuzu/configuration/configure_hotkeys.cpp @@ -178,52 +178,52 @@ void ConfigureHotkeys::SetPollingResult(Core::HID::NpadButton button, const bool QString ConfigureHotkeys::GetButtonName(Core::HID::NpadButton button) const { Core::HID::NpadButtonState state{button}; if (state.a) { - return tr("A"); + return QStringLiteral("A"); } if (state.b) { - return tr("B"); + return QStringLiteral("B"); } if (state.x) { - return tr("X"); + return QStringLiteral("X"); } if (state.y) { - return tr("Y"); + return QStringLiteral("Y"); } if (state.l || state.right_sl || state.left_sl) { - return tr("L"); + return QStringLiteral("L"); } if (state.r || state.right_sr || state.left_sr) { - return tr("R"); + return QStringLiteral("R"); } if (state.zl) { - return tr("ZL"); + return QStringLiteral("ZL"); } if (state.zr) { - return tr("ZR"); + return QStringLiteral("ZR"); } if (state.left) { - return tr("Dpad_Left"); + return QStringLiteral("Dpad_Left"); } if (state.right) { - return tr("Dpad_Right"); + return QStringLiteral("Dpad_Right"); } if (state.up) { - return tr("Dpad_Up"); + return QStringLiteral("Dpad_Up"); } if (state.down) { - return tr("Dpad_Down"); + return QStringLiteral("Dpad_Down"); } if (state.stick_l) { - return tr("Left_Stick"); + return QStringLiteral("Left_Stick"); } if (state.stick_r) { - return tr("Right_Stick"); + return QStringLiteral("Right_Stick"); } if (state.minus) { - return tr("Minus"); + return QStringLiteral("Minus"); } if (state.plus) { - return tr("Plus"); + return QStringLiteral("Plus"); } return tr("Invalid"); } diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 7525042366..cc0534907a 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp @@ -102,6 +102,10 @@ QString GetButtonName(Common::Input::ButtonNames button_name) { return QObject::tr("Share"); case Common::Input::ButtonNames::Options: return QObject::tr("Options"); + case Common::Input::ButtonNames::Home: + return QObject::tr("Home"); + case Common::Input::ButtonNames::Touch: + return QObject::tr("Touch"); case Common::Input::ButtonNames::ButtonMouseWheel: return QObject::tr("Wheel", "Indicates the mouse wheel"); case Common::Input::ButtonNames::ButtonBackward: @@ -326,7 +330,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(button, &QPushButton::clicked, [=, this] { HandleClick( button, button_id, - [=, this](Common::ParamPackage params) { + [=, this](const Common::ParamPackage& params) { emulated_controller->SetButtonParam(button_id, 
params); }, InputCommon::Polling::InputType::Button); @@ -392,7 +396,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i connect(button, &QPushButton::clicked, [=, this] { HandleClick( button, motion_id, - [=, this](Common::ParamPackage params) { + [=, this](const Common::ParamPackage& params) { emulated_controller->SetMotionParam(motion_id, params); }, InputCommon::Polling::InputType::Motion); @@ -497,10 +501,11 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i param.Set("invert_y", invert_str); emulated_controller->SetStickParam(analog_id, param); } - for (int sub_button_id = 0; sub_button_id < ANALOG_SUB_BUTTONS_NUM; - ++sub_button_id) { - analog_map_buttons[analog_id][sub_button_id]->setText( - AnalogToText(param, analog_sub_buttons[sub_button_id])); + for (int analog_sub_button_id = 0; + analog_sub_button_id < ANALOG_SUB_BUTTONS_NUM; + ++analog_sub_button_id) { + analog_map_buttons[analog_id][analog_sub_button_id]->setText( + AnalogToText(param, analog_sub_buttons[analog_sub_button_id])); } }); context_menu.exec(analog_map_buttons[analog_id][sub_button_id]->mapToGlobal( @@ -783,7 +788,7 @@ void ConfigureInputPlayer::UpdateInputDeviceCombobox() { if (devices.size() == 1) { const auto devices_it = std::find_if( input_devices.begin(), input_devices.end(), - [first_engine, first_guid, first_port, first_pad](const Common::ParamPackage param) { + [first_engine, first_guid, first_port, first_pad](const Common::ParamPackage& param) { return param.Get("engine", "") == first_engine && param.Get("guid", "") == first_guid && param.Get("port", 0) == first_port && param.Get("pad", 0) == first_pad; @@ -814,7 +819,7 @@ void ConfigureInputPlayer::UpdateInputDeviceCombobox() { if (is_engine_equal && is_port_equal) { const auto devices_it = std::find_if( input_devices.begin(), input_devices.end(), - [first_engine, first_guid, second_guid, first_port](const Common::ParamPackage param) { + [first_engine, first_guid, second_guid, first_port](const Common::ParamPackage& param) { const bool is_guid_valid = (param.Get("guid", "") == first_guid && param.Get("guid2", "") == second_guid) || @@ -1026,7 +1031,7 @@ int ConfigureInputPlayer::GetIndexFromControllerType(Core::HID::NpadStyleIndex t void ConfigureInputPlayer::UpdateInputDevices() { input_devices = input_subsystem->GetInputDevices(); ui->comboDevices->clear(); - for (auto device : input_devices) { + for (const auto& device : input_devices) { ui->comboDevices->addItem(QString::fromStdString(device.Get("display", "Unknown")), {}); } } @@ -1308,7 +1313,7 @@ void ConfigureInputPlayer::HandleClick( } button->setFocus(); - input_setter = new_input_setter; + input_setter = std::move(new_input_setter); input_subsystem->BeginMapping(type); @@ -1358,7 +1363,7 @@ bool ConfigureInputPlayer::IsInputAcceptable(const Common::ParamPackage& params) return params.Get("engine", "") == "keyboard" || params.Get("engine", "") == "mouse"; } - const auto current_input_device = input_devices[ui->comboDevices->currentIndex()]; + const auto& current_input_device = input_devices[ui->comboDevices->currentIndex()]; return params.Get("engine", "") == current_input_device.Get("engine", "") && (params.Get("guid", "") == current_input_device.Get("guid", "") || params.Get("guid", "") == current_input_device.Get("guid2", "")) && diff --git a/src/yuzu/configuration/configure_motion_touch.cpp b/src/yuzu/configuration/configure_motion_touch.cpp index 8539a5c8b6..4340de304c 100644 --- 
a/src/yuzu/configuration/configure_motion_touch.cpp +++ b/src/yuzu/configuration/configure_motion_touch.cpp @@ -42,23 +42,25 @@ CalibrationConfigurationDialog::CalibrationConfigurationDialog(QWidget* parent, job = std::make_unique<CalibrationConfigurationJob>( host, port, [this](CalibrationConfigurationJob::Status status) { - QString text; - switch (status) { - case CalibrationConfigurationJob::Status::Ready: - text = tr("Touch the top left corner <br>of your touchpad."); - break; - case CalibrationConfigurationJob::Status::Stage1Completed: - text = tr("Now touch the bottom right corner <br>of your touchpad."); - break; - case CalibrationConfigurationJob::Status::Completed: - text = tr("Configuration completed!"); - break; - default: - break; - } - QMetaObject::invokeMethod(this, "UpdateLabelText", Q_ARG(QString, text)); + QMetaObject::invokeMethod(this, [status, this] { + QString text; + switch (status) { + case CalibrationConfigurationJob::Status::Ready: + text = tr("Touch the top left corner <br>of your touchpad."); + break; + case CalibrationConfigurationJob::Status::Stage1Completed: + text = tr("Now touch the bottom right corner <br>of your touchpad."); + break; + case CalibrationConfigurationJob::Status::Completed: + text = tr("Configuration completed!"); + break; + default: + break; + } + UpdateLabelText(text); + }); if (status == CalibrationConfigurationJob::Status::Completed) { - QMetaObject::invokeMethod(this, "UpdateButtonText", Q_ARG(QString, tr("OK"))); + QMetaObject::invokeMethod(this, [this] { UpdateButtonText(tr("OK")); }); } }, [this](u16 min_x_, u16 min_y_, u16 max_x_, u16 max_y_) { @@ -215,11 +217,11 @@ void ConfigureMotionTouch::OnCemuhookUDPTest() { ui->udp_server->text().toStdString(), static_cast<u16>(ui->udp_port->text().toInt()), [this] { LOG_INFO(Frontend, "UDP input test success"); - QMetaObject::invokeMethod(this, "ShowUDPTestResult", Q_ARG(bool, true)); + QMetaObject::invokeMethod(this, [this] { ShowUDPTestResult(true); }); }, [this] { LOG_ERROR(Frontend, "UDP input test failed"); - QMetaObject::invokeMethod(this, "ShowUDPTestResult", Q_ARG(bool, false)); + QMetaObject::invokeMethod(this, [this] { ShowUDPTestResult(false); }); }); } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 8b5c4a10ae..e3661b3902 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -161,7 +161,7 @@ GameListSearchField::GameListSearchField(GameList* parent) : QWidget{parent} { * @return true if the haystack contains all words of userinput */ static bool ContainsAllWords(const QString& haystack, const QString& userinput) { - const QStringList userinput_split = userinput.split(QLatin1Char{' '}, QString::SkipEmptyParts); + const QStringList userinput_split = userinput.split(QLatin1Char{' '}, Qt::SkipEmptyParts); return std::all_of(userinput_split.begin(), userinput_split.end(), [&haystack](const QString& s) { return haystack.contains(s); }); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index d9e689d14b..556d2cdb38 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -965,6 +965,7 @@ void GMainWindow::LinkActionShortcut(QAction* action, const QString& action_name static const QString main_window = QStringLiteral("Main Window"); action->setShortcut(hotkey_registry.GetKeySequence(main_window, action_name)); action->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, action_name)); + action->setAutoRepeat(false); this->addAction(action);
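The QMetaObject::invokeMethod changes above replace string-based method lookup with the functor overload (available since Qt 5.10), which is checked at compile time and, under the default Qt::AutoConnection, queues the lambda on the receiver's thread when called from a worker. A minimal sketch under that assumption (hypothetical OnCalibrationDone/status_label names):

    #include <QLabel>
    #include <QMetaObject>

    // From a worker callback, queue a GUI update onto the label's thread; no
    // Q_INVOKABLE slot or string-based method name is required.
    void OnCalibrationDone(QLabel* status_label) {
        QMetaObject::invokeMethod(status_label, [status_label] {
            status_label->setText(QStringLiteral("Configuration completed!"));
        });
    }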