diff options
Diffstat (limited to 'src')
40 files changed, 562 insertions, 420 deletions
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 282f345c55..6ebed3fb0d 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -26,6 +26,18 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params, QueueMixedBuffer(2); } +u32 AudioRenderer::GetSampleRate() const { + return worker_params.sample_rate; +} + +u32 AudioRenderer::GetSampleCount() const { + return worker_params.sample_count; +} + +u32 AudioRenderer::GetMixBufferCount() const { + return worker_params.mix_buffer_count; +} + std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) { // Copy UpdateDataHeader struct UpdateDataHeader config{}; diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 6950a4681c..13c5d0adc9 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -26,7 +26,7 @@ enum class PlayState : u8 { struct AudioRendererParameter { u32_le sample_rate; u32_le sample_count; - u32_le unknown_8; + u32_le mix_buffer_count; u32_le unknown_c; u32_le voice_count; u32_le sink_count; @@ -160,6 +160,9 @@ public: std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params); void QueueMixedBuffer(Buffer::Tag tag); void ReleaseAndQueueBuffers(); + u32 GetSampleRate() const; + u32 GetSampleCount() const; + u32 GetMixBufferCount() const; private: class VoiceState { diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 1501ef1f48..5a1177d0c9 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp @@ -4,6 +4,7 @@ #include <algorithm> #include <cstring> +#include <mutex> #include "audio_core/cubeb_sink.h" #include "audio_core/stream.h" @@ -66,6 +67,8 @@ public: return; } + std::lock_guard lock{queue_mutex}; + queue.reserve(queue.size() + samples.size() * GetNumChannels()); if (is_6_channel) { @@ -94,6 +97,7 @@ private: u32 num_channels{}; bool is_6_channel{}; + std::mutex queue_mutex; std::vector<s16> queue; static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer, @@ -153,6 +157,8 @@ long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const v return {}; } + std::lock_guard lock{impl->queue_mutex}; + const size_t frames_to_write{ std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))}; diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h index 38a450d693..133122c5f4 100644 --- a/src/common/thread_queue_list.h +++ b/src/common/thread_queue_list.h @@ -16,7 +16,7 @@ struct ThreadQueueList { // (dynamically resizable) circular buffers to remove their overhead when // inserting and popping. - typedef unsigned int Priority; + using Priority = unsigned int; // Number of priority levels. (Valid levels are [0..NUM_QUEUES).) static const Priority NUM_QUEUES = N; @@ -26,9 +26,9 @@ struct ThreadQueueList { } // Only for debugging, returns priority level. - Priority contains(const T& uid) { + Priority contains(const T& uid) const { for (Priority i = 0; i < NUM_QUEUES; ++i) { - Queue& cur = queues[i]; + const Queue& cur = queues[i]; if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) { return i; } @@ -37,8 +37,8 @@ struct ThreadQueueList { return -1; } - T get_first() { - Queue* cur = first; + T get_first() const { + const Queue* cur = first; while (cur != nullptr) { if (!cur->data.empty()) { return cur->data.front(); diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index ceb3f76835..0996f129c5 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -86,7 +86,16 @@ public: } void AddTicks(u64 ticks) override { - CoreTiming::AddTicks(ticks - num_interpreted_instructions); + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = (ticks - num_interpreted_instructions) / Core::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max<u64>(amortized_ticks, 1); + + CoreTiming::AddTicks(amortized_ticks); num_interpreted_instructions = 0; } u64 GetTicksRemaining() override { diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 46a522fcd4..b042ee02bd 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -14,6 +14,7 @@ #include "core/core_timing.h" #include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" #include "core/settings.h" namespace Core { @@ -90,6 +91,7 @@ void Cpu::RunLoop(bool tight_loop) { LOG_TRACE(Core, "Core-{} idling", core_index); if (IsMainCore()) { + // TODO(Subv): Only let CoreTiming idle if all 4 cores are idling. CoreTiming::Idle(); CoreTiming::Advance(); } @@ -125,6 +127,8 @@ void Cpu::Reschedule() { } reschedule_pending = false; + // Lock the global kernel mutex when we manipulate the HLE state + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); scheduler->Reschedule(); } diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 9769529033..56cdae1947 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -79,7 +79,7 @@ private: std::shared_ptr<CpuBarrier> cpu_barrier; std::shared_ptr<Kernel::Scheduler> scheduler; - bool reschedule_pending{}; + std::atomic<bool> reschedule_pending = false; size_t core_index; }; diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index d3bb6f8188..f977d1b325 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -135,11 +135,9 @@ void ClearPendingEvents() { void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) { ASSERT(event_type != nullptr); s64 timeout = GetTicks() + cycles_into_future; - // If this event needs to be scheduled before the next advance(), force one early if (!is_global_timer_sane) ForceExceptionCheck(cycles_into_future); - event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type}); std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>()); } diff --git a/src/core/file_sys/card_image.cpp b/src/core/file_sys/card_image.cpp index a4823353e0..8e05b9d0eb 100644 --- a/src/core/file_sys/card_image.cpp +++ b/src/core/file_sys/card_image.cpp @@ -107,19 +107,19 @@ VirtualFile XCI::GetNCAFileByType(NCAContentType type) const { return nullptr; } -std::vector<std::shared_ptr<VfsFile>> XCI::GetFiles() const { +std::vector<VirtualFile> XCI::GetFiles() const { return {}; } -std::vector<std::shared_ptr<VfsDirectory>> XCI::GetSubdirectories() const { - return std::vector<std::shared_ptr<VfsDirectory>>(); +std::vector<VirtualDir> XCI::GetSubdirectories() const { + return {}; } std::string XCI::GetName() const { return file->GetName(); } -std::shared_ptr<VfsDirectory> XCI::GetParentDirectory() const { +VirtualDir XCI::GetParentDirectory() const { return file->GetContainingDirectory(); } diff --git a/src/core/file_sys/card_image.h b/src/core/file_sys/card_image.h index e089d737cb..4618d9c001 100644 --- a/src/core/file_sys/card_image.h +++ b/src/core/file_sys/card_image.h @@ -71,13 +71,13 @@ public: std::shared_ptr<NCA> GetNCAByType(NCAContentType type) const; VirtualFile GetNCAFileByType(NCAContentType type) const; - std::vector<std::shared_ptr<VfsFile>> GetFiles() const override; + std::vector<VirtualFile> GetFiles() const override; - std::vector<std::shared_ptr<VfsDirectory>> GetSubdirectories() const override; + std::vector<VirtualDir> GetSubdirectories() const override; std::string GetName() const override; - std::shared_ptr<VfsDirectory> GetParentDirectory() const override; + VirtualDir GetParentDirectory() const override; protected: bool ReplaceFileWithSubdirectory(VirtualFile file, VirtualDir dir) override; diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h index 7c7a75816c..be7bc32a87 100644 --- a/src/core/file_sys/partition_filesystem.h +++ b/src/core/file_sys/partition_filesystem.h @@ -13,7 +13,7 @@ #include "core/file_sys/vfs.h" namespace Loader { -enum class ResultStatus; +enum class ResultStatus : u16; } namespace FileSys { diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 06a7315db4..74a91052b9 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h @@ -13,7 +13,7 @@ #include "partition_filesystem.h" namespace Loader { -enum class ResultStatus; +enum class ResultStatus : u16; } namespace FileSys { diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h index 141a053ce0..78a63c59be 100644 --- a/src/core/file_sys/vfs.h +++ b/src/core/file_sys/vfs.h @@ -15,9 +15,9 @@ namespace FileSys { -struct VfsFilesystem; -struct VfsFile; -struct VfsDirectory; +class VfsDirectory; +class VfsFile; +class VfsFilesystem; // Convenience typedefs to use Vfs* interfaces using VirtualFilesystem = std::shared_ptr<VfsFilesystem>; @@ -34,8 +34,9 @@ enum class VfsEntryType { // A class representing an abstract filesystem. A default implementation given the root VirtualDir // is provided for convenience, but if the Vfs implementation has any additional state or // functionality, they will need to override. -struct VfsFilesystem : NonCopyable { - VfsFilesystem(VirtualDir root); +class VfsFilesystem : NonCopyable { +public: + explicit VfsFilesystem(VirtualDir root); virtual ~VfsFilesystem(); // Gets the friendly name for the filesystem. @@ -81,7 +82,8 @@ protected: }; // A class representing a file in an abstract filesystem. -struct VfsFile : NonCopyable { +class VfsFile : NonCopyable { +public: virtual ~VfsFile(); // Retrieves the file name. @@ -179,7 +181,8 @@ struct VfsFile : NonCopyable { }; // A class representing a directory in an abstract filesystem. -struct VfsDirectory : NonCopyable { +class VfsDirectory : NonCopyable { +public: virtual ~VfsDirectory(); // Retrives the file located at path as if the current directory was root. Returns nullptr if @@ -295,7 +298,8 @@ protected: // A convenience partial-implementation of VfsDirectory that stubs out methods that should only work // if writable. This is to avoid redundant empty methods everywhere. -struct ReadOnlyVfsDirectory : public VfsDirectory { +class ReadOnlyVfsDirectory : public VfsDirectory { +public: bool IsWritable() const override; bool IsReadable() const override; std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override; diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h index 235970dc5e..cb92d15707 100644 --- a/src/core/file_sys/vfs_offset.h +++ b/src/core/file_sys/vfs_offset.h @@ -15,7 +15,8 @@ namespace FileSys { // Similar to seeking to an offset. // If the file is writable, operations that would write past the end of the offset file will expand // the size of this wrapper. -struct OffsetVfsFile : public VfsFile { +class OffsetVfsFile : public VfsFile { +public: OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0, std::string new_name = "", VirtualDir new_parent = nullptr); diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h index ba469647bf..b3b4682338 100644 --- a/src/core/file_sys/vfs_vector.h +++ b/src/core/file_sys/vfs_vector.h @@ -10,7 +10,8 @@ namespace FileSys { // An implementation of VfsDirectory that maintains two vectors for subdirectories and files. // Vector data is supplied upon construction. -struct VectorVfsDirectory : public VfsDirectory { +class VectorVfsDirectory : public VfsDirectory { +public: explicit VectorVfsDirectory(std::vector<VirtualFile> files = {}, std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr, std::string name = ""); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 1b0cd0abf9..8c19e86d3b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -11,7 +11,7 @@ namespace Kernel { -unsigned int Object::next_object_id; +std::atomic<u32> Object::next_object_id{0}; /// Initialize the kernel void Init() { diff --git a/src/core/hle/kernel/object.h b/src/core/hle/kernel/object.h index 83df68dfd1..526ac9cc3c 100644 --- a/src/core/hle/kernel/object.h +++ b/src/core/hle/kernel/object.h @@ -4,6 +4,7 @@ #pragma once +#include <atomic> #include <string> #include <utility> @@ -42,8 +43,8 @@ public: virtual ~Object(); /// Returns a unique identifier for the object. For debugging purposes only. - unsigned int GetObjectId() const { - return object_id; + u32 GetObjectId() const { + return object_id.load(std::memory_order_relaxed); } virtual std::string GetTypeName() const { @@ -61,23 +62,23 @@ public: bool IsWaitable() const; public: - static unsigned int next_object_id; + static std::atomic<u32> next_object_id; private: friend void intrusive_ptr_add_ref(Object*); friend void intrusive_ptr_release(Object*); - unsigned int ref_count = 0; - unsigned int object_id = next_object_id++; + std::atomic<u32> ref_count{0}; + std::atomic<u32> object_id{next_object_id++}; }; // Special functions used by boost::instrusive_ptr to do automatic ref-counting inline void intrusive_ptr_add_ref(Object* object) { - ++object->ref_count; + object->ref_count.fetch_add(1, std::memory_order_relaxed); } inline void intrusive_ptr_release(Object* object) { - if (--object->ref_count == 0) { + if (object->ref_count.fetch_sub(1, std::memory_order_acq_rel) == 1) { delete object; } } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 94065c736f..e770b91031 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -25,7 +25,7 @@ Scheduler::~Scheduler() { } } -bool Scheduler::HaveReadyThreads() { +bool Scheduler::HaveReadyThreads() const { std::lock_guard<std::mutex> lock(scheduler_mutex); return ready_queue.get_first() != nullptr; } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1a4ee8f36b..6a61ef64ea 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -21,7 +21,7 @@ public: ~Scheduler(); /// Returns whether there are any threads that are ready to run. - bool HaveReadyThreads(); + bool HaveReadyThreads() const; /// Reschedules to the next available thread (call after current thread is suspended) void Reschedule(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 5db2db6872..b24f409b30 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -532,7 +532,6 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V CASCADE_RESULT(thread->guest_handle, g_handle_table.Create(thread)); *out_handle = thread->guest_handle; - Core::System::GetInstance().PrepareReschedule(); Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule(); LOG_TRACE(Kernel_SVC, @@ -706,8 +705,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask); auto owner = g_handle_table.Get<Thread>(owner_handle); ASSERT(owner); - ASSERT(thread->status != ThreadStatus::Running); - thread->status = ThreadStatus::WaitMutex; + ASSERT(thread->status == ThreadStatus::WaitMutex); thread->wakeup_callback = nullptr; owner->AddMutexWaiter(thread); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index b9022feae2..a1a7867ce1 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -23,6 +23,7 @@ #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" +#include "core/hle/lock.h" #include "core/hle/result.h" #include "core/memory.h" @@ -104,6 +105,10 @@ void ExitCurrentThread() { */ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { const auto proper_handle = static_cast<Handle>(thread_handle); + + // Lock the global kernel mutex when we enter the kernel HLE. + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); + SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle); if (thread == nullptr) { LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle); @@ -155,8 +160,10 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { if (nanoseconds == -1) return; - CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType, - callback_handle); + // This function might be called from any thread so we have to be cautious and use the + // thread-safe version of ScheduleEvent. + CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType, + callback_handle); } void Thread::CancelWakeupTimer() { @@ -419,12 +426,33 @@ VAddr Thread::GetCommandBufferAddress() const { } void Thread::AddMutexWaiter(SharedPtr<Thread> thread) { + if (thread->lock_owner == this) { + // If the thread is already waiting for this thread to release the mutex, ensure that the + // waiters list is consistent and return without doing anything. + auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); + ASSERT(itr != wait_mutex_threads.end()); + return; + } + + // A thread can't wait on two different mutexes at the same time. + ASSERT(thread->lock_owner == nullptr); + + // Ensure that the thread is not already in the list of mutex waiters + auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); + ASSERT(itr == wait_mutex_threads.end()); + thread->lock_owner = this; wait_mutex_threads.emplace_back(std::move(thread)); UpdatePriority(); } void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) { + ASSERT(thread->lock_owner == this); + + // Ensure that the thread is in the list of mutex waiters + auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); + ASSERT(itr != wait_mutex_threads.end()); + boost::remove_erase(wait_mutex_threads, thread); thread->lock_owner = nullptr; UpdatePriority(); diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index f99304de54..9e75eb3a6c 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -20,9 +20,9 @@ public: explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) : ServiceFramework("IAudioRenderer") { static const FunctionInfo functions[] = { - {0, nullptr, "GetAudioRendererSampleRate"}, - {1, nullptr, "GetAudioRendererSampleCount"}, - {2, nullptr, "GetAudioRendererMixBufferCount"}, + {0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"}, + {1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"}, + {2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"}, {3, nullptr, "GetAudioRendererState"}, {4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"}, {5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"}, @@ -45,6 +45,27 @@ private: system_event->Signal(); } + void GetAudioRendererSampleRate(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(renderer->GetSampleRate()); + LOG_DEBUG(Service_Audio, "called"); + } + + void GetAudioRendererSampleCount(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(renderer->GetSampleCount()); + LOG_DEBUG(Service_Audio, "called"); + } + + void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push<u32>(renderer->GetMixBufferCount()); + LOG_DEBUG(Service_Audio, "called"); + } + void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) { ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); IPC::ResponseBuilder rb{ctx, 2}; @@ -169,7 +190,8 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") { {1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"}, {2, &AudRenU::GetAudioDevice, "GetAudioDevice"}, {3, nullptr, "OpenAudioRendererAuto"}, - {4, nullptr, "GetAudioDeviceServiceWithRevisionInfo"}, + {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo, + "GetAudioDeviceServiceWithRevisionInfo"}, }; RegisterHandlers(functions); } @@ -189,7 +211,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); - u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40); + u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40); buffer_sz += params.unknown_c * 1024; buffer_sz += 0x940 * (params.unknown_c + 1); buffer_sz += 0x3F0 * params.voice_count; @@ -197,7 +219,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) { buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10); buffer_sz += Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) * - (params.unknown_8 + 6), + (params.mix_buffer_count + 6), 0x40); if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { @@ -253,6 +275,16 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "called"); } +void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<Audio::IAudioDevice>(); + + LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(ogniK): Figure out what is different + // based on the current revision +} + bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap switch (feature) { diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h index 14907f8aea..8600ac6e4e 100644 --- a/src/core/hle/service/audio/audren_u.h +++ b/src/core/hle/service/audio/audren_u.h @@ -22,6 +22,7 @@ private: void OpenAudioRenderer(Kernel::HLERequestContext& ctx); void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx); void GetAudioDevice(Kernel::HLERequestContext& ctx); + void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx); enum class AudioFeatures : u32 { Splitter, diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index dcdfa0e190..970942d3f7 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -291,6 +291,7 @@ private: class Hid final : public ServiceFramework<Hid> { public: Hid() : ServiceFramework("hid") { + // clang-format off static const FunctionInfo functions[] = { {0, &Hid::CreateAppletResource, "CreateAppletResource"}, {1, &Hid::ActivateDebugPad, "ActivateDebugPad"}, @@ -333,15 +334,13 @@ public: {102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"}, {103, &Hid::ActivateNpad, "ActivateNpad"}, {104, nullptr, "DeactivateNpad"}, - {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, - "AcquireNpadStyleSetUpdateEventHandle"}, - {107, nullptr, "DisconnectNpad"}, + {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"}, + {107, &Hid::DisconnectNpad, "DisconnectNpad"}, {108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"}, {109, nullptr, "ActivateNpadWithRevision"}, {120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"}, {121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"}, - {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, - "SetNpadJoyAssignmentModeSingleByDefault"}, + {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"}, {123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"}, {124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"}, {125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"}, @@ -398,6 +397,8 @@ public: {1000, nullptr, "SetNpadCommunicationMode"}, {1001, nullptr, "GetNpadCommunicationMode"}, }; + // clang-format on + RegisterHandlers(functions); event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle"); @@ -496,6 +497,12 @@ private: LOG_WARNING(Service_HID, "(STUBBED) called"); } + void DisconnectNpad(Kernel::HLERequestContext& ctx) { + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + LOG_WARNING(Service_HID, "(STUBBED) called"); + } + void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 2}; rb.Push(RESULT_SUCCESS); diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h index 8a294c0f23..cd9c74f3d8 100644 --- a/src/core/hle/service/service.h +++ b/src/core/hle/service/service.h @@ -23,7 +23,7 @@ class HLERequestContext; } // namespace Kernel namespace FileSys { -struct VfsFilesystem; +class VfsFilesystem; } namespace Service { diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 2f5bfc67cd..1f2f315358 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -126,7 +126,7 @@ constexpr std::array<const char*, 36> RESULT_MESSAGES{ }; std::string GetMessageForResultStatus(ResultStatus status) { - return GetMessageForResultStatus(static_cast<size_t>(status)); + return GetMessageForResultStatus(static_cast<u16>(status)); } std::string GetMessageForResultStatus(u16 status) { diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index cfdadbee3b..2853635492 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -56,7 +56,7 @@ FileType GuessFromFilename(const std::string& name); std::string GetFileTypeString(FileType type); /// Return type for functions in Loader namespace -enum class ResultStatus { +enum class ResultStatus : u16 { Success, ErrorAlreadyLoaded, ErrorNotImplemented, diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 9f64b248ba..2526ebf28f 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -200,6 +200,14 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class XmadMode : u64 { + None = 0, + CLo = 1, + CHi = 2, + CSfu = 3, + CBcc = 4, +}; + enum class FlowCondition : u64 { Always = 0xF, Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? @@ -457,6 +465,18 @@ union Instruction { } bra; union { + BitField<20, 16, u64> imm20_16; + BitField<36, 1, u64> product_shift_left; + BitField<37, 1, u64> merge_37; + BitField<48, 1, u64> sign_a; + BitField<49, 1, u64> sign_b; + BitField<50, 3, XmadMode> mode; + BitField<52, 1, u64> high_b; + BitField<53, 1, u64> high_a; + BitField<56, 1, u64> merge_56; + } xmad; + + union { BitField<20, 14, u64> offset; BitField<34, 5, u64> index; } cbuf34; @@ -593,6 +613,7 @@ public: IntegerSetPredicate, PredicateSetPredicate, Conversion, + Xmad, Unknown, }; @@ -782,10 +803,10 @@ private: INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"), + INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), + INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), + INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), + INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), }; #undef INST std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 19e7f11613..6f0343888a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -46,6 +46,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::RGBA32_FLOAT: case RenderTargetFormat::RGBA32_UINT: return 16; + case RenderTargetFormat::RGBA16_UINT: case RenderTargetFormat::RGBA16_FLOAT: case RenderTargetFormat::RG32_FLOAT: return 8; @@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { case RenderTargetFormat::R16_UINT: case RenderTargetFormat::R16_SINT: case RenderTargetFormat::R16_FLOAT: + case RenderTargetFormat::RG8_UNORM: case RenderTargetFormat::RG8_SNORM: return 2; case RenderTargetFormat::R8_UNORM: diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e008d8f262..73abb7a187 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -20,6 +20,7 @@ enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, + RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, BGRA8_UNORM = 0xCF, @@ -35,6 +36,7 @@ enum class RenderTargetFormat : u32 { R11G11B10_FLOAT = 0xE0, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, + RG8_UNORM = 0xEA, RG8_SNORM = 0xEB, R16_UNORM = 0xEE, R16_SNORM = 0xEF, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 38a7b14137..52a649e2f0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) + : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } - // Create SSBOs - for (size_t stage = 0; stage < ssbos.size(); ++stage) { - for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { - ssbos[stage][buffer].Create(); - state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; - } - } - GLint ext_num; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); for (GLint i = 0; i < ext_num; i++) { const std::string_view extension{ reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; - if (extension == "GL_ARB_buffer_storage") { - has_ARB_buffer_storage = true; - } else if (extension == "GL_ARB_direct_state_access") { + if (extension == "GL_ARB_direct_state_access") { has_ARB_direct_state_access = true; } else if (extension == "GL_ARB_separate_shader_objects") { has_ARB_separate_shader_objects = true; @@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind hw_vao.Create(); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - - for (unsigned index = 0; index < uniform_buffers.size(); ++index) { - auto& buffer = uniform_buffers[index]; - buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, - GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); glEnable(GL_BLEND); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } -RasterizerOpenGL::~RasterizerOpenGL() { - if (stream_buffer != nullptr) { - state.draw.vertex_buffer = stream_buffer->GetHandle(); - state.Apply(); - stream_buffer->Release(); - } -} +RasterizerOpenGL::~RasterizerOpenGL() {} std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); // Upload all guest vertex arrays sequentially to our buffer @@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ASSERT(end > start); u64 size = end - start + 1; - // Copy vertex array data - Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); + GLintptr vertex_buffer_offset; + std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = + UploadMemory(array_ptr, buffer_offset, start, size); // Bind the vertex array to the buffer at the current offset. - glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); + glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, + vertex_array.stride); ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); - - array_ptr += size; - buffer_offset += size; } // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. @@ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program return program_code; } -void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { - // Helper function for uploading uniform data - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - +std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. - u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size()); + u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { continue; } + std::tie(buffer_ptr, buffer_offset) = + AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment)); + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); - // Flush the buffer so that the GPU can see the data we just wrote. - glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); - - // Upload uniform data as one UBO per stage - const GLintptr ubo_offset = buffer_offset; - copy_buffer(uniform_buffers[stage].handle, ubo_offset, - sizeof(GLShader::MaxwellUniformData)); + // Bind the buffer + glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, + sizeof(ubo)); - buffer_ptr += sizeof(GLShader::MaxwellUniformData); - buffer_offset += sizeof(GLShader::MaxwellUniformData); + buffer_ptr += sizeof(ubo); + buffer_offset += sizeof(ubo); GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; @@ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { static_cast<Maxwell::ShaderStage>(stage)); // Configure the const buffers for this shader stage. - current_constbuffer_bindpoint = - SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, - current_constbuffer_bindpoint, shader_resources.const_buffer_entries); + std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( + buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, + current_constbuffer_bindpoint, shader_resources.const_buffer_entries); // Configure the textures for this shader stage. current_texture_bindpoint = @@ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { } shader_program_manager->UseTrivialGeometryShader(); + + return {buffer_ptr, buffer_offset}; } size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() { } } +std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, + size_t alignment) { + // Align the offset, not the mapped pointer + GLintptr offset_aligned = + static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); + return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; +} + +std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr, + GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, + size_t size, size_t alignment) { + std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); + GLintptr uploaded_offset = buffer_offset; + + const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; + const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + return {buffer_ptr, buffer_offset, uploaded_offset}; +} + void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; @@ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); size_t buffer_size = CalculateVertexArraysSize(); @@ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() { } // Uniform space for the 5 shader stages - buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + - sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; + buffer_size = + Common::AlignUp<size_t>(buffer_size, 4) + + (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; + + // Add space for at least 18 constant buffers + buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); u8* buffer_ptr; GLintptr buffer_offset; - std::tie(buffer_ptr, buffer_offset) = - stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); + std::tie(buffer_ptr, buffer_offset, std::ignore) = + stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); + u8* buffer_ptr_base = buffer_ptr; - u8* offseted_buffer; - std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); - - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); + std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; - const boost::optional<VAddr> index_data_addr{ - memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; - Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); - - index_buffer_offset = buffer_offset; - offseted_buffer += index_buffer_size; - buffer_offset += index_buffer_size; + std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( + buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); } - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); - - SetupShaders(offseted_buffer, buffer_offset); + std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); - stream_buffer->Unmap(); + stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); shader_program_manager->ApplyTo(state); state.Apply(); @@ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries) { +std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, + u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); - // Reset all buffer draw state for this stage. - for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { - buffer.bindpoint = 0; - buffer.enabled = false; - } - // Upload only the enabled buffers from the 16 constbuffers of each shader stage const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - auto& buffer_draw_state = - state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; if (!buffer.enabled) { continue; } - buffer_draw_state.enabled = true; - buffer_draw_state.bindpoint = current_bindpoint + bindpoint; - - boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); - size_t size = 0; if (used_buffer.IsIndirect()) { @@ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - std::vector<u8> data(size); - Memory::ReadBlock(*addr, data.data(), data.size()); + GLintptr const_buffer_offset; + std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = + UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, + static_cast<size_t>(uniform_buffer_alignment)); - glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); - glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); - glBindBuffer(GL_UNIFORM_BUFFER, 0); + glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, + stream_buffer.GetHandle(), const_buffer_offset, size); // Now configure the bindpoint of the buffer inside the shader const std::string buffer_name = used_buffer.GetName(); const GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); if (index != GL_INVALID_INDEX) { - glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); + glUniformBlockBinding(program, index, current_bindpoint + bindpoint); } } state.Apply(); - return current_bindpoint + static_cast<u32>(entries.size()); + return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; } u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bd01dc0ae2..74307f6265 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <tuple> #include <utility> #include <vector> #include <glad/glad.h> @@ -100,9 +101,10 @@ private: * @param entries Vector describing the buffers that are actually used in the guest shader. * @returns The next available bindpoint for use in the next shader stage. */ - u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries); + std::tuple<u8*, GLintptr, u32> SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + GLuint program, u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries); /* * Configures the current textures to use for the draw command. @@ -139,7 +141,6 @@ private: /// Syncs the blend state to match the guest state void SyncBlendState(); - bool has_ARB_buffer_storage = false; bool has_ARB_direct_state_access = false; bool has_ARB_separate_shader_objects = false; bool has_ARB_vertex_attrib_binding = false; @@ -155,22 +156,24 @@ private: OGLVertexArray hw_vao; std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; - std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, - Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> - ssbos; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - std::unique_ptr<OGLStreamBuffer> stream_buffer; + OGLStreamBuffer stream_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + GLint uniform_buffer_alignment; size_t CalculateVertexArraysSize() const; std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); - std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; + std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); - void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); + std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); + + std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, size_t size, + size_t alignment = 4); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 84c250c631..d635550d2e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -101,6 +101,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI @@ -134,6 +135,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S // DepthStencil formats @@ -234,32 +236,57 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, - MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, - MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::RG8S>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, + // clang-format off + MortonCopy<true, PixelFormat::ABGR8U>, + MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::A2B10G10R10>, + MortonCopy<true, PixelFormat::A1B5G5R5>, + MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::R8UI>, + MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::RGBA16UI>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, + MortonCopy<true, PixelFormat::RGBA32UI>, + MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::DXN2UNORM>, + MortonCopy<true, PixelFormat::DXN2SNORM>, + MortonCopy<true, PixelFormat::BC7U>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4>, + MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::BGRA8>, + MortonCopy<true, PixelFormat::RGBA32F>, + MortonCopy<true, PixelFormat::RG32F>, + MortonCopy<true, PixelFormat::R32F>, + MortonCopy<true, PixelFormat::R16F>, + MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16S>, + MortonCopy<true, PixelFormat::R16UI>, + MortonCopy<true, PixelFormat::R16I>, + MortonCopy<true, PixelFormat::RG16>, + MortonCopy<true, PixelFormat::RG16F>, + MortonCopy<true, PixelFormat::RG16UI>, + MortonCopy<true, PixelFormat::RG16I>, + MortonCopy<true, PixelFormat::RG16S>, + MortonCopy<true, PixelFormat::RGB32F>, + MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RG8U>, + MortonCopy<true, PixelFormat::RG8S>, + MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, + MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, + // clang-format on }; static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { + // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::B5G6R5>, @@ -268,6 +295,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::R8>, MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not @@ -297,12 +325,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, MortonCopy<false, PixelFormat::SRGBA8>, + MortonCopy<false, PixelFormat::RG8U>, MortonCopy<false, PixelFormat::RG8S>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z32FS8>, + // clang-format on }; // Allocate an uninitialized texture of appropriate size and format for the surface diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 202257b582..4ab74342e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -31,43 +31,45 @@ struct SurfaceParams { R8 = 5, R8UI = 6, RGBA16F = 7, - R11FG11FB10F = 8, - RGBA32UI = 9, - DXT1 = 10, - DXT23 = 11, - DXT45 = 12, - DXN1 = 13, // This is also known as BC4 - DXN2UNORM = 14, - DXN2SNORM = 15, - BC7U = 16, - ASTC_2D_4X4 = 17, - G8R8 = 18, - BGRA8 = 19, - RGBA32F = 20, - RG32F = 21, - R32F = 22, - R16F = 23, - R16UNORM = 24, - R16S = 25, - R16UI = 26, - R16I = 27, - RG16 = 28, - RG16F = 29, - RG16UI = 30, - RG16I = 31, - RG16S = 32, - RGB32F = 33, - SRGBA8 = 34, - RG8S = 35, + RGBA16UI = 8, + R11FG11FB10F = 9, + RGBA32UI = 10, + DXT1 = 11, + DXT23 = 12, + DXT45 = 13, + DXN1 = 14, // This is also known as BC4 + DXN2UNORM = 15, + DXN2SNORM = 16, + BC7U = 17, + ASTC_2D_4X4 = 18, + G8R8 = 19, + BGRA8 = 20, + RGBA32F = 21, + RG32F = 22, + R32F = 23, + R16F = 24, + R16UNORM = 25, + R16S = 26, + R16UI = 27, + R16I = 28, + RG16 = 29, + RG16F = 30, + RG16UI = 31, + RG16I = 32, + RG16S = 33, + RGB32F = 34, + SRGBA8 = 35, + RG8U = 36, + RG8S = 37, MaxColorFormat, // DepthStencil formats - Z24S8 = 36, - S8Z24 = 37, - Z32F = 38, - Z16 = 39, - Z32FS8 = 40, + Z24S8 = 38, + S8Z24 = 39, + Z32F = 40, + Z16 = 41, + Z32FS8 = 42, MaxDepthStencilFormat, @@ -113,6 +115,7 @@ struct SurfaceParams { 1, // R8 1, // R8UI 1, // RGBA16F + 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI 4, // DXT1 @@ -140,6 +143,7 @@ struct SurfaceParams { 1, // RG16S 1, // RGB32F 1, // SRGBA8 + 1, // RG8U 1, // RG8S 1, // Z24S8 1, // S8Z24 @@ -165,6 +169,7 @@ struct SurfaceParams { 8, // R8 8, // R8UI 64, // RGBA16F + 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI 64, // DXT1 @@ -192,6 +197,7 @@ struct SurfaceParams { 32, // RG16S 96, // RGB32F 32, // SRGBA8 + 16, // RG8U 16, // RG8S 32, // Z24S8 32, // S8Z24 @@ -241,6 +247,8 @@ struct SurfaceParams { return PixelFormat::A2B10G10R10; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UINT: + return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: return PixelFormat::RGBA32F; case Tegra::RenderTargetFormat::RG32_FLOAT: @@ -265,6 +273,8 @@ struct SurfaceParams { return PixelFormat::RG16; case Tegra::RenderTargetFormat::RG16_SNORM: return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_UNORM: + return PixelFormat::RG8U; case Tegra::RenderTargetFormat::RG8_SNORM: return PixelFormat::RG8S; case Tegra::RenderTargetFormat::R16_FLOAT: @@ -432,6 +442,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RG16_UNORM: case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case Tegra::RenderTargetFormat::RG8_UNORM: return ComponentType::UNorm; case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: @@ -447,6 +458,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R32_FLOAT: return ComponentType::Float; case Tegra::RenderTargetFormat::RGBA32_UINT: + case Tegra::RenderTargetFormat::RGBA16_UINT: case Tegra::RenderTargetFormat::RG16_UINT: case Tegra::RenderTargetFormat::R8_UINT: case Tegra::RenderTargetFormat::R16_UINT: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d21daf28a4..6834d7085d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -356,13 +356,13 @@ public: * @param reg The register to use as the source value. */ void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { - std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem); + std::string dest = GetOutputAttribute(attribute); std::string src = GetRegisterAsFloat(reg); if (!dest.empty()) { // Can happen with unknown/unimplemented output attributes, in which case we ignore the // instruction for now. - shader.AddLine(dest + " = " + src + ';'); + shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); } } @@ -376,6 +376,8 @@ public: return value; } else if (type == GLSLRegister::Type::Integer) { return "floatBitsToInt(" + value + ')'; + } else if (type == GLSLRegister::Type::UnsignedInteger) { + return "floatBitsToUint(" + value + ')'; } else { UNREACHABLE(); } @@ -1630,6 +1632,99 @@ private: } break; } + case OpCode::Type::Xmad: { + ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); + ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); + + std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; + std::string op_b; + std::string op_c; + + // TODO(bunnei): Needs to be fixed once op_a or op_b is signed + ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented"); + const bool is_signed{instr.xmad.sign_a == 1}; + + bool is_merge{}; + switch (opcode->GetId()) { + case OpCode::Id::XMAD_CR: { + is_merge = instr.xmad.merge_56; + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.xmad.sign_b ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RR: { + is_merge = instr.xmad.merge_37; + op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RC: { + op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); + op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + is_signed ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + break; + } + case OpCode::Id::XMAD_IMM: { + is_merge = instr.xmad.merge_37; + op_b += std::to_string(instr.xmad.imm20_16); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + + // TODO(bunnei): Ensure this is right with signed operands + if (instr.xmad.high_a) { + op_a = "((" + op_a + ") >> 16)"; + } else { + op_a = "((" + op_a + ") & 0xFFFF)"; + } + + std::string src2 = '(' + op_b + ')'; // Preserve original source 2 + if (instr.xmad.high_b) { + op_b = '(' + src2 + " >> 16)"; + } else { + op_b = '(' + src2 + " & 0xFFFF)"; + } + + std::string product = '(' + op_a + " * " + op_b + ')'; + if (instr.xmad.product_shift_left) { + product = '(' + product + " << 16)"; + } + + switch (instr.xmad.mode) { + case Tegra::Shader::XmadMode::None: + break; + case Tegra::Shader::XmadMode::CLo: + op_c = "((" + op_c + ") & 0xFFFF)"; + break; + case Tegra::Shader::XmadMode::CHi: + op_c = "((" + op_c + ") >> 16)"; + break; + case Tegra::Shader::XmadMode::CBcc: + op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; + break; + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}", + static_cast<u32>(instr.xmad.mode.Value())); + UNREACHABLE(); + } + } + + std::string sum{'(' + product + " + " + op_c + ')'}; + if (is_merge) { + sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; + } + + regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + break; + } default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 68bacd4c5f..1d19751790 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -203,21 +203,6 @@ void OpenGLState::Apply() const { } } - // Constbuffers - for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) { - for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { - const auto& current = cur_state.draw.const_buffers[stage][buffer_id]; - const auto& new_state = draw.const_buffers[stage][buffer_id]; - - if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || - current.ssbo != new_state.ssbo) { - if (new_state.enabled) { - glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); - } - } - } - } - // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 5c7b636e4f..bdb02ba256 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -119,12 +119,6 @@ public: GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING - struct ConstBufferConfig { - bool enabled = false; - GLuint bindpoint; - GLuint ssbo; - }; - std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers; } draw; struct { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0f..03a8ed8b7f 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,174 +9,91 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class OrphanBuffer : public OGLStreamBuffer { -public: - explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~OrphanBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - std::vector<u8> data; -}; - -class StorageBuffer : public OGLStreamBuffer { -public: - explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~StorageBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - struct Fence { - OGLSync sync; - size_t offset; - }; - std::deque<Fence> head; - std::deque<Fence> tail; - - u8* mapped_ptr; -}; - -OGLStreamBuffer::OGLStreamBuffer(GLenum target) { - gl_target = target; -} - -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) + : gl_target(target), buffer_size(size) { + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); -std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { - if (storage_buffer) { - return std::make_unique<StorageBuffer>(target); + GLsizeiptr allocate_size = size; + if (target == GL_ARRAY_BUFFER) { + // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer + // read position is near the end and is an out-of-bound access to the vertex buffer. This is + // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the + // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the + // crash. + allocate_size *= 2; } - return std::make_unique<OrphanBuffer>(target); -} -OrphanBuffer::~OrphanBuffer() { - Release(); + if (GLAD_GL_ARB_buffer_storage) { + persistent = true; + coherent = prefer_coherent; + GLbitfield flags = + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glBufferStorage(gl_target, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>(glMapBufferRange( + gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + } else { + glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); + } } -void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { - buffer_pos = 0; - buffer_size = size; - data.resize(buffer_size); - - if (gl_buffer.handle == 0) { - gl_buffer.Create(); +OGLStreamBuffer::~OGLStreamBuffer() { + if (persistent) { glBindBuffer(gl_target, gl_buffer.handle); + glUnmapBuffer(gl_target); } - - glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); -} - -void OrphanBuffer::Release() { gl_buffer.Release(); } -std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); - - if (buffer_pos + size > buffer_size) { - Create(std::max(buffer_size, size), 0); - } - - mapped_size = size; - return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} - -void OrphanBuffer::Unmap() { - glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), - static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); - buffer_pos += mapped_size; -} - -StorageBuffer::~StorageBuffer() { - Release(); +GLuint OGLStreamBuffer::GetHandle() const { + return gl_buffer.handle; } -void StorageBuffer::Create(size_t size, size_t sync_subdivide) { - if (gl_buffer.handle != 0) - return; - - buffer_pos = 0; - buffer_size = size; - buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); - - gl_buffer.Create(); - glBindBuffer(gl_target, gl_buffer.handle); - - glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); - mapped_ptr = reinterpret_cast<u8*>( - glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); +GLsizeiptr OGLStreamBuffer::GetSize() const { + return buffer_size; } -void StorageBuffer::Release() { - if (gl_buffer.handle == 0) - return; - - glUnmapBuffer(gl_target); - - gl_buffer.Release(); - head.clear(); - tail.clear(); -} - -std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { +std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); + ASSERT(alignment <= buffer_size); + mapped_size = size; - OGLSync sync; - - buffer_pos = Common::AlignUp(buffer_pos, alignment); - size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); - - if (!head.empty() && - (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { - ASSERT(head.back().sync.handle == 0); - head.back().sync.Create(); + if (alignment > 0) { + buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); } + bool invalidate = false; if (buffer_pos + size > buffer_size) { - if (!tail.empty()) { - std::swap(sync, tail.back().sync); - tail.clear(); - } - std::swap(tail, head); buffer_pos = 0; - effective_offset = 0; - } + invalidate = true; - while (!tail.empty() && buffer_pos + size > tail.front().offset) { - std::swap(sync, tail.front().sync); - tail.pop_front(); + if (persistent) { + glUnmapBuffer(gl_target); + } } - if (sync.handle != 0) { - glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - sync.Release(); + if (invalidate | !persistent) { + GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | + (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | + (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); + mapped_ptr = static_cast<u8*>( + glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); + mapped_offset = buffer_pos; } - if (head.empty() || effective_offset > head.back().offset) { - head.emplace_back(); - head.back().offset = effective_offset; + return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); +} + +void OGLStreamBuffer::Unmap(GLsizeiptr size) { + ASSERT(size <= mapped_size); + + if (!coherent && size > 0) { + glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); } - mapped_size = size; - return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} + if (!persistent) { + glUnmapBuffer(gl_target); + } -void StorageBuffer::Unmap() { - glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), - static_cast<GLsizeiptr>(mapped_size)); - buffer_pos += mapped_size; + buffer_pos += size; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index e78dc5784e..45592daaf8 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,35 +2,41 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - -#include <memory> +#include <tuple> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLenum target); - virtual ~OGLStreamBuffer() = default; - -public: - static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); - - virtual void Create(size_t size, size_t sync_subdivide) = 0; - virtual void Release() {} + explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); + ~OGLStreamBuffer(); GLuint GetHandle() const; + GLsizeiptr GetSize() const; + + /* + * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes + * and the optional alignment requirement. + * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. + * The return values are the pointer to the new chunk, the offset within the buffer, + * and the invalidation flag for previous chunks. + * The actual used size must be specified on unmapping the chunk. + */ + std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); - virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; - virtual void Unmap() = 0; + void Unmap(GLsizeiptr size); -protected: +private: OGLBuffer gl_buffer; GLenum gl_target; - size_t buffer_pos = 0; - size_t buffer_size = 0; - size_t buffer_sync_subdivide = 0; - size_t mapped_size = 0; + bool coherent = false; + bool persistent = false; + + GLintptr buffer_pos = 0; + GLsizeiptr buffer_size = 0; + GLintptr mapped_offset = 0; + GLsizeiptr mapped_size = 0; + u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 5afd20dbed..679e5ceb27 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -91,6 +91,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: return GL_POINTS; + case Maxwell::PrimitiveTopology::LineStrip: + return GL_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: |