diff options
Diffstat (limited to 'src')
77 files changed, 1841 insertions, 783 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f30dd49a3f..f8ec8fea8c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -45,13 +45,23 @@ if (MSVC) /Zc:inline /Zc:throwingNew + # External headers diagnostics + /experimental:external # Enables the external headers options. This option isn't required in Visual Studio 2019 version 16.10 and later + /external:anglebrackets # Treats all headers included by #include <header>, where the header file is enclosed in angle brackets (< >), as external headers + /external:W0 # Sets the default warning level to 0 for external headers, effectively turning off warnings for external headers + # Warnings /W3 - /we4062 # enumerator 'identifier' in a switch of enum 'enumeration' is not handled + /we4018 # 'expression': signed/unsigned mismatch + /we4062 # Enumerator 'identifier' in a switch of enum 'enumeration' is not handled /we4101 # 'identifier': unreferenced local variable + /we4189 # 'identifier': local variable is initialized but not referenced /we4265 # 'class': class has virtual functions, but destructor is not virtual - /we4388 # signed/unsigned mismatch - /we4547 # 'operator' : operator before comma has no effect; expected operator with side-effect + /we4267 # 'var': conversion from 'size_t' to 'type', possible loss of data + /we4305 # 'context': truncation from 'type1' to 'type2' + /we4388 # 'expression': signed/unsigned mismatch + /we4389 # 'operator': signed/unsigned mismatch + /we4547 # 'operator': operator before comma has no effect; expected operator with side-effect /we4549 # 'operator1': operator before comma has no effect; did you intend 'operator2'? /we4555 # Expression has no effect; expected expression with side-effect /we4715 # 'function': not all control paths return a value @@ -72,6 +82,7 @@ else() -Werror=missing-declarations -Werror=missing-field-initializers -Werror=reorder + -Werror=sign-compare -Werror=switch -Werror=uninitialized -Werror=unused-function diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index d25a1a6451..090dd19b1d 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -51,9 +51,6 @@ if (NOT MSVC) target_compile_options(audio_core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder - -Werror=sign-compare -Werror=shadow -Werror=unused-parameter -Werror=unused-variable diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp index 80ffddb104..7dba739b45 100644 --- a/src/audio_core/audio_renderer.cpp +++ b/src/audio_core/audio_renderer.cpp @@ -12,6 +12,7 @@ #include "audio_core/voice_context.h" #include "common/logging/log.h" #include "common/settings.h" +#include "core/core_timing.h" #include "core/memory.h" namespace { @@ -28,10 +29,9 @@ namespace { (static_cast<float>(r_channel) * r_mix_amount))); } -[[nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2(s16 fl_channel, s16 fr_channel, - s16 fc_channel, - [[maybe_unused]] s16 lf_channel, - s16 bl_channel, s16 br_channel) { +[[maybe_unused, nodiscard]] static constexpr std::tuple<s16, s16> Mix6To2( + s16 fl_channel, s16 fr_channel, s16 fc_channel, [[maybe_unused]] s16 lf_channel, s16 bl_channel, + s16 br_channel) { // Front channels are mixed 36.94%, Center channels are mixed to be 26.12% & the back channels // are mixed to be 36.94% @@ -56,11 +56,11 @@ namespace { const std::array<float_le, 4>& coeff) { const auto left = static_cast<float>(fl_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + - static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[0]; + static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(bl_channel) * coeff[3]; const auto right = static_cast<float>(fr_channel) * coeff[0] + static_cast<float>(fc_channel) * coeff[1] + - static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[0]; + static_cast<float>(lf_channel) * coeff[2] + static_cast<float>(br_channel) * coeff[3]; return {ClampToS16(static_cast<s32>(left)), ClampToS16(static_cast<s32>(right))}; } @@ -68,7 +68,9 @@ namespace { } // namespace namespace AudioCore { -AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory::Memory& memory_, +constexpr s32 NUM_BUFFERS = 2; + +AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing_, Core::Memory::Memory& memory_, AudioCommon::AudioRendererParameter params, Stream::ReleaseCallback&& release_callback, std::size_t instance_number) @@ -77,7 +79,8 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory sink_context(params.sink_count), splitter_context(), voices(params.voice_count), memory{memory_}, command_generator(worker_params, voice_context, mix_context, splitter_context, effect_context, - memory) { + memory), + core_timing{core_timing_} { behavior_info.SetUserRevision(params.revision); splitter_context.Initialize(behavior_info, params.splitter_count, params.num_splitter_send_channels); @@ -86,16 +89,27 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory stream = audio_out->OpenStream( core_timing, params.sample_rate, AudioCommon::STREAM_NUM_CHANNELS, fmt::format("AudioRenderer-Instance{}", instance_number), std::move(release_callback)); - audio_out->StartStream(stream); - - QueueMixedBuffer(0); - QueueMixedBuffer(1); - QueueMixedBuffer(2); - QueueMixedBuffer(3); + process_event = Core::Timing::CreateEvent( + fmt::format("AudioRenderer-Instance{}-Process", instance_number), + [this](std::uintptr_t, std::chrono::nanoseconds) { ReleaseAndQueueBuffers(); }); + for (s32 i = 0; i < NUM_BUFFERS; ++i) { + QueueMixedBuffer(i); + } } AudioRenderer::~AudioRenderer() = default; +ResultCode AudioRenderer::Start() { + audio_out->StartStream(stream); + ReleaseAndQueueBuffers(); + return ResultSuccess; +} + +ResultCode AudioRenderer::Stop() { + audio_out->StopStream(stream); + return ResultSuccess; +} + u32 AudioRenderer::GetSampleRate() const { return worker_params.sample_rate; } @@ -114,7 +128,7 @@ Stream::State AudioRenderer::GetStreamState() const { ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params, std::vector<u8>& output_params) { - + std::scoped_lock lock{mutex}; InfoUpdater info_updater{input_params, output_params, behavior_info}; if (!info_updater.UpdateBehaviorInfo(behavior_info)) { @@ -194,9 +208,6 @@ ResultCode AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_param LOG_ERROR(Audio, "Audio buffers were not consumed!"); return AudioCommon::Audren::ERR_INVALID_PARAMETERS; } - - ReleaseAndQueueBuffers(); - return ResultSuccess; } @@ -220,10 +231,8 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { command_generator.PostCommand(); // Base sample size std::size_t BUFFER_SIZE{worker_params.sample_count}; - // Samples - std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels()); - // Make sure to clear our samples - std::memset(buffer.data(), 0, buffer.size() * sizeof(s16)); + // Samples, making sure to clear + std::vector<s16> buffer(BUFFER_SIZE * stream->GetNumChannels(), 0); if (sink_context.InUse()) { const auto stream_channel_count = stream->GetNumChannels(); @@ -231,7 +240,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { const auto channel_count = buffer_offsets.size(); const auto& final_mix = mix_context.GetFinalMixInfo(); const auto& in_params = final_mix.GetInParams(); - std::vector<s32*> mix_buffers(channel_count); + std::vector<std::span<s32>> mix_buffers(channel_count); for (std::size_t i = 0; i < channel_count; i++) { mix_buffers[i] = command_generator.GetMixBuffer(in_params.buffer_offset + buffer_offsets[i]); @@ -284,18 +293,11 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { buffer[i * stream_channel_count + 0] = Mix2To1(fl_sample, fr_sample); } else if (stream_channel_count == 2) { // Mix all channels into 2 channels - if (sink_context.HasDownMixingCoefficients()) { - const auto [left, right] = Mix6To2WithCoefficients( - fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, - sink_context.GetDownmixCoefficients()); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } else { - const auto [left, right] = Mix6To2(fl_sample, fr_sample, fc_sample, - lf_sample, bl_sample, br_sample); - buffer[i * stream_channel_count + 0] = left; - buffer[i * stream_channel_count + 1] = right; - } + const auto [left, right] = Mix6To2WithCoefficients( + fl_sample, fr_sample, fc_sample, lf_sample, bl_sample, br_sample, + sink_context.GetDownmixCoefficients()); + buffer[i * stream_channel_count + 0] = left; + buffer[i * stream_channel_count + 1] = right; } else if (stream_channel_count == 6) { // Pass through buffer[i * stream_channel_count + 0] = fl_sample; @@ -315,10 +317,24 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) { } void AudioRenderer::ReleaseAndQueueBuffers() { - const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; - for (const auto& tag : released_buffers) { - QueueMixedBuffer(tag); + if (!stream->IsPlaying()) { + return; } + + { + std::scoped_lock lock{mutex}; + const auto released_buffers{audio_out->GetTagsAndReleaseBuffers(stream)}; + for (const auto& tag : released_buffers) { + QueueMixedBuffer(tag); + } + } + + const f32 sample_rate = static_cast<f32>(GetSampleRate()); + const f32 sample_count = static_cast<f32>(GetSampleCount()); + const f32 consume_rate = sample_rate / (sample_count * (sample_count / 240)); + const s32 ms = (1000 / static_cast<s32>(consume_rate)) - 1; + const std::chrono::milliseconds next_event_time(std::max(ms / NUM_BUFFERS, 1)); + core_timing.ScheduleEvent(next_event_time, process_event, {}); } } // namespace AudioCore diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h index 18567f6188..88fdd13dda 100644 --- a/src/audio_core/audio_renderer.h +++ b/src/audio_core/audio_renderer.h @@ -6,6 +6,7 @@ #include <array> #include <memory> +#include <mutex> #include <vector> #include "audio_core/behavior_info.h" @@ -45,6 +46,8 @@ public: [[nodiscard]] ResultCode UpdateAudioRenderer(const std::vector<u8>& input_params, std::vector<u8>& output_params); + [[nodiscard]] ResultCode Start(); + [[nodiscard]] ResultCode Stop(); void QueueMixedBuffer(Buffer::Tag tag); void ReleaseAndQueueBuffers(); [[nodiscard]] u32 GetSampleRate() const; @@ -68,6 +71,9 @@ private: Core::Memory::Memory& memory; CommandGenerator command_generator; std::size_t elapsed_frame_count{}; + Core::Timing::CoreTiming& core_timing; + std::shared_ptr<Core::Timing::EventType> process_event; + std::mutex mutex; }; } // namespace AudioCore diff --git a/src/audio_core/command_generator.cpp b/src/audio_core/command_generator.cpp index 437cc5ccd0..b99d0fc910 100644 --- a/src/audio_core/command_generator.cpp +++ b/src/audio_core/command_generator.cpp @@ -31,7 +31,7 @@ constexpr std::array<f32, AudioCommon::I3DL2REVERB_TAPS> EARLY_GAIN{ 0.72867f, 0.69794f, 0.5464f, 0.24563f, 0.45214f, 0.44042f}; template <std::size_t N> -void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyMix(std::span<s32> output, std::span<const s32> input, s32 gain, s32 sample_count) { for (std::size_t i = 0; i < static_cast<std::size_t>(sample_count); i += N) { for (std::size_t j = 0; j < N; j++) { output[i + j] += @@ -40,7 +40,8 @@ void ApplyMix(s32* output, const s32* input, s32 gain, s32 sample_count) { } } -s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sample_count) { +s32 ApplyMixRamp(std::span<s32> output, std::span<const s32> input, float gain, float delta, + s32 sample_count) { s32 x = 0; for (s32 i = 0; i < sample_count; i++) { x = static_cast<s32>(static_cast<float>(input[i]) * gain); @@ -50,20 +51,22 @@ s32 ApplyMixRamp(s32* output, const s32* input, float gain, float delta, s32 sam return x; } -void ApplyGain(s32* output, const s32* input, s32 gain, s32 delta, s32 sample_count) { +void ApplyGain(std::span<s32> output, std::span<const s32> input, s32 gain, s32 delta, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); gain += delta; } } -void ApplyGainWithoutDelta(s32* output, const s32* input, s32 gain, s32 sample_count) { +void ApplyGainWithoutDelta(std::span<s32> output, std::span<const s32> input, s32 gain, + s32 sample_count) { for (s32 i = 0; i < sample_count; i++) { output[i] = static_cast<s32>((static_cast<s64>(input[i]) * gain + 0x4000) >> 15); } } -s32 ApplyMixDepop(s32* output, s32 first_sample, s32 delta, s32 sample_count) { +s32 ApplyMixDepop(std::span<s32> output, s32 first_sample, s32 delta, s32 sample_count) { const bool positive = first_sample > 0; auto final_sample = std::abs(first_sample); for (s32 i = 0; i < sample_count; i++) { @@ -128,10 +131,10 @@ constexpr std::array<std::size_t, 20> REVERB_TAP_INDEX_6CH{4, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 3, 3, 3}; template <std::size_t CHANNEL_COUNT> -void ApplyReverbGeneric(I3dl2ReverbState& state, - const std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT>& input, - const std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT>& output, - s32 sample_count) { +void ApplyReverbGeneric( + I3dl2ReverbState& state, + const std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT>& input, + const std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT>& output, s32 sample_count) { auto GetTapLookup = []() { if constexpr (CHANNEL_COUNT == 1) { @@ -400,7 +403,10 @@ void CommandGenerator::GenerateDataSourceCommand(ServerVoiceInfo& voice_info, Vo } } else { switch (in_params.sample_format) { + case SampleFormat::Pcm8: case SampleFormat::Pcm16: + case SampleFormat::Pcm32: + case SampleFormat::PcmFloat: DecodeFromWaveBuffers(voice_info, GetChannelMixBuffer(channel), dsp_state, channel, worker_params.sample_rate, worker_params.sample_count, in_params.node_id); @@ -454,8 +460,8 @@ void CommandGenerator::GenerateBiquadFilterCommand([[maybe_unused]] s32 mix_buff "input_mix_buffer={}, output_mix_buffer={}", node_id, input_offset, output_offset); } - const auto* input = GetMixBuffer(input_offset); - auto* output = GetMixBuffer(output_offset); + std::span<const s32> input = GetMixBuffer(input_offset); + std::span<s32> output = GetMixBuffer(output_offset); // Biquad filter parameters const auto [n0, n1, n2] = params.numerator; @@ -548,8 +554,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E return; } - std::array<const s32*, AudioCommon::MAX_CHANNEL_COUNT> input{}; - std::array<s32*, AudioCommon::MAX_CHANNEL_COUNT> output{}; + std::array<std::span<const s32>, AudioCommon::MAX_CHANNEL_COUNT> input{}; + std::array<std::span<s32>, AudioCommon::MAX_CHANNEL_COUNT> output{}; const auto status = params.status; for (s32 i = 0; i < channel_count; i++) { @@ -584,7 +590,8 @@ void CommandGenerator::GenerateI3dl2ReverbEffectCommand(s32 mix_buffer_offset, E for (s32 i = 0; i < channel_count; i++) { // Only copy if the buffer input and output do not match! if ((mix_buffer_offset + params.input[i]) != (mix_buffer_offset + params.output[i])) { - std::memcpy(output[i], input[i], worker_params.sample_count * sizeof(s32)); + std::memcpy(output[i].data(), input[i].data(), + worker_params.sample_count * sizeof(s32)); } } } @@ -600,8 +607,8 @@ void CommandGenerator::GenerateBiquadFilterEffectCommand(s32 mix_buffer_offset, for (s32 i = 0; i < channel_count; i++) { // TODO(ogniK): Actually implement biquad filter if (params.input[i] != params.output[i]) { - const auto* input = GetMixBuffer(mix_buffer_offset + params.input[i]); - auto* output = GetMixBuffer(mix_buffer_offset + params.output[i]); + std::span<const s32> input = GetMixBuffer(mix_buffer_offset + params.input[i]); + std::span<s32> output = GetMixBuffer(mix_buffer_offset + params.output[i]); ApplyMix<1>(output, input, 32768, worker_params.sample_count); } } @@ -640,14 +647,15 @@ void CommandGenerator::GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* inf if (samples_read != static_cast<int>(worker_params.sample_count) && samples_read <= params.sample_count) { - std::memset(GetMixBuffer(output_index), 0, params.sample_count - samples_read); + std::memset(GetMixBuffer(output_index).data(), 0, + params.sample_count - samples_read); } } else { AuxInfoDSP empty{}; memory.WriteBlock(aux->GetSendInfo(), &empty, sizeof(AuxInfoDSP)); memory.WriteBlock(aux->GetRecvInfo(), &empty, sizeof(AuxInfoDSP)); if (output_index != input_index) { - std::memcpy(GetMixBuffer(output_index), GetMixBuffer(input_index), + std::memcpy(GetMixBuffer(output_index).data(), GetMixBuffer(input_index).data(), worker_params.sample_count * sizeof(s32)); } } @@ -665,7 +673,7 @@ ServerSplitterDestinationData* CommandGenerator::GetDestinationData(s32 splitter } s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, - const s32* data, u32 sample_count, u32 write_offset, + std::span<const s32> data, u32 sample_count, u32 write_offset, u32 write_count) { if (max_samples == 0) { return 0; @@ -675,14 +683,14 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 return 0; } - std::size_t data_offset{}; + s32 data_offset{}; u32 remaining = sample_count; while (remaining > 0) { // Get position in buffer const auto base = send_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); // Write to output - memory.WriteBlock(base, (data + data_offset), samples_to_grab * sizeof(u32)); + memory.WriteBlock(base, (data.data() + data_offset), samples_to_grab * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; data_offset += samples_to_grab; @@ -695,7 +703,7 @@ s32 CommandGenerator::WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u3 } s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, - s32* out_data, u32 sample_count, u32 read_offset, + std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count) { if (max_samples == 0) { return 0; @@ -707,15 +715,16 @@ s32 CommandGenerator::ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u3 } u32 remaining = sample_count; + s32 data_offset{}; while (remaining > 0) { const auto base = recv_buffer + (offset * sizeof(u32)); const auto samples_to_grab = std::min(max_samples - offset, remaining); std::vector<s32> buffer(samples_to_grab); memory.ReadBlock(base, buffer.data(), buffer.size() * sizeof(u32)); - std::memcpy(out_data, buffer.data(), buffer.size() * sizeof(u32)); - out_data += samples_to_grab; + std::memcpy(out_data.data() + data_offset, buffer.data(), buffer.size() * sizeof(u32)); offset = (offset + samples_to_grab) % max_samples; remaining -= samples_to_grab; + data_offset += samples_to_grab; } if (read_count != 0) { @@ -795,7 +804,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta state.lowpass_1 = 0.0f; } else { const auto a = 1.0f - hf_gain; - const auto b = 2.0f * (1.0f - hf_gain * CosD(256.0f * info.hf_reference / + const auto b = 2.0f * (2.0f - hf_gain * CosD(256.0f * info.hf_reference / static_cast<f32>(info.sample_rate))); const auto c = std::sqrt(b * b - 4.0f * a * a); @@ -843,7 +852,7 @@ void CommandGenerator::UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbSta } const auto max_early_delay = state.early_delay_line.GetMaxDelay(); - const auto reflection_time = 1000.0f * (0.0098f * info.reverb_delay + 0.02f); + const auto reflection_time = 1000.0f * (0.9998f * info.reverb_delay + 0.02f); for (std::size_t tap = 0; tap < AudioCommon::I3DL2REVERB_TAPS; tap++) { const auto length = AudioCommon::CalculateDelaySamples( sample_rate, 1000.0f * info.reflection_delay + reflection_time * EARLY_TAP_TIMES[tap]); @@ -962,8 +971,8 @@ void CommandGenerator::GenerateMixCommand(std::size_t output_offset, std::size_t node_id, input_offset, output_offset, volume); } - auto* output = GetMixBuffer(output_offset); - const auto* input = GetMixBuffer(input_offset); + std::span<s32> output = GetMixBuffer(output_offset); + std::span<const s32> input = GetMixBuffer(input_offset); const s32 gain = static_cast<s32>(volume * 32768.0f); // Mix with loop unrolling @@ -1003,8 +1012,10 @@ void CommandGenerator::GenerateFinalMixCommand() { } } -s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, - s32 sample_count, s32 channel, std::size_t mix_offset) { +template <typename T> +s32 CommandGenerator::DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, + s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, + s32 channel, std::size_t mix_offset) { const auto& in_params = voice_info.GetInParams(); const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; if (wave_buffer.buffer_address == 0) { @@ -1013,39 +1024,50 @@ s32 CommandGenerator::DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_s if (wave_buffer.buffer_size == 0) { return 0; } - if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { + if (sample_end_offset < sample_start_offset) { return 0; } - const auto samples_remaining = - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset; + const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset; const auto start_offset = - ((wave_buffer.start_sample_offset + dsp_state.offset) * in_params.channel_count) * - sizeof(s16); + ((dsp_state.offset + sample_start_offset) * in_params.channel_count) * sizeof(T); const auto buffer_pos = wave_buffer.buffer_address + start_offset; const auto samples_processed = std::min(sample_count, samples_remaining); - if (in_params.channel_count == 1) { - std::vector<s16> buffer(samples_processed); - memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16)); - for (std::size_t i = 0; i < buffer.size(); i++) { - sample_buffer[mix_offset + i] = buffer[i]; - } - } else { - const auto channel_count = in_params.channel_count; - std::vector<s16> buffer(samples_processed * channel_count); - memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(s16)); + const auto channel_count = in_params.channel_count; + std::vector<T> buffer(samples_processed * channel_count); + memory.ReadBlock(buffer_pos, buffer.data(), buffer.size() * sizeof(T)); + if constexpr (std::is_floating_point_v<T>) { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) { + sample_buffer[mix_offset + i] = static_cast<s32>(buffer[i * channel_count + channel] * + std::numeric_limits<s16>::max()); + } + } else if constexpr (sizeof(T) == 1) { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) { + sample_buffer[mix_offset + i] = + static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] / + std::numeric_limits<s8>::max()) * + std::numeric_limits<s16>::max()); + } + } else if constexpr (sizeof(T) == 2) { for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) { sample_buffer[mix_offset + i] = buffer[i * channel_count + channel]; } + } else { + for (std::size_t i = 0; i < static_cast<std::size_t>(samples_processed); i++) { + sample_buffer[mix_offset + i] = + static_cast<s32>(static_cast<f32>(buffer[i * channel_count + channel] / + std::numeric_limits<s32>::max()) * + std::numeric_limits<s16>::max()); + } } return samples_processed; } s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, - s32 sample_count, [[maybe_unused]] s32 channel, - std::size_t mix_offset) { + s32 sample_start_offset, s32 sample_end_offset, s32 sample_count, + [[maybe_unused]] s32 channel, std::size_t mix_offset) { const auto& in_params = voice_info.GetInParams(); const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; if (wave_buffer.buffer_address == 0) { @@ -1054,7 +1076,7 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s if (wave_buffer.buffer_size == 0) { return 0; } - if (wave_buffer.end_sample_offset < wave_buffer.start_sample_offset) { + if (sample_end_offset < sample_start_offset) { return 0; } @@ -1079,10 +1101,9 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s s32 coef1 = coeffs[idx * 2]; s32 coef2 = coeffs[idx * 2 + 1]; - const auto samples_remaining = - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) - dsp_state.offset; + const auto samples_remaining = (sample_end_offset - sample_start_offset) - dsp_state.offset; const auto samples_processed = std::min(sample_count, samples_remaining); - const auto sample_pos = wave_buffer.start_sample_offset + dsp_state.offset; + const auto sample_pos = dsp_state.offset + sample_start_offset; const auto samples_remaining_in_frame = sample_pos % SAMPLES_PER_FRAME; auto position_in_frame = ((sample_pos / SAMPLES_PER_FRAME) * NIBBLES_PER_SAMPLE) + @@ -1157,12 +1178,14 @@ s32 CommandGenerator::DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_s return samples_processed; } -s32* CommandGenerator::GetMixBuffer(std::size_t index) { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span<s32> CommandGenerator::GetMixBuffer(std::size_t index) { + return std::span<s32>(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } -const s32* CommandGenerator::GetMixBuffer(std::size_t index) const { - return mix_buffer.data() + (index * worker_params.sample_count); +std::span<const s32> CommandGenerator::GetMixBuffer(std::size_t index) const { + return std::span<const s32>(mix_buffer.data() + (index * worker_params.sample_count), + worker_params.sample_count); } std::size_t CommandGenerator::GetMixChannelBufferOffset(s32 channel) const { @@ -1173,15 +1196,15 @@ std::size_t CommandGenerator::GetTotalMixBufferCount() const { return worker_params.mix_buffer_count + AudioCommon::MAX_CHANNEL_COUNT; } -s32* CommandGenerator::GetChannelMixBuffer(s32 channel) { +std::span<s32> CommandGenerator::GetChannelMixBuffer(s32 channel) { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -const s32* CommandGenerator::GetChannelMixBuffer(s32 channel) const { +std::span<const s32> CommandGenerator::GetChannelMixBuffer(s32 channel) const { return GetMixBuffer(worker_params.mix_buffer_count + channel); } -void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, +void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output, VoiceState& dsp_state, s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id) { @@ -1193,7 +1216,7 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o node_id, channel, in_params.sample_format, sample_count, in_params.sample_rate, in_params.mix_id, in_params.splitter_info_id); } - ASSERT_OR_EXECUTE(output != nullptr, { return; }); + ASSERT_OR_EXECUTE(output.data() != nullptr, { return; }); const auto resample_rate = static_cast<s32>( static_cast<float>(in_params.sample_rate) / static_cast<float>(target_sample_rate) * @@ -1210,9 +1233,9 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o } std::size_t temp_mix_offset{}; - bool is_buffer_completed{false}; + s32 samples_output{}; auto samples_remaining = sample_count; - while (samples_remaining > 0 && !is_buffer_completed) { + while (samples_remaining > 0) { const auto samples_to_output = std::min(samples_remaining, min_required_samples); const auto samples_to_read = (samples_to_output * resample_rate + dsp_state.fraction) >> 15; @@ -1229,24 +1252,53 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o const auto& wave_buffer = in_params.wave_buffer[dsp_state.wave_buffer_index]; // No more data can be read if (!dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index]) { - is_buffer_completed = true; break; } if (in_params.sample_format == SampleFormat::Adpcm && dsp_state.offset == 0 && wave_buffer.context_address != 0 && wave_buffer.context_size != 0) { - // TODO(ogniK): ADPCM loop context + memory.ReadBlock(wave_buffer.context_address, &dsp_state.context, + sizeof(ADPCMContext)); + } + + s32 samples_offset_start; + s32 samples_offset_end; + if (dsp_state.loop_count > 0 && wave_buffer.loop_start_sample != 0 && + wave_buffer.loop_end_sample != 0 && + wave_buffer.loop_start_sample <= wave_buffer.loop_end_sample) { + samples_offset_start = wave_buffer.loop_start_sample; + samples_offset_end = wave_buffer.loop_end_sample; + } else { + samples_offset_start = wave_buffer.start_sample_offset; + samples_offset_end = wave_buffer.end_sample_offset; } s32 samples_decoded{0}; switch (in_params.sample_format) { + case SampleFormat::Pcm8: + samples_decoded = + DecodePcm<s8>(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); + break; case SampleFormat::Pcm16: - samples_decoded = DecodePcm16(voice_info, dsp_state, samples_to_read - samples_read, - channel, temp_mix_offset); + samples_decoded = + DecodePcm<s16>(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); + break; + case SampleFormat::Pcm32: + samples_decoded = + DecodePcm<s32>(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); + break; + case SampleFormat::PcmFloat: + samples_decoded = + DecodePcm<f32>(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); break; case SampleFormat::Adpcm: - samples_decoded = DecodeAdpcm(voice_info, dsp_state, samples_to_read - samples_read, - channel, temp_mix_offset); + samples_decoded = + DecodeAdpcm(voice_info, dsp_state, samples_offset_start, samples_offset_end, + samples_to_read - samples_read, channel, temp_mix_offset); break; default: UNREACHABLE_MSG("Unimplemented sample format={}", in_params.sample_format); @@ -1257,15 +1309,19 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o dsp_state.offset += samples_decoded; dsp_state.played_sample_count += samples_decoded; - if (dsp_state.offset >= - (wave_buffer.end_sample_offset - wave_buffer.start_sample_offset) || + if (dsp_state.offset >= (samples_offset_end - samples_offset_start) || samples_decoded == 0) { // Reset our sample offset dsp_state.offset = 0; if (wave_buffer.is_looping) { - if (samples_decoded == 0) { + dsp_state.loop_count++; + if (wave_buffer.loop_count > 0 && + (dsp_state.loop_count > wave_buffer.loop_count || samples_decoded == 0)) { // End of our buffer - is_buffer_completed = true; + voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer); + } + + if (samples_decoded == 0) { break; } @@ -1273,35 +1329,29 @@ void CommandGenerator::DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* o dsp_state.played_sample_count = 0; } } else { - // Update our wave buffer states - dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false; - dsp_state.wave_buffer_consumed++; - dsp_state.wave_buffer_index = - (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS; - if (wave_buffer.end_of_stream) { - dsp_state.played_sample_count = 0; - } + voice_info.SetWaveBufferCompleted(dsp_state, wave_buffer); } } } if (in_params.behavior_flags.is_pitch_and_src_skipped.Value()) { // No need to resample - std::memcpy(output, sample_buffer.data(), samples_read * sizeof(s32)); + std::memcpy(output.data() + samples_output, sample_buffer.data(), + samples_read * sizeof(s32)); } else { std::fill(sample_buffer.begin() + temp_mix_offset, sample_buffer.begin() + temp_mix_offset + (samples_to_read - samples_read), 0); - AudioCore::Resample(output, sample_buffer.data(), resample_rate, dsp_state.fraction, - samples_to_output); + AudioCore::Resample(output.data() + samples_output, sample_buffer.data(), resample_rate, + dsp_state.fraction, samples_to_output); // Resample for (std::size_t i = 0; i < AudioCommon::MAX_SAMPLE_HISTORY; i++) { dsp_state.sample_history[i] = sample_buffer[samples_to_read + i]; } } - output += samples_to_output; samples_remaining -= samples_to_output; + samples_output += samples_to_output; } } diff --git a/src/audio_core/command_generator.h b/src/audio_core/command_generator.h index 2ebb755b04..59a33ba768 100644 --- a/src/audio_core/command_generator.h +++ b/src/audio_core/command_generator.h @@ -5,6 +5,7 @@ #pragma once #include <array> +#include <span> #include "audio_core/common.h" #include "audio_core/voice_context.h" #include "common/common_types.h" @@ -41,10 +42,10 @@ public: void PreCommand(); void PostCommand(); - [[nodiscard]] s32* GetChannelMixBuffer(s32 channel); - [[nodiscard]] const s32* GetChannelMixBuffer(s32 channel) const; - [[nodiscard]] s32* GetMixBuffer(std::size_t index); - [[nodiscard]] const s32* GetMixBuffer(std::size_t index) const; + [[nodiscard]] std::span<s32> GetChannelMixBuffer(s32 channel); + [[nodiscard]] std::span<const s32> GetChannelMixBuffer(s32 channel) const; + [[nodiscard]] std::span<s32> GetMixBuffer(std::size_t index); + [[nodiscard]] std::span<const s32> GetMixBuffer(std::size_t index) const; [[nodiscard]] std::size_t GetMixChannelBufferOffset(s32 channel) const; [[nodiscard]] std::size_t GetTotalMixBufferCount() const; @@ -77,21 +78,24 @@ private: void GenerateAuxCommand(s32 mix_buffer_offset, EffectBase* info, bool enabled); [[nodiscard]] ServerSplitterDestinationData* GetDestinationData(s32 splitter_id, s32 index); - s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, const s32* data, - u32 sample_count, u32 write_offset, u32 write_count); - s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, s32* out_data, - u32 sample_count, u32 read_offset, u32 read_count); + s32 WriteAuxBuffer(AuxInfoDSP& dsp_info, VAddr send_buffer, u32 max_samples, + std::span<const s32> data, u32 sample_count, u32 write_offset, + u32 write_count); + s32 ReadAuxBuffer(AuxInfoDSP& recv_info, VAddr recv_buffer, u32 max_samples, + std::span<s32> out_data, u32 sample_count, u32 read_offset, u32 read_count); void InitializeI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, std::vector<u8>& work_buffer); void UpdateI3dl2Reverb(I3dl2ReverbParams& info, I3dl2ReverbState& state, bool should_clear); // DSP Code - s32 DecodePcm16(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, - s32 channel, std::size_t mix_offset); - s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_count, - s32 channel, std::size_t mix_offset); - void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, s32* output, VoiceState& dsp_state, - s32 channel, s32 target_sample_rate, s32 sample_count, s32 node_id); + template <typename T> + s32 DecodePcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, + s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); + s32 DecodeAdpcm(ServerVoiceInfo& voice_info, VoiceState& dsp_state, s32 sample_start_offset, + s32 sample_end_offset, s32 sample_count, s32 channel, std::size_t mix_offset); + void DecodeFromWaveBuffers(ServerVoiceInfo& voice_info, std::span<s32> output, + VoiceState& dsp_state, s32 channel, s32 target_sample_rate, + s32 sample_count, s32 node_id); AudioCommon::AudioRendererParameter& worker_params; VoiceContext& voice_context; diff --git a/src/audio_core/common.h b/src/audio_core/common.h index fe546c55d0..1ab5375888 100644 --- a/src/audio_core/common.h +++ b/src/audio_core/common.h @@ -15,7 +15,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41}; constexpr ResultCode ERR_SPLITTER_SORT_FAILED{ErrorModule::Audio, 43}; } // namespace Audren -constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); +constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '9'); constexpr std::size_t MAX_MIX_BUFFERS = 24; constexpr std::size_t MAX_BIQUAD_FILTERS = 2; constexpr std::size_t MAX_CHANNEL_COUNT = 6; diff --git a/src/audio_core/info_updater.cpp b/src/audio_core/info_updater.cpp index 4a5b1b4abe..9b4ca1851a 100644 --- a/src/audio_core/info_updater.cpp +++ b/src/audio_core/info_updater.cpp @@ -189,9 +189,6 @@ bool InfoUpdater::UpdateVoices(VoiceContext& voice_context, if (voice_in_params.is_new) { // Default our values for our voice voice_info.Initialize(); - if (channel_count == 0 || channel_count > AudioCommon::MAX_CHANNEL_COUNT) { - continue; - } // Zero out our voice states for (std::size_t channel = 0; channel < channel_count; channel++) { diff --git a/src/audio_core/sink_context.cpp b/src/audio_core/sink_context.cpp index a69543696a..cc55b290c8 100644 --- a/src/audio_core/sink_context.cpp +++ b/src/audio_core/sink_context.cpp @@ -15,10 +15,17 @@ std::size_t SinkContext::GetCount() const { void SinkContext::UpdateMainSink(const SinkInfo::InParams& in) { ASSERT(in.type == SinkTypes::Device); - has_downmix_coefs = in.device.down_matrix_enabled; - if (has_downmix_coefs) { + if (in.device.down_matrix_enabled) { downmix_coefficients = in.device.down_matrix_coef; + } else { + downmix_coefficients = { + 1.0f, // front + 0.707f, // center + 0.0f, // lfe + 0.707f, // back + }; } + in_use = in.in_use; use_count = in.device.input_count; buffers = in.device.input; @@ -34,10 +41,6 @@ std::vector<u8> SinkContext::OutputBuffers() const { return buffer_ret; } -bool SinkContext::HasDownMixingCoefficients() const { - return has_downmix_coefs; -} - const DownmixCoefficients& SinkContext::GetDownmixCoefficients() const { return downmix_coefficients; } diff --git a/src/audio_core/sink_context.h b/src/audio_core/sink_context.h index 9e2b69785b..254961fe29 100644 --- a/src/audio_core/sink_context.h +++ b/src/audio_core/sink_context.h @@ -84,7 +84,6 @@ public: [[nodiscard]] bool InUse() const; [[nodiscard]] std::vector<u8> OutputBuffers() const; - [[nodiscard]] bool HasDownMixingCoefficients() const; [[nodiscard]] const DownmixCoefficients& GetDownmixCoefficients() const; private: @@ -92,7 +91,6 @@ private: s32 use_count{}; std::array<u8, AudioCommon::MAX_CHANNEL_COUNT> buffers{}; std::size_t sink_count{}; - bool has_downmix_coefs{false}; DownmixCoefficients downmix_coefficients{}; }; } // namespace AudioCore diff --git a/src/audio_core/voice_context.cpp b/src/audio_core/voice_context.cpp index 867b8fc6ba..d8c954b603 100644 --- a/src/audio_core/voice_context.cpp +++ b/src/audio_core/voice_context.cpp @@ -66,7 +66,7 @@ void ServerVoiceInfo::Initialize() { in_params.last_volume = 0.0f; in_params.biquad_filter.fill({}); in_params.wave_buffer_count = 0; - in_params.wave_bufffer_head = 0; + in_params.wave_buffer_head = 0; in_params.mix_id = AudioCommon::NO_MIX; in_params.splitter_info_id = AudioCommon::NO_SPLITTER; in_params.additional_params_address = 0; @@ -75,7 +75,7 @@ void ServerVoiceInfo::Initialize() { out_params.played_sample_count = 0; out_params.wave_buffer_consumed = 0; in_params.voice_drop_flag = false; - in_params.buffer_mapped = false; + in_params.buffer_mapped = true; in_params.wave_buffer_flush_request_count = 0; in_params.was_biquad_filter_enabled.fill(false); @@ -126,7 +126,7 @@ void ServerVoiceInfo::UpdateParameters(const VoiceInfo::InParams& voice_in, in_params.volume = voice_in.volume; in_params.biquad_filter = voice_in.biquad_filter; in_params.wave_buffer_count = voice_in.wave_buffer_count; - in_params.wave_bufffer_head = voice_in.wave_buffer_head; + in_params.wave_buffer_head = voice_in.wave_buffer_head; if (behavior_info.IsFlushVoiceWaveBuffersSupported()) { const auto in_request_count = in_params.wave_buffer_flush_request_count; const auto voice_request_count = voice_in.wave_buffer_flush_request_count; @@ -185,14 +185,16 @@ void ServerVoiceInfo::UpdateWaveBuffers( wave_buffer.buffer_size = 0; wave_buffer.context_address = 0; wave_buffer.context_size = 0; + wave_buffer.loop_start_sample = 0; + wave_buffer.loop_end_sample = 0; wave_buffer.sent_to_dsp = true; } // Mark all our wave buffers as invalid for (std::size_t channel = 0; channel < static_cast<std::size_t>(in_params.channel_count); channel++) { - for (auto& is_valid : voice_states[channel]->is_wave_buffer_valid) { - is_valid = false; + for (std::size_t i = 0; i < AudioCommon::MAX_WAVE_BUFFERS; ++i) { + voice_states[channel]->is_wave_buffer_valid[i] = false; } } } @@ -211,7 +213,7 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, const WaveBuffer& in_wave_buffer, SampleFormat sample_format, bool is_buffer_valid, [[maybe_unused]] BehaviorInfo& behavior_info) { - if (!is_buffer_valid && out_wavebuffer.sent_to_dsp) { + if (!is_buffer_valid && out_wavebuffer.sent_to_dsp && out_wavebuffer.buffer_address != 0) { out_wavebuffer.buffer_address = 0; out_wavebuffer.buffer_size = 0; } @@ -219,11 +221,40 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, if (!in_wave_buffer.sent_to_server || !in_params.buffer_mapped) { // Validate sample offset sizings if (sample_format == SampleFormat::Pcm16) { - const auto buffer_size = in_wave_buffer.buffer_size; - if (in_wave_buffer.start_sample_offset < 0 || in_wave_buffer.end_sample_offset < 0 || - (buffer_size < (sizeof(s16) * in_wave_buffer.start_sample_offset)) || - (buffer_size < (sizeof(s16) * in_wave_buffer.end_sample_offset))) { + const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size); + const s64 start = sizeof(s16) * in_wave_buffer.start_sample_offset; + const s64 end = sizeof(s16) * in_wave_buffer.end_sample_offset; + if (0 > start || start > buffer_size || 0 > end || end > buffer_size) { // TODO(ogniK): Write error info + LOG_ERROR(Audio, + "PCM16 wavebuffer has an invalid size. Buffer has size 0x{:08X}, but " + "offsets were " + "{:08X} - 0x{:08X}", + buffer_size, sizeof(s16) * in_wave_buffer.start_sample_offset, + sizeof(s16) * in_wave_buffer.end_sample_offset); + return; + } + } else if (sample_format == SampleFormat::Adpcm) { + const s64 buffer_size = static_cast<s64>(in_wave_buffer.buffer_size); + const s64 start_frames = in_wave_buffer.start_sample_offset / 14; + const s64 start_extra = in_wave_buffer.start_sample_offset % 14 == 0 + ? 0 + : (in_wave_buffer.start_sample_offset % 14) / 2 + 1 + + (in_wave_buffer.start_sample_offset % 2); + const s64 start = start_frames * 8 + start_extra; + const s64 end_frames = in_wave_buffer.end_sample_offset / 14; + const s64 end_extra = in_wave_buffer.end_sample_offset % 14 == 0 + ? 0 + : (in_wave_buffer.end_sample_offset % 14) / 2 + 1 + + (in_wave_buffer.end_sample_offset % 2); + const s64 end = end_frames * 8 + end_extra; + if (in_wave_buffer.start_sample_offset < 0 || start > buffer_size || + in_wave_buffer.end_sample_offset < 0 || end > buffer_size) { + LOG_ERROR(Audio, + "ADPMC wavebuffer has an invalid size. Buffer has size 0x{:08X}, but " + "offsets were " + "{:08X} - 0x{:08X}", + in_wave_buffer.buffer_size, start, end); return; } } @@ -239,29 +270,34 @@ void ServerVoiceInfo::UpdateWaveBuffer(ServerWaveBuffer& out_wavebuffer, out_wavebuffer.buffer_size = in_wave_buffer.buffer_size; out_wavebuffer.context_address = in_wave_buffer.context_address; out_wavebuffer.context_size = in_wave_buffer.context_size; + out_wavebuffer.loop_start_sample = in_wave_buffer.loop_start_sample; + out_wavebuffer.loop_end_sample = in_wave_buffer.loop_end_sample; in_params.buffer_mapped = in_wave_buffer.buffer_address != 0 && in_wave_buffer.buffer_size != 0; // TODO(ogniK): Pool mapper attachment // TODO(ogniK): IsAdpcmLoopContextBugFixed + if (sample_format == SampleFormat::Adpcm && in_wave_buffer.context_address != 0 && + in_wave_buffer.context_size != 0 && behavior_info.IsAdpcmLoopContextBugFixed()) { + } else { + out_wavebuffer.context_address = 0; + out_wavebuffer.context_size = 0; + } } } void ServerVoiceInfo::WriteOutStatus( VoiceInfo::OutParams& voice_out, VoiceInfo::InParams& voice_in, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& voice_states) { - if (voice_in.is_new) { + if (voice_in.is_new || in_params.is_new) { in_params.is_new = true; voice_out.wave_buffer_consumed = 0; voice_out.played_sample_count = 0; voice_out.voice_dropped = false; - } else if (!in_params.is_new) { - voice_out.wave_buffer_consumed = voice_states[0]->wave_buffer_consumed; - voice_out.played_sample_count = voice_states[0]->played_sample_count; - voice_out.voice_dropped = in_params.voice_drop_flag; } else { - voice_out.wave_buffer_consumed = 0; - voice_out.played_sample_count = 0; - voice_out.voice_dropped = false; + const auto& state = voice_states[0]; + voice_out.wave_buffer_consumed = state->wave_buffer_consumed; + voice_out.played_sample_count = state->played_sample_count; + voice_out.voice_dropped = state->voice_dropped; } } @@ -283,7 +319,8 @@ ServerVoiceInfo::OutParams& ServerVoiceInfo::GetOutParams() { bool ServerVoiceInfo::ShouldSkip() const { // TODO(ogniK): Handle unmapped wave buffers or parameters - return !in_params.in_use || (in_params.wave_buffer_count == 0) || in_params.voice_drop_flag; + return !in_params.in_use || in_params.wave_buffer_count == 0 || !in_params.buffer_mapped || + in_params.voice_drop_flag; } bool ServerVoiceInfo::UpdateForCommandGeneration(VoiceContext& voice_context) { @@ -381,7 +418,7 @@ bool ServerVoiceInfo::UpdateParametersForCommandGeneration( void ServerVoiceInfo::FlushWaveBuffers( u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, s32 channel_count) { - auto wave_head = in_params.wave_bufffer_head; + auto wave_head = in_params.wave_buffer_head; for (u8 i = 0; i < flush_count; i++) { in_params.wave_buffer[wave_head].sent_to_dsp = true; @@ -401,6 +438,17 @@ bool ServerVoiceInfo::HasValidWaveBuffer(const VoiceState* state) const { return std::find(valid_wb.begin(), valid_wb.end(), true) != valid_wb.end(); } +void ServerVoiceInfo::SetWaveBufferCompleted(VoiceState& dsp_state, + const ServerWaveBuffer& wave_buffer) { + dsp_state.is_wave_buffer_valid[dsp_state.wave_buffer_index] = false; + dsp_state.wave_buffer_consumed++; + dsp_state.wave_buffer_index = (dsp_state.wave_buffer_index + 1) % AudioCommon::MAX_WAVE_BUFFERS; + dsp_state.loop_count = 0; + if (wave_buffer.end_of_stream) { + dsp_state.played_sample_count = 0; + } +} + VoiceContext::VoiceContext(std::size_t voice_count_) : voice_count{voice_count_} { for (std::size_t i = 0; i < voice_count; i++) { voice_channel_resources.emplace_back(static_cast<s32>(i)); diff --git a/src/audio_core/voice_context.h b/src/audio_core/voice_context.h index 70359cadb1..e1050897bb 100644 --- a/src/audio_core/voice_context.h +++ b/src/audio_core/voice_context.h @@ -60,10 +60,12 @@ struct WaveBuffer { u8 is_looping{}; u8 end_of_stream{}; u8 sent_to_server{}; - INSERT_PADDING_BYTES(5); + INSERT_PADDING_BYTES(1); + s32 loop_count{}; u64 context_address{}; u64 context_size{}; - INSERT_PADDING_BYTES(8); + u32 loop_start_sample{}; + u32 loop_end_sample{}; }; static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer is an invalid size"); @@ -76,6 +78,9 @@ struct ServerWaveBuffer { bool end_of_stream{}; VAddr context_address{}; std::size_t context_size{}; + s32 loop_count{}; + u32 loop_start_sample{}; + u32 loop_end_sample{}; bool sent_to_dsp{true}; }; @@ -108,6 +113,7 @@ struct VoiceState { u32 external_context_size; bool is_external_context_used; bool voice_dropped; + s32 loop_count; }; class VoiceChannelResource { @@ -206,7 +212,7 @@ public: float last_volume{}; std::array<BiquadFilterParameter, AudioCommon::MAX_BIQUAD_FILTERS> biquad_filter{}; s32 wave_buffer_count{}; - s16 wave_bufffer_head{}; + s16 wave_buffer_head{}; INSERT_PADDING_BYTES(2); BehaviorFlags behavior_flags{}; VAddr additional_params_address{}; @@ -252,6 +258,7 @@ public: void FlushWaveBuffers(u8 flush_count, std::array<VoiceState*, AudioCommon::MAX_CHANNEL_COUNT>& dsp_voice_states, s32 channel_count); + void SetWaveBufferCompleted(VoiceState& dsp_state, const ServerWaveBuffer& wave_buffer); private: std::vector<s16> stored_samples; diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp index 077f349950..274f57659b 100644 --- a/src/common/fs/file.cpp +++ b/src/common/fs/file.cpp @@ -306,9 +306,9 @@ bool IOFile::Flush() const { errno = 0; #ifdef _WIN32 - const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; + const auto flush_result = std::fflush(file) == 0; #else - const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; + const auto flush_result = std::fflush(file) == 0; #endif if (!flush_result) { @@ -320,6 +320,28 @@ bool IOFile::Flush() const { return flush_result; } +bool IOFile::Commit() const { + if (!IsOpen()) { + return false; + } + + errno = 0; + +#ifdef _WIN32 + const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0; +#else + const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0; +#endif + + if (!commit_result) { + const auto ec = std::error_code{errno, std::generic_category()}; + LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}", + PathToUTF8String(file_path), ec.message()); + } + + return commit_result; +} + bool IOFile::SetSize(u64 size) const { if (!IsOpen()) { return false; @@ -347,6 +369,9 @@ u64 IOFile::GetSize() const { return 0; } + // Flush any unwritten buffered data into the file prior to retrieving the file size. + std::fflush(file); + std::error_code ec; const auto file_size = fs::file_size(file_path, ec); diff --git a/src/common/fs/file.h b/src/common/fs/file.h index 588fe619df..2c4ab43320 100644 --- a/src/common/fs/file.h +++ b/src/common/fs/file.h @@ -396,13 +396,22 @@ public: [[nodiscard]] size_t WriteString(std::span<const char> string) const; /** - * Attempts to flush any unwritten buffered data into the file and flush the file into the disk. + * Attempts to flush any unwritten buffered data into the file. * * @returns True if the flush was successful, false otherwise. */ bool Flush() const; /** + * Attempts to commit the file into the disk. + * Note that this is an expensive operation as this forces the operating system to write + * the contents of the file associated with the file descriptor into the disk. + * + * @returns True if the commit was successful, false otherwise. + */ + bool Commit() const; + + /** * Resizes the file to a given size. * If the file is resized to a smaller size, the remainder of the file is discarded. * If the file is resized to a larger size, the new area appears as if zero-filled. diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index b6fa4affb8..61dddab3f5 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) { FileBackend::~FileBackend() = default; void FileBackend::Write(const Entry& entry) { + if (!file->IsOpen()) { + return; + } + using namespace Common::Literals; - // prevent logs from going over the maximum size (in case its spamming and the user doesn't - // know) + // Prevent logs from exceeding a set maximum size in the event that log entries are spammed. constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB; constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB; - if (!file->IsOpen()) { - return; - } + const bool write_limit_exceeded = + bytes_written > MAX_BYTES_WRITTEN_EXTENDED || + (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging); - if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) { - return; - } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) { + // Close the file after the write limit is exceeded. + if (write_limit_exceeded) { + file->Close(); return; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 83b5b76769..b2b0dbe051 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -654,24 +654,19 @@ endif() if (MSVC) target_compile_options(core PRIVATE - /we4018 # 'expression' : signed/unsigned mismatch - /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) - /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data + /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4305 # 'context' : truncation from 'type1' to 'type2' /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration - /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(core PRIVATE -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=sign-compare -Werror=shadow $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess> diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index 53b8b7ca0d..7c0950bb01 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp @@ -345,8 +345,10 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList( static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType type, const Service::FileSystem::FileSystemController& fs_controller) { const auto load_dir = fs_controller.GetModificationLoadRoot(title_id); + const auto sdmc_load_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); if ((type != ContentRecordType::Program && type != ContentRecordType::Data) || - load_dir == nullptr || load_dir->GetSize() <= 0) { + ((load_dir == nullptr || load_dir->GetSize() <= 0) && + (sdmc_load_dir == nullptr || sdmc_load_dir->GetSize() <= 0))) { return; } @@ -356,7 +358,10 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t } const auto& disabled = Settings::values.disabled_addons[title_id]; - auto patch_dirs = load_dir->GetSubdirectories(); + std::vector<VirtualDir> patch_dirs = load_dir->GetSubdirectories(); + if (std::find(disabled.cbegin(), disabled.cend(), "SDMC") == disabled.cend()) { + patch_dirs.push_back(sdmc_load_dir); + } std::sort(patch_dirs.begin(), patch_dirs.end(), [](const VirtualDir& l, const VirtualDir& r) { return l->GetName() < r->GetName(); }); @@ -402,7 +407,7 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t } VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, ContentRecordType type, - VirtualFile update_raw) const { + VirtualFile update_raw, bool apply_layeredfs) const { const auto log_string = fmt::format("Patching RomFS for title_id={:016X}, type={:02X}", title_id, static_cast<u8>(type)); @@ -442,7 +447,9 @@ VirtualFile PatchManager::PatchRomFS(VirtualFile romfs, u64 ivfc_offset, Content } // LayeredFS - ApplyLayeredFS(romfs, title_id, type, fs_controller); + if (apply_layeredfs) { + ApplyLayeredFS(romfs, title_id, type, fs_controller); + } return romfs; } @@ -524,6 +531,15 @@ PatchManager::PatchVersionNames PatchManager::GetPatchVersionNames(VirtualFile u } } + // SDMC mod directory (RomFS LayeredFS) + const auto sdmc_mod_dir = fs_controller.GetSDMCModificationLoadRoot(title_id); + if (sdmc_mod_dir != nullptr && sdmc_mod_dir->GetSize() > 0 && + IsDirValidAndNonEmpty(FindSubdirectoryCaseless(sdmc_mod_dir, "romfs"))) { + const auto mod_disabled = + std::find(disabled.begin(), disabled.end(), "SDMC") != disabled.end(); + out.insert_or_assign(mod_disabled ? "[D] SDMC" : "SDMC", "LayeredFS"); + } + // DLC const auto dlc_entries = content_provider.ListEntriesFilter(TitleType::AOC, ContentRecordType::Data); diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h index fb18530355..3be871f35d 100644 --- a/src/core/file_sys/patch_manager.h +++ b/src/core/file_sys/patch_manager.h @@ -64,7 +64,8 @@ public: // - LayeredFS [[nodiscard]] VirtualFile PatchRomFS(VirtualFile base, u64 ivfc_offset, ContentRecordType type = ContentRecordType::Program, - VirtualFile update_raw = nullptr) const; + VirtualFile update_raw = nullptr, + bool apply_layeredfs = true) const; // Returns a vector of pairs between patch names and patch versions. // i.e. Update 3.2.2 will return {"Update", "3.2.2"} diff --git a/src/core/file_sys/sdmc_factory.cpp b/src/core/file_sys/sdmc_factory.cpp index cb56d8f2db..e5c72cd4d8 100644 --- a/src/core/file_sys/sdmc_factory.cpp +++ b/src/core/file_sys/sdmc_factory.cpp @@ -12,23 +12,32 @@ namespace FileSys { constexpr u64 SDMC_TOTAL_SIZE = 0x10000000000; // 1 TiB -SDMCFactory::SDMCFactory(VirtualDir dir_) - : dir(std::move(dir_)), contents(std::make_unique<RegisteredCache>( - GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/registered"), - [](const VirtualFile& file, const NcaID& id) { - return NAX{file, id}.GetDecrypted(); - })), +SDMCFactory::SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_) + : sd_dir(std::move(sd_dir_)), sd_mod_dir(std::move(sd_mod_dir_)), + contents(std::make_unique<RegisteredCache>( + GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/registered"), + [](const VirtualFile& file, const NcaID& id) { + return NAX{file, id}.GetDecrypted(); + })), placeholder(std::make_unique<PlaceholderCache>( - GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents/placehld"))) {} + GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents/placehld"))) {} SDMCFactory::~SDMCFactory() = default; ResultVal<VirtualDir> SDMCFactory::Open() const { - return MakeResult<VirtualDir>(dir); + return MakeResult<VirtualDir>(sd_dir); +} + +VirtualDir SDMCFactory::GetSDMCModificationLoadRoot(u64 title_id) const { + // LayeredFS doesn't work on updates and title id-less homebrew + if (title_id == 0 || (title_id & 0xFFF) == 0x800) { + return nullptr; + } + return GetOrCreateDirectoryRelative(sd_mod_dir, fmt::format("/{:016X}", title_id)); } VirtualDir SDMCFactory::GetSDMCContentDirectory() const { - return GetOrCreateDirectoryRelative(dir, "/Nintendo/Contents"); + return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Contents"); } RegisteredCache* SDMCFactory::GetSDMCContents() const { @@ -40,11 +49,11 @@ PlaceholderCache* SDMCFactory::GetSDMCPlaceholder() const { } VirtualDir SDMCFactory::GetImageDirectory() const { - return GetOrCreateDirectoryRelative(dir, "/Nintendo/Album"); + return GetOrCreateDirectoryRelative(sd_dir, "/Nintendo/Album"); } u64 SDMCFactory::GetSDMCFreeSpace() const { - return GetSDMCTotalSpace() - dir->GetSize(); + return GetSDMCTotalSpace() - sd_dir->GetSize(); } u64 SDMCFactory::GetSDMCTotalSpace() const { diff --git a/src/core/file_sys/sdmc_factory.h b/src/core/file_sys/sdmc_factory.h index 2bb92ba936..3a3d11f3a2 100644 --- a/src/core/file_sys/sdmc_factory.h +++ b/src/core/file_sys/sdmc_factory.h @@ -16,11 +16,12 @@ class PlaceholderCache; /// File system interface to the SDCard archive class SDMCFactory { public: - explicit SDMCFactory(VirtualDir dir); + explicit SDMCFactory(VirtualDir sd_dir_, VirtualDir sd_mod_dir_); ~SDMCFactory(); ResultVal<VirtualDir> Open() const; + VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const; VirtualDir GetSDMCContentDirectory() const; RegisteredCache* GetSDMCContents() const; @@ -32,7 +33,8 @@ public: u64 GetSDMCTotalSpace() const; private: - VirtualDir dir; + VirtualDir sd_dir; + VirtualDir sd_mod_dir; std::unique_ptr<RegisteredCache> contents; std::unique_ptr<PlaceholderCache> placeholder; diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 61bda37865..ceff2532dd 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h @@ -345,8 +345,12 @@ public: explicit RequestParser(u32* command_buffer) : RequestHelperBase(command_buffer) {} explicit RequestParser(Kernel::HLERequestContext& ctx) : RequestHelperBase(ctx) { - ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete"); - Skip(ctx.GetDataPayloadOffset(), false); + // TIPC does not have data payload offset + if (!ctx.IsTipc()) { + ASSERT_MSG(ctx.GetDataPayloadOffset(), "context is incomplete"); + Skip(ctx.GetDataPayloadOffset(), false); + } + // Skip the u64 command id, it's already stored in the context static constexpr u32 CommandIdSize = 2; Skip(CommandIdSize, false); diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp index fec704c65e..dd945e058a 100644 --- a/src/core/hle/service/aoc/aoc_u.cpp +++ b/src/core/hle/service/aoc/aoc_u.cpp @@ -117,7 +117,7 @@ AOC_U::AOC_U(Core::System& system_) {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, {9, nullptr, "GetAddOnContentLostErrorCode"}, - {10, nullptr, "GetAddOnContentListChangedEventWithProcessId"}, + {10, &AOC_U::GetAddOnContentListChangedEventWithProcessId, "GetAddOnContentListChangedEventWithProcessId"}, {100, &AOC_U::CreateEcPurchasedEventManager, "CreateEcPurchasedEventManager"}, {101, &AOC_U::CreatePermanentEcPurchasedEventManager, "CreatePermanentEcPurchasedEventManager"}, {110, nullptr, "CreateContentsServiceManager"}, @@ -257,6 +257,14 @@ void AOC_U::GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx) { rb.PushCopyObjects(aoc_change_event.GetReadableEvent()); } +void AOC_U::GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx) { + LOG_WARNING(Service_AOC, "(STUBBED) called"); + + IPC::ResponseBuilder rb{ctx, 2, 1}; + rb.Push(ResultSuccess); + rb.PushCopyObjects(aoc_change_event.GetReadableEvent()); +} + void AOC_U::CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_AOC, "(STUBBED) called"); diff --git a/src/core/hle/service/aoc/aoc_u.h b/src/core/hle/service/aoc/aoc_u.h index 65095baa20..bb6ffb8eb9 100644 --- a/src/core/hle/service/aoc/aoc_u.h +++ b/src/core/hle/service/aoc/aoc_u.h @@ -28,6 +28,7 @@ private: void GetAddOnContentBaseId(Kernel::HLERequestContext& ctx); void PrepareAddOnContent(Kernel::HLERequestContext& ctx); void GetAddOnContentListChangedEvent(Kernel::HLERequestContext& ctx); + void GetAddOnContentListChangedEventWithProcessId(Kernel::HLERequestContext& ctx); void CreateEcPurchasedEventManager(Kernel::HLERequestContext& ctx); void CreatePermanentEcPurchasedEventManager(Kernel::HLERequestContext& ctx); diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index 800feba6e8..b769fe9599 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp @@ -96,7 +96,7 @@ private: void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_Audio, "(STUBBED) called"); - std::vector<u8> output_params(ctx.GetWriteBufferSize()); + std::vector<u8> output_params(ctx.GetWriteBufferSize(), 0); auto result = renderer->UpdateAudioRenderer(ctx.ReadBuffer(), output_params); if (result.IsSuccess()) { @@ -110,17 +110,19 @@ private: void Start(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; + const auto result = renderer->Start(); - rb.Push(ResultSuccess); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); } void Stop(Kernel::HLERequestContext& ctx) { LOG_WARNING(Service_Audio, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; + const auto result = renderer->Stop(); - rb.Push(ResultSuccess); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(result); } void QuerySystemEvent(Kernel::HLERequestContext& ctx) { @@ -288,7 +290,7 @@ private: IPC::ResponseBuilder rb{ctx, 3}; rb.Push(ResultSuccess); - rb.Push<u32>(1); + rb.Push<u32>(2); } // Should be similar to QueryAudioDeviceOutputEvent diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 10e6f7a64a..33a6dbbb6e 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -253,7 +253,11 @@ void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { rb.Push<u32>(worker_buffer_sz); } -void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { +void HwOpus::GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx) { + GetWorkBufferSize(ctx); +} + +void HwOpus::OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const auto sample_rate = rp.Pop<u32>(); const auto channel_count = rp.Pop<u32>(); @@ -291,14 +295,47 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { system, OpusDecoderState{std::move(decoder), sample_rate, channel_count}); } +void HwOpus::OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto sample_rate = rp.Pop<u32>(); + const auto channel_count = rp.Pop<u32>(); + + LOG_CRITICAL(Audio, "called sample_rate={}, channel_count={}", sample_rate, channel_count); + + ASSERT_MSG(sample_rate == 48000 || sample_rate == 24000 || sample_rate == 16000 || + sample_rate == 12000 || sample_rate == 8000, + "Invalid sample rate"); + ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); + + const int num_stereo_streams = channel_count == 2 ? 1 : 0; + const auto mapping_table = CreateMappingTable(channel_count); + + int error = 0; + OpusDecoderPtr decoder{ + opus_multistream_decoder_create(sample_rate, static_cast<int>(channel_count), 1, + num_stereo_streams, mapping_table.data(), &error)}; + if (error != OPUS_OK || decoder == nullptr) { + LOG_ERROR(Audio, "Failed to create Opus decoder (error={}).", error); + IPC::ResponseBuilder rb{ctx, 2}; + // TODO(ogniK): Use correct error code + rb.Push(ResultUnknown); + return; + } + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(ResultSuccess); + rb.PushIpcInterface<IHardwareOpusDecoderManager>( + system, OpusDecoderState{std::move(decoder), sample_rate, channel_count}); +} + HwOpus::HwOpus(Core::System& system_) : ServiceFramework{system_, "hwopus"} { static const FunctionInfo functions[] = { - {0, &HwOpus::OpenOpusDecoder, "OpenOpusDecoder"}, + {0, &HwOpus::OpenHardwareOpusDecoder, "OpenHardwareOpusDecoder"}, {1, &HwOpus::GetWorkBufferSize, "GetWorkBufferSize"}, {2, nullptr, "OpenOpusDecoderForMultiStream"}, {3, nullptr, "GetWorkBufferSizeForMultiStream"}, - {4, nullptr, "OpenHardwareOpusDecoderEx"}, - {5, nullptr, "GetWorkBufferSizeEx"}, + {4, &HwOpus::OpenHardwareOpusDecoderEx, "OpenHardwareOpusDecoderEx"}, + {5, &HwOpus::GetWorkBufferSizeEx, "GetWorkBufferSizeEx"}, {6, nullptr, "OpenHardwareOpusDecoderForMultiStreamEx"}, {7, nullptr, "GetWorkBufferSizeForMultiStreamEx"}, }; diff --git a/src/core/hle/service/audio/hwopus.h b/src/core/hle/service/audio/hwopus.h index 4f921f18ed..b74824ff32 100644 --- a/src/core/hle/service/audio/hwopus.h +++ b/src/core/hle/service/audio/hwopus.h @@ -18,8 +18,10 @@ public: ~HwOpus() override; private: - void OpenOpusDecoder(Kernel::HLERequestContext& ctx); + void OpenHardwareOpusDecoder(Kernel::HLERequestContext& ctx); + void OpenHardwareOpusDecoderEx(Kernel::HLERequestContext& ctx); void GetWorkBufferSize(Kernel::HLERequestContext& ctx); + void GetWorkBufferSizeEx(Kernel::HLERequestContext& ctx); }; } // namespace Service::Audio diff --git a/src/core/hle/service/filesystem/filesystem.cpp b/src/core/hle/service/filesystem/filesystem.cpp index 3c16fe6c71..4a9b13e451 100644 --- a/src/core/hle/service/filesystem/filesystem.cpp +++ b/src/core/hle/service/filesystem/filesystem.cpp @@ -703,6 +703,16 @@ FileSys::VirtualDir FileSystemController::GetModificationLoadRoot(u64 title_id) return bis_factory->GetModificationLoadRoot(title_id); } +FileSys::VirtualDir FileSystemController::GetSDMCModificationLoadRoot(u64 title_id) const { + LOG_TRACE(Service_FS, "Opening SDMC mod load root for tid={:016X}", title_id); + + if (sdmc_factory == nullptr) { + return nullptr; + } + + return sdmc_factory->GetSDMCModificationLoadRoot(title_id); +} + FileSys::VirtualDir FileSystemController::GetModificationDumpRoot(u64 title_id) const { LOG_TRACE(Service_FS, "Opening mod dump root for tid={:016X}", title_id); @@ -733,20 +743,23 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove } using YuzuPath = Common::FS::YuzuPath; + const auto sdmc_dir_path = Common::FS::GetYuzuPath(YuzuPath::SDMCDir); + const auto sdmc_load_dir_path = sdmc_dir_path / "atmosphere/contents"; const auto rw_mode = FileSys::Mode::ReadWrite; auto nand_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::NANDDir), rw_mode); - auto sd_directory = - vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::SDMCDir), rw_mode); + auto sd_directory = vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_dir_path), rw_mode); auto load_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::LoadDir), FileSys::Mode::Read); + auto sd_load_directory = + vfs.OpenDirectory(Common::FS::PathToUTF8String(sdmc_load_dir_path), FileSys::Mode::Read); auto dump_directory = vfs.OpenDirectory(Common::FS::GetYuzuPathString(YuzuPath::DumpDir), rw_mode); if (bis_factory == nullptr) { - bis_factory = - std::make_unique<FileSys::BISFactory>(nand_directory, load_directory, dump_directory); + bis_factory = std::make_unique<FileSys::BISFactory>( + nand_directory, std::move(load_directory), std::move(dump_directory)); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SysNAND, bis_factory->GetSystemNANDContents()); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::UserNAND, @@ -759,7 +772,8 @@ void FileSystemController::CreateFactories(FileSys::VfsFilesystem& vfs, bool ove } if (sdmc_factory == nullptr) { - sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory)); + sdmc_factory = std::make_unique<FileSys::SDMCFactory>(std::move(sd_directory), + std::move(sd_load_directory)); system.RegisterContentProvider(FileSys::ContentProviderUnionSlot::SDMC, sdmc_factory->GetSDMCContents()); } diff --git a/src/core/hle/service/filesystem/filesystem.h b/src/core/hle/service/filesystem/filesystem.h index b6b1b92207..d387af3cbe 100644 --- a/src/core/hle/service/filesystem/filesystem.h +++ b/src/core/hle/service/filesystem/filesystem.h @@ -115,6 +115,7 @@ public: FileSys::VirtualDir GetContentDirectory(ContentStorageId id) const; FileSys::VirtualDir GetImageDirectory(ImageDirectoryId id) const; + FileSys::VirtualDir GetSDMCModificationLoadRoot(u64 title_id) const; FileSys::VirtualDir GetModificationLoadRoot(u64 title_id) const; FileSys::VirtualDir GetModificationDumpRoot(u64 title_id) const; diff --git a/src/core/hle/service/mii/manager.cpp b/src/core/hle/service/mii/manager.cpp index 114aff31c7..869d2763f0 100644 --- a/src/core/hle/service/mii/manager.cpp +++ b/src/core/hle/service/mii/manager.cpp @@ -20,6 +20,7 @@ namespace { constexpr ResultCode ERROR_CANNOT_FIND_ENTRY{ErrorModule::Mii, 4}; +constexpr std::size_t BaseMiiCount{2}; constexpr std::size_t DefaultMiiCount{RawData::DefaultMii.size()}; constexpr MiiStoreData::Name DefaultMiiName{u'y', u'u', u'z', u'u'}; @@ -415,7 +416,7 @@ u32 MiiManager::GetCount(SourceFlag source_flag) const { count += 0; } if ((source_flag & SourceFlag::Default) != SourceFlag::None) { - count += DefaultMiiCount; + count += (DefaultMiiCount - BaseMiiCount); } return static_cast<u32>(count); } @@ -445,7 +446,7 @@ ResultVal<std::vector<MiiInfoElement>> MiiManager::GetDefault(SourceFlag source_ return MakeResult(std::move(result)); } - for (std::size_t index = 0; index < DefaultMiiCount; index++) { + for (std::size_t index = BaseMiiCount; index < DefaultMiiCount; index++) { result.emplace_back(BuildDefault(index), Source::Default); } diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt index 7c5763f9c1..c4283a9526 100644 --- a/src/input_common/CMakeLists.txt +++ b/src/input_common/CMakeLists.txt @@ -34,28 +34,17 @@ if (MSVC) /W4 /WX - # 'expression' : signed/unsigned mismatch - /we4018 - # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) - /we4244 - # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch - /we4245 - # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data - /we4254 - # 'var' : conversion from 'size_t' to 'type', possible loss of data - /we4267 - # 'context' : truncation from 'type1' to 'type2' - /we4305 + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data + /we4245 # 'conversion': conversion from 'type1' to 'type2', signed/unsigned mismatch + /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data ) else() target_compile_options(input_common PRIVATE -Werror -Werror=conversion -Werror=ignored-qualifiers - -Werror=implicit-fallthrough - -Werror=reorder -Werror=shadow - -Werror=sign-compare $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> -Werror=unused-variable diff --git a/src/input_common/analog_from_button.cpp b/src/input_common/analog_from_button.cpp index 100138d114..2fafd077f7 100755 --- a/src/input_common/analog_from_button.cpp +++ b/src/input_common/analog_from_button.cpp @@ -27,6 +27,7 @@ public: down->SetCallback(callbacks); left->SetCallback(callbacks); right->SetCallback(callbacks); + modifier->SetCallback(callbacks); } bool IsAngleGreater(float old_angle, float new_angle) const { diff --git a/src/input_common/gcadapter/gc_adapter.cpp b/src/input_common/gcadapter/gc_adapter.cpp index 320f51ee65..a2f1bb67c8 100644 --- a/src/input_common/gcadapter/gc_adapter.cpp +++ b/src/input_common/gcadapter/gc_adapter.cpp @@ -5,14 +5,7 @@ #include <chrono> #include <thread> -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4200) // nonstandard extension used : zero-sized array in struct/union -#endif #include <libusb.h> -#ifdef _MSC_VER -#pragma warning(pop) -#endif #include "common/logging/log.h" #include "common/param_package.h" diff --git a/src/input_common/udp/protocol.h b/src/input_common/udp/protocol.h index a3d276697c..1bdc9209e7 100644 --- a/src/input_common/udp/protocol.h +++ b/src/input_common/udp/protocol.h @@ -8,14 +8,7 @@ #include <optional> #include <type_traits> -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4701) -#endif #include <boost/crc.hpp> -#ifdef _MSC_VER -#pragma warning(pop) -#endif #include "common/bit_field.h" #include "common/swap.h" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f9454bbaac..e4de55f4d8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -292,12 +292,12 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE - /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data + /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data + /we4244 # 'conversion': conversion from 'type1' to 'type2', possible loss of data /we4456 # Declaration of 'identifier' hides previous local declaration /we4457 # Declaration of 'identifier' hides function parameter /we4458 # Declaration of 'identifier' hides class member /we4459 # Declaration of 'identifier' hides global declaration - /we4715 # 'function' : not all control paths return a value ) else() target_compile_options(video_core PRIVATE diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9d726a6fb4..cad7f902dc 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -99,7 +99,7 @@ class BufferCache { }; public: - static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB; + static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, @@ -109,8 +109,6 @@ public: void TickFrame(); - void RunGarbageCollector(); - void WriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size); @@ -197,6 +195,8 @@ private: ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); } + void RunGarbageCollector(); + void BindHostIndexBuffer(); void BindHostVertexBuffers(); @@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { template <class P> void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { - ForEachBufferInRange(cpu_addr, size, - [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); + ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { + DownloadBufferMemory(buffer, cpu_addr, size); + }); } template <class P> diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index a3fda1094a..8b86ad0508 100644 --- a/src/video_core/cdma_pusher.cpp +++ b/src/video_core/cdma_pusher.cpp @@ -103,8 +103,7 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "NVDEC method 0x{:X}", static_cast<u32>(nvdec_thi_state.method_0)); - nvdec_processor->ProcessMethod(static_cast<Nvdec::Method>(nvdec_thi_state.method_0), - data); + nvdec_processor->ProcessMethod(nvdec_thi_state.method_0, data); break; default: break; diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index d02dc6260d..1b4bbc8ac4 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,8 +23,8 @@ void AVFrameDeleter(AVFrame* ptr) { av_free(ptr); } -Codec::Codec(GPU& gpu_) - : gpu(gpu_), h264_decoder(std::make_unique<Decoder::H264>(gpu)), +Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) + : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} Codec::~Codec() { @@ -43,46 +43,48 @@ Codec::~Codec() { avcodec_close(av_codec_ctx); } +void Codec::Initialize() { + AVCodecID codec{AV_CODEC_ID_NONE}; + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + codec = AV_CODEC_ID_H264; + break; + case NvdecCommon::VideoCodec::Vp9: + codec = AV_CODEC_ID_VP9; + break; + default: + return; + } + av_codec = avcodec_find_decoder(codec); + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); + + // TODO(ameerj): libavcodec gpu hw acceleration + + const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); + if (av_error < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); + avcodec_close(av_codec_ctx); + return; + } + initialized = true; + return; +} + void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { if (current_codec != codec) { - LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", static_cast<u32>(codec)); current_codec = codec; + LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); } } -void Codec::StateWrite(u32 offset, u64 arguments) { - u8* const state_offset = reinterpret_cast<u8*>(&state) + offset * sizeof(u64); - std::memcpy(state_offset, &arguments, sizeof(u64)); -} - void Codec::Decode() { - bool is_first_frame = false; + const bool is_first_frame = !initialized; if (!initialized) { - if (current_codec == NvdecCommon::VideoCodec::H264) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_H264); - } else if (current_codec == NvdecCommon::VideoCodec::Vp9) { - av_codec = avcodec_find_decoder(AV_CODEC_ID_VP9); - } else { - LOG_ERROR(Service_NVDRV, "Unknown video codec {}", current_codec); - return; - } - - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - - // TODO(ameerj): libavcodec gpu hw acceleration - - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - return; - } - initialized = true; - is_first_frame = true; + Initialize(); } - bool vp9_hidden_frame = false; + bool vp9_hidden_frame = false; AVPacket packet{}; av_init_packet(&packet); std::vector<u8> frame_data; @@ -95,7 +97,7 @@ void Codec::Decode() { } packet.data = frame_data.data(); - packet.size = static_cast<int>(frame_data.size()); + packet.size = static_cast<s32>(frame_data.size()); avcodec_send_packet(av_codec_ctx, &packet); @@ -127,4 +129,21 @@ NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } +std::string_view Codec::GetCurrentCodecName() const { + switch (current_codec) { + case NvdecCommon::VideoCodec::None: + return "None"; + case NvdecCommon::VideoCodec::H264: + return "H264"; + case NvdecCommon::VideoCodec::Vp8: + return "VP8"; + case NvdecCommon::VideoCodec::H265: + return "H265"; + case NvdecCommon::VideoCodec::Vp9: + return "VP9"; + default: + return "Unknown"; + } +}; + } // namespace Tegra diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 8a2a6c360d..96c823c765 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -34,15 +34,15 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu); + explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); ~Codec(); + /// Initialize the codec, returning success or failure + void Initialize(); + /// Sets NVDEC video stream codec void SetTargetCodec(NvdecCommon::VideoCodec codec); - /// Populate NvdecRegisters state with argument value at the provided offset - void StateWrite(u32 offset, u64 arguments); - /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -51,6 +51,8 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec + [[nodiscard]] std::string_view GetCurrentCodecName() const; private: bool initialized{}; @@ -60,10 +62,10 @@ private: AVCodecContext* av_codec_ctx{nullptr}; GPU& gpu; + const NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; - NvdecCommon::NvdecRegisters state{}; std::queue<AVFramePtr> av_frames{}; }; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/command_classes/codecs/h264.cpp index fea6aed989..5fb6d45eeb 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/command_classes/codecs/h264.cpp @@ -45,134 +45,129 @@ H264::~H264() = default; const std::vector<u8>& H264::ComposeFrameHeader(const NvdecCommon::NvdecRegisters& state, bool is_first_frame) { - H264DecoderContext context{}; + H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); - const s32 frame_number = static_cast<s32>((context.h264_parameter_set.flags >> 46) & 0x1ffff); + const s64 frame_number = context.h264_parameter_set.frame_number.Value(); if (!is_first_frame && frame_number != 0) { - frame.resize(context.frame_data_size); - + frame.resize(context.stream_len); gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, frame.data(), frame.size()); - } else { - /// Encode header - H264BitWriter writer{}; - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(7, 5); - writer.WriteU(100, 8); - writer.WriteU(0, 8); - writer.WriteU(31, 8); - writer.WriteUe(0); - const auto chroma_format_idc = - static_cast<u32>((context.h264_parameter_set.flags >> 12) & 3); - writer.WriteUe(chroma_format_idc); - if (chroma_format_idc == 3) { - writer.WriteBit(false); - } - - writer.WriteUe(0); - writer.WriteUe(0); - writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag - writer.WriteBit(false); // Scaling matrix present flag - - const auto order_cnt_type = static_cast<u32>((context.h264_parameter_set.flags >> 14) & 3); - writer.WriteUe(static_cast<u32>((context.h264_parameter_set.flags >> 8) & 0xf)); - writer.WriteUe(order_cnt_type); - if (order_cnt_type == 0) { - writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt); - } else if (order_cnt_type == 1) { - writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); - - writer.WriteSe(0); - writer.WriteSe(0); - writer.WriteUe(0); - } - - const s32 pic_height = context.h264_parameter_set.pic_height_in_map_units / - (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + return frame; + } - writer.WriteUe(16); + // Encode header + H264BitWriter writer{}; + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(7, 5); + writer.WriteU(100, 8); + writer.WriteU(0, 8); + writer.WriteU(31, 8); + writer.WriteUe(0); + const u32 chroma_format_idc = + static_cast<u32>(context.h264_parameter_set.chroma_format_idc.Value()); + writer.WriteUe(chroma_format_idc); + if (chroma_format_idc == 3) { writer.WriteBit(false); - writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); - writer.WriteUe(pic_height - 1); - writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - - if (!context.h264_parameter_set.frame_mbs_only_flag) { - writer.WriteBit(((context.h264_parameter_set.flags >> 0) & 1) != 0); - } + } - writer.WriteBit(((context.h264_parameter_set.flags >> 1) & 1) != 0); - writer.WriteBit(false); // Frame cropping flag - writer.WriteBit(false); // VUI parameter present flag + writer.WriteUe(0); + writer.WriteUe(0); + writer.WriteBit(false); // QpprimeYZeroTransformBypassFlag + writer.WriteBit(false); // Scaling matrix present flag - writer.End(); + writer.WriteUe(static_cast<u32>(context.h264_parameter_set.log2_max_frame_num_minus4.Value())); - // H264 PPS - writer.WriteU(1, 24); - writer.WriteU(0, 1); - writer.WriteU(3, 2); - writer.WriteU(8, 5); + const auto order_cnt_type = + static_cast<u32>(context.h264_parameter_set.pic_order_cnt_type.Value()); + writer.WriteUe(order_cnt_type); + if (order_cnt_type == 0) { + writer.WriteUe(context.h264_parameter_set.log2_max_pic_order_cnt_lsb_minus4); + } else if (order_cnt_type == 1) { + writer.WriteBit(context.h264_parameter_set.delta_pic_order_always_zero_flag != 0); + writer.WriteSe(0); + writer.WriteSe(0); writer.WriteUe(0); - writer.WriteUe(0); + } - writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); - writer.WriteBit(false); - writer.WriteUe(0); - writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); - writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); - writer.WriteBit(((context.h264_parameter_set.flags >> 2) & 1) != 0); - writer.WriteU(static_cast<s32>((context.h264_parameter_set.flags >> 32) & 0x3), 2); - s32 pic_init_qp = static_cast<s32>((context.h264_parameter_set.flags >> 16) & 0x3f); - pic_init_qp = (pic_init_qp << 26) >> 26; - writer.WriteSe(pic_init_qp); - writer.WriteSe(0); - s32 chroma_qp_index_offset = - static_cast<s32>((context.h264_parameter_set.flags >> 22) & 0x1f); - chroma_qp_index_offset = (chroma_qp_index_offset << 27) >> 27; + const s32 pic_height = context.h264_parameter_set.frame_height_in_map_units / + (context.h264_parameter_set.frame_mbs_only_flag ? 1 : 2); + + writer.WriteUe(16); + writer.WriteBit(false); + writer.WriteUe(context.h264_parameter_set.pic_width_in_mbs - 1); + writer.WriteUe(pic_height - 1); + writer.WriteBit(context.h264_parameter_set.frame_mbs_only_flag != 0); - writer.WriteSe(chroma_qp_index_offset); - writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_flag != 0); - writer.WriteBit(((context.h264_parameter_set.flags >> 3) & 1) != 0); - writer.WriteBit(context.h264_parameter_set.redundant_pic_count_flag != 0); - writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + if (!context.h264_parameter_set.frame_mbs_only_flag) { + writer.WriteBit(context.h264_parameter_set.flags.mbaff_frame.Value() != 0); + } + writer.WriteBit(context.h264_parameter_set.flags.direct_8x8_inference.Value() != 0); + writer.WriteBit(false); // Frame cropping flag + writer.WriteBit(false); // VUI parameter present flag + + writer.End(); + + // H264 PPS + writer.WriteU(1, 24); + writer.WriteU(0, 1); + writer.WriteU(3, 2); + writer.WriteU(8, 5); + + writer.WriteUe(0); + writer.WriteUe(0); + + writer.WriteBit(context.h264_parameter_set.entropy_coding_mode_flag != 0); + writer.WriteBit(false); + writer.WriteUe(0); + writer.WriteUe(context.h264_parameter_set.num_refidx_l0_default_active); + writer.WriteUe(context.h264_parameter_set.num_refidx_l1_default_active); + writer.WriteBit(context.h264_parameter_set.flags.weighted_pred.Value() != 0); + writer.WriteU(static_cast<s32>(context.h264_parameter_set.weighted_bipred_idc.Value()), 2); + s32 pic_init_qp = static_cast<s32>(context.h264_parameter_set.pic_init_qp_minus26.Value()); + writer.WriteSe(pic_init_qp); + writer.WriteSe(0); + s32 chroma_qp_index_offset = + static_cast<s32>(context.h264_parameter_set.chroma_qp_index_offset.Value()); + + writer.WriteSe(chroma_qp_index_offset); + writer.WriteBit(context.h264_parameter_set.deblocking_filter_control_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.flags.constrained_intra_pred.Value() != 0); + writer.WriteBit(context.h264_parameter_set.redundant_pic_cnt_present_flag != 0); + writer.WriteBit(context.h264_parameter_set.transform_8x8_mode_flag != 0); + + writer.WriteBit(true); + + for (s32 index = 0; index < 6; index++) { writer.WriteBit(true); + std::span<const u8> matrix{context.weight_scale}; + writer.WriteScalingList(matrix, index * 16, 16); + } - for (s32 index = 0; index < 6; index++) { + if (context.h264_parameter_set.transform_8x8_mode_flag) { + for (s32 index = 0; index < 2; index++) { writer.WriteBit(true); - const auto matrix_x4 = - std::vector<u8>(context.scaling_matrix_4.begin(), context.scaling_matrix_4.end()); - writer.WriteScalingList(matrix_x4, index * 16, 16); - } - - if (context.h264_parameter_set.transform_8x8_mode_flag) { - for (s32 index = 0; index < 2; index++) { - writer.WriteBit(true); - const auto matrix_x8 = std::vector<u8>(context.scaling_matrix_8.begin(), - context.scaling_matrix_8.end()); - - writer.WriteScalingList(matrix_x8, index * 64, 64); - } + std::span<const u8> matrix{context.weight_scale_8x8}; + writer.WriteScalingList(matrix, index * 64, 64); } + } - s32 chroma_qp_index_offset2 = - static_cast<s32>((context.h264_parameter_set.flags >> 27) & 0x1f); - chroma_qp_index_offset2 = (chroma_qp_index_offset2 << 27) >> 27; + s32 chroma_qp_index_offset2 = + static_cast<s32>(context.h264_parameter_set.second_chroma_qp_index_offset.Value()); - writer.WriteSe(chroma_qp_index_offset2); + writer.WriteSe(chroma_qp_index_offset2); - writer.End(); + writer.End(); - const auto& encoded_header = writer.GetByteArray(); - frame.resize(encoded_header.size() + context.frame_data_size); - std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); + const auto& encoded_header = writer.GetByteArray(); + frame.resize(encoded_header.size() + context.stream_len); + std::memcpy(frame.data(), encoded_header.data(), encoded_header.size()); - gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, - frame.data() + encoded_header.size(), - context.frame_data_size); - } + gpu.MemoryManager().ReadBlock(state.frame_bitstream_offset, + frame.data() + encoded_header.size(), context.stream_len); return frame; } @@ -202,7 +197,7 @@ void H264BitWriter::WriteBit(bool state) { WriteBits(state ? 1 : 0, 1); } -void H264BitWriter::WriteScalingList(const std::vector<u8>& list, s32 start, s32 count) { +void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { std::vector<u8> scan(count); if (count == 16) { std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); diff --git a/src/video_core/command_classes/codecs/h264.h b/src/video_core/command_classes/codecs/h264.h index 0f3a1d9f3f..bfe84a4720 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/command_classes/codecs/h264.h @@ -20,7 +20,9 @@ #pragma once +#include <span> #include <vector> +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/command_classes/nvdec_common.h" @@ -48,7 +50,7 @@ public: /// Based on section 7.3.2.1.1.1 and Table 7-4 in the H.264 specification /// Writes the scaling matrices of the sream - void WriteScalingList(const std::vector<u8>& list, s32 start, s32 count); + void WriteScalingList(std::span<const u8> list, s32 start, s32 count); /// Return the bitstream as a vector. [[nodiscard]] std::vector<u8>& GetByteArray(); @@ -78,40 +80,110 @@ public: const NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: + std::vector<u8> frame; + GPU& gpu; + struct H264ParameterSet { - u32 log2_max_pic_order_cnt{}; - u32 delta_pic_order_always_zero_flag{}; - u32 frame_mbs_only_flag{}; - u32 pic_width_in_mbs{}; - u32 pic_height_in_map_units{}; - INSERT_PADDING_WORDS(1); - u32 entropy_coding_mode_flag{}; - u32 bottom_field_pic_order_flag{}; - u32 num_refidx_l0_default_active{}; - u32 num_refidx_l1_default_active{}; - u32 deblocking_filter_control_flag{}; - u32 redundant_pic_count_flag{}; - u32 transform_8x8_mode_flag{}; - INSERT_PADDING_WORDS(9); - u64 flags{}; - u32 frame_number{}; - u32 frame_number2{}; + s32 log2_max_pic_order_cnt_lsb_minus4; ///< 0x00 + s32 delta_pic_order_always_zero_flag; ///< 0x04 + s32 frame_mbs_only_flag; ///< 0x08 + u32 pic_width_in_mbs; ///< 0x0C + u32 frame_height_in_map_units; ///< 0x10 + union { ///< 0x14 + BitField<0, 2, u32> tile_format; + BitField<2, 3, u32> gob_height; + }; + u32 entropy_coding_mode_flag; ///< 0x18 + s32 pic_order_present_flag; ///< 0x1C + s32 num_refidx_l0_default_active; ///< 0x20 + s32 num_refidx_l1_default_active; ///< 0x24 + s32 deblocking_filter_control_present_flag; ///< 0x28 + s32 redundant_pic_cnt_present_flag; ///< 0x2C + u32 transform_8x8_mode_flag; ///< 0x30 + u32 pitch_luma; ///< 0x34 + u32 pitch_chroma; ///< 0x38 + u32 luma_top_offset; ///< 0x3C + u32 luma_bot_offset; ///< 0x40 + u32 luma_frame_offset; ///< 0x44 + u32 chroma_top_offset; ///< 0x48 + u32 chroma_bot_offset; ///< 0x4C + u32 chroma_frame_offset; ///< 0x50 + u32 hist_buffer_size; ///< 0x54 + union { ///< 0x58 + union { + BitField<0, 1, u64> mbaff_frame; + BitField<1, 1, u64> direct_8x8_inference; + BitField<2, 1, u64> weighted_pred; + BitField<3, 1, u64> constrained_intra_pred; + BitField<4, 1, u64> ref_pic; + BitField<5, 1, u64> field_pic; + BitField<6, 1, u64> bottom_field; + BitField<7, 1, u64> second_field; + } flags; + BitField<8, 4, u64> log2_max_frame_num_minus4; + BitField<12, 2, u64> chroma_format_idc; + BitField<14, 2, u64> pic_order_cnt_type; + BitField<16, 6, s64> pic_init_qp_minus26; + BitField<22, 5, s64> chroma_qp_index_offset; + BitField<27, 5, s64> second_chroma_qp_index_offset; + BitField<32, 2, u64> weighted_bipred_idc; + BitField<34, 7, u64> curr_pic_idx; + BitField<41, 5, u64> curr_col_idx; + BitField<46, 16, u64> frame_number; + BitField<62, 1, u64> frame_surfaces; + BitField<63, 1, u64> output_memory_layout; + }; }; - static_assert(sizeof(H264ParameterSet) == 0x68, "H264ParameterSet is an invalid size"); + static_assert(sizeof(H264ParameterSet) == 0x60, "H264ParameterSet is an invalid size"); struct H264DecoderContext { - INSERT_PADDING_BYTES(0x48); - u32 frame_data_size{}; - INSERT_PADDING_BYTES(0xc); - H264ParameterSet h264_parameter_set{}; - INSERT_PADDING_BYTES(0x100); - std::array<u8, 0x60> scaling_matrix_4; - std::array<u8, 0x80> scaling_matrix_8; + INSERT_PADDING_WORDS_NOINIT(18); ///< 0x0000 + u32 stream_len; ///< 0x0048 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0x004C + H264ParameterSet h264_parameter_set; ///< 0x0058 + INSERT_PADDING_WORDS_NOINIT(66); ///< 0x00B8 + std::array<u8, 0x60> weight_scale; ///< 0x01C0 + std::array<u8, 0x80> weight_scale_8x8; ///< 0x0220 }; - static_assert(sizeof(H264DecoderContext) == 0x2a0, "H264DecoderContext is an invalid size"); - - std::vector<u8> frame; - GPU& gpu; + static_assert(sizeof(H264DecoderContext) == 0x2A0, "H264DecoderContext is an invalid size"); + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264ParameterSet, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(log2_max_pic_order_cnt_lsb_minus4, 0x00); + ASSERT_POSITION(delta_pic_order_always_zero_flag, 0x04); + ASSERT_POSITION(frame_mbs_only_flag, 0x08); + ASSERT_POSITION(pic_width_in_mbs, 0x0C); + ASSERT_POSITION(frame_height_in_map_units, 0x10); + ASSERT_POSITION(tile_format, 0x14); + ASSERT_POSITION(entropy_coding_mode_flag, 0x18); + ASSERT_POSITION(pic_order_present_flag, 0x1C); + ASSERT_POSITION(num_refidx_l0_default_active, 0x20); + ASSERT_POSITION(num_refidx_l1_default_active, 0x24); + ASSERT_POSITION(deblocking_filter_control_present_flag, 0x28); + ASSERT_POSITION(redundant_pic_cnt_present_flag, 0x2C); + ASSERT_POSITION(transform_8x8_mode_flag, 0x30); + ASSERT_POSITION(pitch_luma, 0x34); + ASSERT_POSITION(pitch_chroma, 0x38); + ASSERT_POSITION(luma_top_offset, 0x3C); + ASSERT_POSITION(luma_bot_offset, 0x40); + ASSERT_POSITION(luma_frame_offset, 0x44); + ASSERT_POSITION(chroma_top_offset, 0x48); + ASSERT_POSITION(chroma_bot_offset, 0x4C); + ASSERT_POSITION(chroma_frame_offset, 0x50); + ASSERT_POSITION(hist_buffer_size, 0x54); + ASSERT_POSITION(flags, 0x58); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(H264DecoderContext, field_name) == position, \ + "Field " #field_name " has invalid position") + + ASSERT_POSITION(stream_len, 0x48); + ASSERT_POSITION(h264_parameter_set, 0x58); + ASSERT_POSITION(weight_scale, 0x1C0); +#undef ASSERT_POSITION }; } // namespace Decoder diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 29bb314183..902bc2a982 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -354,7 +354,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { - PictureInfo picture_info{}; + PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -370,7 +370,7 @@ Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) } void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { - EntropyProbs entropy{}; + EntropyProbs entropy; gpu.MemoryManager().ReadBlock(offset, &entropy, sizeof(EntropyProbs)); entropy.Convert(dst); } diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 139501a1c6..2da14f3ca5 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -15,10 +15,10 @@ class GPU; namespace Decoder { struct Vp9FrameDimensions { - s16 width{}; - s16 height{}; - s16 luma_pitch{}; - s16 chroma_pitch{}; + s16 width; + s16 height; + s16 luma_pitch; + s16 chroma_pitch; }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); @@ -49,87 +49,87 @@ enum class TxMode { }; struct Segmentation { - u8 enabled{}; - u8 update_map{}; - u8 temporal_update{}; - u8 abs_delta{}; - std::array<u32, 8> feature_mask{}; - std::array<std::array<s16, 4>, 8> feature_data{}; + u8 enabled; + u8 update_map; + u8 temporal_update; + u8 abs_delta; + std::array<u32, 8> feature_mask; + std::array<std::array<s16, 4>, 8> feature_data; }; static_assert(sizeof(Segmentation) == 0x64, "Segmentation is an invalid size"); struct LoopFilter { - u8 mode_ref_delta_enabled{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; + u8 mode_ref_delta_enabled; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; }; static_assert(sizeof(LoopFilter) == 0x7, "LoopFilter is an invalid size"); struct Vp9EntropyProbs { - std::array<u8, 36> y_mode_prob{}; - std::array<u8, 64> partition_prob{}; - std::array<u8, 1728> coef_probs{}; - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 3> skip_probs{}; - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 20> classes{}; - std::array<u8, 2> class_0{}; - std::array<u8, 20> prob_bits{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; + std::array<u8, 36> y_mode_prob; ///< 0x0000 + std::array<u8, 64> partition_prob; ///< 0x0024 + std::array<u8, 1728> coef_probs; ///< 0x0064 + std::array<u8, 8> switchable_interp_prob; ///< 0x0724 + std::array<u8, 28> inter_mode_prob; ///< 0x072C + std::array<u8, 4> intra_inter_prob; ///< 0x0748 + std::array<u8, 5> comp_inter_prob; ///< 0x074C + std::array<u8, 10> single_ref_prob; ///< 0x0751 + std::array<u8, 5> comp_ref_prob; ///< 0x075B + std::array<u8, 6> tx_32x32_prob; ///< 0x0760 + std::array<u8, 4> tx_16x16_prob; ///< 0x0766 + std::array<u8, 2> tx_8x8_prob; ///< 0x076A + std::array<u8, 3> skip_probs; ///< 0x076C + std::array<u8, 3> joints; ///< 0x076F + std::array<u8, 2> sign; ///< 0x0772 + std::array<u8, 20> classes; ///< 0x0774 + std::array<u8, 2> class_0; ///< 0x0788 + std::array<u8, 20> prob_bits; ///< 0x078A + std::array<u8, 12> class_0_fr; ///< 0x079E + std::array<u8, 6> fr; ///< 0x07AA + std::array<u8, 2> class_0_hp; ///< 0x07B0 + std::array<u8, 2> high_precision; ///< 0x07B2 }; static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame{}; - bool intra_only{}; - bool last_frame_was_key{}; - bool frame_size_changed{}; - bool error_resilient_mode{}; - bool last_frame_shown{}; - bool show_frame{}; - std::array<s8, 4> ref_frame_sign_bias{}; - s32 base_q_index{}; - s32 y_dc_delta_q{}; - s32 uv_dc_delta_q{}; - s32 uv_ac_delta_q{}; - bool lossless{}; - s32 transform_mode{}; - bool allow_high_precision_mv{}; - s32 interp_filter{}; - s32 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - s32 log2_tile_cols{}; - s32 log2_tile_rows{}; - bool segment_enabled{}; - bool segment_map_update{}; - bool segment_map_temporal_update{}; - s32 segment_abs_delta{}; - std::array<u32, 8> segment_feature_enable{}; - std::array<std::array<s16, 4>, 8> segment_feature_data{}; - bool mode_ref_delta_enabled{}; - bool use_prev_in_find_mv_refs{}; - std::array<s8, 4> ref_deltas{}; - std::array<s8, 2> mode_deltas{}; - Vp9EntropyProbs entropy{}; - Vp9FrameDimensions frame_size{}; - u8 first_level{}; - u8 sharpness_level{}; - u32 bitstream_size{}; - std::array<u64, 4> frame_offsets{}; - std::array<bool, 4> refresh_frame{}; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool frame_size_changed; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + std::array<s8, 4> ref_frame_sign_bias; + s32 base_q_index; + s32 y_dc_delta_q; + s32 uv_dc_delta_q; + s32 uv_ac_delta_q; + bool lossless; + s32 transform_mode; + bool allow_high_precision_mv; + s32 interp_filter; + s32 reference_mode; + s8 comp_fixed_ref; + std::array<s8, 2> comp_var_ref; + s32 log2_tile_cols; + s32 log2_tile_rows; + bool segment_enabled; + bool segment_map_update; + bool segment_map_temporal_update; + s32 segment_abs_delta; + std::array<u32, 8> segment_feature_enable; + std::array<std::array<s16, 4>, 8> segment_feature_data; + bool mode_ref_delta_enabled; + bool use_prev_in_find_mv_refs; + std::array<s8, 4> ref_deltas; + std::array<s8, 2> mode_deltas; + Vp9EntropyProbs entropy; + Vp9FrameDimensions frame_size; + u8 first_level; + u8 sharpness_level; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; + std::array<bool, 4> refresh_frame; }; struct Vp9FrameContainer { @@ -138,35 +138,35 @@ struct Vp9FrameContainer { }; struct PictureInfo { - INSERT_PADDING_WORDS(12); - u32 bitstream_size{}; - INSERT_PADDING_WORDS(5); - Vp9FrameDimensions last_frame_size{}; - Vp9FrameDimensions golden_frame_size{}; - Vp9FrameDimensions alt_frame_size{}; - Vp9FrameDimensions current_frame_size{}; - u32 vp9_flags{}; - std::array<s8, 4> ref_frame_sign_bias{}; - u8 first_level{}; - u8 sharpness_level{}; - u8 base_q_index{}; - u8 y_dc_delta_q{}; - u8 uv_ac_delta_q{}; - u8 uv_dc_delta_q{}; - u8 lossless{}; - u8 tx_mode{}; - u8 allow_high_precision_mv{}; - u8 interp_filter{}; - u8 reference_mode{}; - s8 comp_fixed_ref{}; - std::array<s8, 2> comp_var_ref{}; - u8 log2_tile_cols{}; - u8 log2_tile_rows{}; - Segmentation segmentation{}; - LoopFilter loop_filter{}; - INSERT_PADDING_BYTES(5); - u32 surface_params{}; - INSERT_PADDING_WORDS(3); + INSERT_PADDING_WORDS_NOINIT(12); ///< 0x00 + u32 bitstream_size; ///< 0x30 + INSERT_PADDING_WORDS_NOINIT(5); ///< 0x34 + Vp9FrameDimensions last_frame_size; ///< 0x48 + Vp9FrameDimensions golden_frame_size; ///< 0x50 + Vp9FrameDimensions alt_frame_size; ///< 0x58 + Vp9FrameDimensions current_frame_size; ///< 0x60 + u32 vp9_flags; ///< 0x68 + std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C + u8 first_level; ///< 0x70 + u8 sharpness_level; ///< 0x71 + u8 base_q_index; ///< 0x72 + u8 y_dc_delta_q; ///< 0x73 + u8 uv_ac_delta_q; ///< 0x74 + u8 uv_dc_delta_q; ///< 0x75 + u8 lossless; ///< 0x76 + u8 tx_mode; ///< 0x77 + u8 allow_high_precision_mv; ///< 0x78 + u8 interp_filter; ///< 0x79 + u8 reference_mode; ///< 0x7A + s8 comp_fixed_ref; ///< 0x7B + std::array<s8, 2> comp_var_ref; ///< 0x7C + u8 log2_tile_cols; ///< 0x7E + u8 log2_tile_rows; ///< 0x7F + Segmentation segmentation; ///< 0x80 + LoopFilter loop_filter; ///< 0xE4 + INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB + u32 surface_params; ///< 0xF0 + INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 [[nodiscard]] Vp9PictureInfo Convert() const { return { @@ -176,6 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, + .show_frame = false, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, @@ -204,45 +205,48 @@ struct PictureInfo { !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, + .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, .bitstream_size = bitstream_size, + .frame_offsets{}, + .refresh_frame{}, }; } }; static_assert(sizeof(PictureInfo) == 0x100, "PictureInfo is an invalid size"); struct EntropyProbs { - INSERT_PADDING_BYTES(1024); - std::array<u8, 28> inter_mode_prob{}; - std::array<u8, 4> intra_inter_prob{}; - INSERT_PADDING_BYTES(80); - std::array<u8, 2> tx_8x8_prob{}; - std::array<u8, 4> tx_16x16_prob{}; - std::array<u8, 6> tx_32x32_prob{}; - std::array<u8, 4> y_mode_prob_e8{}; - std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7{}; - INSERT_PADDING_BYTES(64); - std::array<u8, 64> partition_prob{}; - INSERT_PADDING_BYTES(10); - std::array<u8, 8> switchable_interp_prob{}; - std::array<u8, 5> comp_inter_prob{}; - std::array<u8, 3> skip_probs{}; - INSERT_PADDING_BYTES(1); - std::array<u8, 3> joints{}; - std::array<u8, 2> sign{}; - std::array<u8, 2> class_0{}; - std::array<u8, 6> fr{}; - std::array<u8, 2> class_0_hp{}; - std::array<u8, 2> high_precision{}; - std::array<u8, 20> classes{}; - std::array<u8, 12> class_0_fr{}; - std::array<u8, 20> pred_bits{}; - std::array<u8, 10> single_ref_prob{}; - std::array<u8, 5> comp_ref_prob{}; - INSERT_PADDING_BYTES(17); - std::array<u8, 2304> coef_probs{}; + INSERT_PADDING_BYTES_NOINIT(1024); ///< 0x0000 + std::array<u8, 28> inter_mode_prob; ///< 0x0400 + std::array<u8, 4> intra_inter_prob; ///< 0x041C + INSERT_PADDING_BYTES_NOINIT(80); ///< 0x0420 + std::array<u8, 2> tx_8x8_prob; ///< 0x0470 + std::array<u8, 4> tx_16x16_prob; ///< 0x0472 + std::array<u8, 6> tx_32x32_prob; ///< 0x0476 + std::array<u8, 4> y_mode_prob_e8; ///< 0x047C + std::array<std::array<u8, 8>, 4> y_mode_prob_e0e7; ///< 0x0480 + INSERT_PADDING_BYTES_NOINIT(64); ///< 0x04A0 + std::array<u8, 64> partition_prob; ///< 0x04E0 + INSERT_PADDING_BYTES_NOINIT(10); ///< 0x0520 + std::array<u8, 8> switchable_interp_prob; ///< 0x052A + std::array<u8, 5> comp_inter_prob; ///< 0x0532 + std::array<u8, 3> skip_probs; ///< 0x0537 + INSERT_PADDING_BYTES_NOINIT(1); ///< 0x053A + std::array<u8, 3> joints; ///< 0x053B + std::array<u8, 2> sign; ///< 0x053E + std::array<u8, 2> class_0; ///< 0x0540 + std::array<u8, 6> fr; ///< 0x0542 + std::array<u8, 2> class_0_hp; ///< 0x0548 + std::array<u8, 2> high_precision; ///< 0x054A + std::array<u8, 20> classes; ///< 0x054C + std::array<u8, 12> class_0_fr; ///< 0x0560 + std::array<u8, 20> pred_bits; ///< 0x056C + std::array<u8, 10> single_ref_prob; ///< 0x0580 + std::array<u8, 5> comp_ref_prob; ///< 0x058A + INSERT_PADDING_BYTES_NOINIT(17); ///< 0x058F + std::array<u8, 2304> coef_probs; ///< 0x05A0 void Convert(Vp9EntropyProbs& fc) { fc.inter_mode_prob = inter_mode_prob; @@ -293,10 +297,45 @@ struct RefPoolElement { }; struct FrameContexts { - s64 from{}; - bool adapted{}; - Vp9EntropyProbs probs{}; + s64 from; + bool adapted; + Vp9EntropyProbs probs; }; +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(Vp9EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(partition_prob, 0x0024); +ASSERT_POSITION(switchable_interp_prob, 0x0724); +ASSERT_POSITION(sign, 0x0772); +ASSERT_POSITION(class_0_fr, 0x079E); +ASSERT_POSITION(high_precision, 0x07B2); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(PictureInfo, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(bitstream_size, 0x30); +ASSERT_POSITION(last_frame_size, 0x48); +ASSERT_POSITION(first_level, 0x70); +ASSERT_POSITION(segmentation, 0x80); +ASSERT_POSITION(loop_filter, 0xE4); +ASSERT_POSITION(surface_params, 0xF0); +#undef ASSERT_POSITION + +#define ASSERT_POSITION(field_name, position) \ + static_assert(offsetof(EntropyProbs, field_name) == position, \ + "Field " #field_name " has invalid position") + +ASSERT_POSITION(inter_mode_prob, 0x400); +ASSERT_POSITION(tx_8x8_prob, 0x470); +ASSERT_POSITION(partition_prob, 0x4E0); +ASSERT_POSITION(class_0, 0x540); +ASSERT_POSITION(class_0_fr, 0x560); +ASSERT_POSITION(coef_probs, 0x5A0); +#undef ASSERT_POSITION + }; // namespace Decoder }; // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/command_classes/nvdec.cpp index e4f919afd1..b5e3b70fcc 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/command_classes/nvdec.cpp @@ -8,22 +8,21 @@ namespace Tegra { -Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), codec(std::make_unique<Codec>(gpu)) {} +#define NVDEC_REG_INDEX(field_name) \ + (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) + +Nvdec::Nvdec(GPU& gpu_) : gpu(gpu_), state{}, codec(std::make_unique<Codec>(gpu, state)) {} Nvdec::~Nvdec() = default; -void Nvdec::ProcessMethod(Method method, u32 argument) { - if (method == Method::SetVideoCodec) { - codec->StateWrite(static_cast<u32>(method), argument); - } else { - codec->StateWrite(static_cast<u32>(method), static_cast<u64>(argument) << 8); - } +void Nvdec::ProcessMethod(u32 method, u32 argument) { + state.reg_array[method] = static_cast<u64>(argument) << 8; switch (method) { - case Method::SetVideoCodec: + case NVDEC_REG_INDEX(set_codec_id): codec->SetTargetCodec(static_cast<NvdecCommon::VideoCodec>(argument)); break; - case Method::Execute: + case NVDEC_REG_INDEX(execute): Execute(); break; } diff --git a/src/video_core/command_classes/nvdec.h b/src/video_core/command_classes/nvdec.h index e66be80b88..6e1da0b04f 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/command_classes/nvdec.h @@ -14,16 +14,11 @@ class GPU; class Nvdec { public: - enum class Method : u32 { - SetVideoCodec = 0x80, - Execute = 0xc0, - }; - explicit Nvdec(GPU& gpu); ~Nvdec(); /// Writes the method into the state, Invoke Execute() if encountered - void ProcessMethod(Method method, u32 argument); + void ProcessMethod(u32 method, u32 argument); /// Return most recently decoded frame [[nodiscard]] AVFramePtr GetFrame(); @@ -33,6 +28,7 @@ private: void Execute(); GPU& gpu; + NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/command_classes/nvdec_common.h index 01b5e086d4..6a24e00a0c 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/command_classes/nvdec_common.h @@ -4,40 +4,13 @@ #pragma once +#include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" namespace Tegra::NvdecCommon { -struct NvdecRegisters { - INSERT_PADDING_WORDS(256); - u64 set_codec_id{}; - INSERT_PADDING_WORDS(254); - u64 set_platform_id{}; - u64 picture_info_offset{}; - u64 frame_bitstream_offset{}; - u64 frame_number{}; - u64 h264_slice_data_offsets{}; - u64 h264_mv_dump_offset{}; - INSERT_PADDING_WORDS(6); - u64 frame_stats_offset{}; - u64 h264_last_surface_luma_offset{}; - u64 h264_last_surface_chroma_offset{}; - std::array<u64, 17> surface_luma_offset{}; - std::array<u64, 17> surface_chroma_offset{}; - INSERT_PADDING_WORDS(132); - u64 vp9_entropy_probs_offset{}; - u64 vp9_backward_updates_offset{}; - u64 vp9_last_frame_segmap_offset{}; - u64 vp9_curr_frame_segmap_offset{}; - INSERT_PADDING_WORDS(2); - u64 vp9_last_frame_mvs_offset{}; - u64 vp9_curr_frame_mvs_offset{}; - INSERT_PADDING_WORDS(2); -}; -static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); - -enum class VideoCodec : u32 { +enum class VideoCodec : u64 { None = 0x0, H264 = 0x3, Vp8 = 0x5, @@ -45,4 +18,76 @@ enum class VideoCodec : u32 { Vp9 = 0x9, }; +// NVDEC should use a 32-bit address space, but is mapped to 64-bit, +// doubling the sizes here is compensating for that. +struct NvdecRegisters { + static constexpr std::size_t NUM_REGS = 0x178; + + union { + struct { + INSERT_PADDING_WORDS_NOINIT(256); ///< 0x0000 + VideoCodec set_codec_id; ///< 0x0400 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0408 + u64 execute; ///< 0x0600 + INSERT_PADDING_WORDS_NOINIT(126); ///< 0x0608 + struct { ///< 0x0800 + union { + BitField<0, 3, VideoCodec> codec; + BitField<4, 1, u64> gp_timer_on; + BitField<13, 1, u64> mb_timer_on; + BitField<14, 1, u64> intra_frame_pslc; + BitField<17, 1, u64> all_intra_frame; + }; + } control_params; + u64 picture_info_offset; ///< 0x0808 + u64 frame_bitstream_offset; ///< 0x0810 + u64 frame_number; ///< 0x0818 + u64 h264_slice_data_offsets; ///< 0x0820 + u64 h264_mv_dump_offset; ///< 0x0828 + INSERT_PADDING_WORDS_NOINIT(6); ///< 0x0830 + u64 frame_stats_offset; ///< 0x0848 + u64 h264_last_surface_luma_offset; ///< 0x0850 + u64 h264_last_surface_chroma_offset; ///< 0x0858 + std::array<u64, 17> surface_luma_offset; ///< 0x0860 + std::array<u64, 17> surface_chroma_offset; ///< 0x08E8 + INSERT_PADDING_WORDS_NOINIT(132); ///< 0x0970 + u64 vp9_entropy_probs_offset; ///< 0x0B80 + u64 vp9_backward_updates_offset; ///< 0x0B88 + u64 vp9_last_frame_segmap_offset; ///< 0x0B90 + u64 vp9_curr_frame_segmap_offset; ///< 0x0B98 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BA0 + u64 vp9_last_frame_mvs_offset; ///< 0x0BA8 + u64 vp9_curr_frame_mvs_offset; ///< 0x0BB0 + INSERT_PADDING_WORDS_NOINIT(2); ///< 0x0BB8 + }; + std::array<u64, NUM_REGS> reg_array; + }; +}; +static_assert(sizeof(NvdecRegisters) == (0xBC0), "NvdecRegisters is incorrect size"); + +#define ASSERT_REG_POSITION(field_name, position) \ + static_assert(offsetof(NvdecRegisters, field_name) == position * sizeof(u64), \ + "Field " #field_name " has invalid position") + +ASSERT_REG_POSITION(set_codec_id, 0x80); +ASSERT_REG_POSITION(execute, 0xC0); +ASSERT_REG_POSITION(control_params, 0x100); +ASSERT_REG_POSITION(picture_info_offset, 0x101); +ASSERT_REG_POSITION(frame_bitstream_offset, 0x102); +ASSERT_REG_POSITION(frame_number, 0x103); +ASSERT_REG_POSITION(h264_slice_data_offsets, 0x104); +ASSERT_REG_POSITION(frame_stats_offset, 0x109); +ASSERT_REG_POSITION(h264_last_surface_luma_offset, 0x10A); +ASSERT_REG_POSITION(h264_last_surface_chroma_offset, 0x10B); +ASSERT_REG_POSITION(surface_luma_offset, 0x10C); +ASSERT_REG_POSITION(surface_chroma_offset, 0x11D); +ASSERT_REG_POSITION(vp9_entropy_probs_offset, 0x170); +ASSERT_REG_POSITION(vp9_backward_updates_offset, 0x171); +ASSERT_REG_POSITION(vp9_last_frame_segmap_offset, 0x172); +ASSERT_REG_POSITION(vp9_curr_frame_segmap_offset, 0x173); +ASSERT_REG_POSITION(vp9_last_frame_mvs_offset, 0x175); +ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); + +#undef ASSERT_REG_POSITION + } // namespace Tegra::NvdecCommon diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 0a8b82f2b8..ff3db0aeea 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp @@ -3,7 +3,21 @@ // Refer to the license.txt file included. #include <array> + +extern "C" { +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif +#include <libswscale/swscale.h> +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + #include "common/assert.h" +#include "common/logging/log.h" + #include "video_core/command_classes/nvdec.h" #include "video_core/command_classes/vic.h" #include "video_core/engines/maxwell_3d.h" @@ -11,10 +25,6 @@ #include "video_core/memory_manager.h" #include "video_core/textures/decoders.h" -extern "C" { -#include <libswscale/swscale.h> -} - namespace Tegra { Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 0f640fdaeb..f26530ede8 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -7,6 +7,10 @@ #include "video_core/engines/fermi_2d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" + +using VideoCore::Surface::BytesPerBlock; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; namespace Tegra::Engines { @@ -49,7 +53,7 @@ void Fermi2D::Blit() { UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); const auto& args = regs.pixels_from_memory; - const Config config{ + Config config{ .operation = regs.operation, .filter = args.sample_mode.filter, .dst_x0 = args.dst_x0, @@ -61,7 +65,21 @@ void Fermi2D::Blit() { .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; - if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { + Surface src = regs.src; + const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + const auto need_align_to_pitch = + src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && + static_cast<s32>(src.width) == config.src_x1 && + config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0; + if (need_align_to_pitch) { + auto address = src.Address() + config.src_x0 * bytes_per_pixel; + src.addr_upper = static_cast<u32>(address >> 32); + src.addr_lower = static_cast<u32>(address); + src.width -= config.src_x0; + config.src_x1 -= config.src_x0; + config.src_x0 = 0; + } + if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2208e19226..c9cff74503 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -18,7 +18,10 @@ set(SHADER_FILES vulkan_uint8.comp ) -find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED) +find_program(GLSLANGVALIDATOR "glslangValidator") +if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") + message(FATAL_ERROR "Required program `glslangValidator` not found.") +endif() set(GLSL_FLAGS "") set(QUIET_FLAG "--quiet") diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7124c755c7..d2b9d5f2b0 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - rasterizer->UnmapMemory(*cpu_addr, size); + const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); + + for (const auto& map : submapped_ranges) { + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first); + ASSERT(cpu_addr); + + rasterizer->UnmapMemory(*cpu_addr, map.second); + } UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } @@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s //// Lock the new page // TryLockPage(page_entry, size); + auto& current_page = page_table[PageEntryIndex(gpu_addr)]; - page_table[PageEntryIndex(gpu_addr)] = page_entry; + if ((!current_page.IsValid() && page_entry.IsValid()) || + current_page.ToAddress() != page_entry.ToAddress()) { + rasterizer->ModifyGPUMemory(gpu_addr, size); + } + + current_page = page_entry; } std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align, @@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { return page_entry.ToAddress() + (gpu_addr & page_mask); } +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { + size_t page_index{addr >> page_bits}; + const size_t page_last{(addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (page_addr && *page_addr != 0) { + return page_addr; + } + ++page_index; + } + return std::nullopt; +} + template <typename T> T MemoryManager::Read(GPUVAddr addr) const { if (auto page_pointer{GetPointer(addr)}; page_pointer) { @@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { return page <= Core::Memory::PAGE_SIZE; } +bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + std::optional<VAddr> old_page_addr{}; + while (page_index != page_last) { + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr || *page_addr == 0) { + return false; + } + if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + return false; + } + } + old_page_addr = page_addr; + ++page_index; + } + return true; +} + +bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { + size_t page_index{gpu_addr >> page_bits}; + const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; + while (page_index < page_last) { + if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { + return false; + } + ++page_index; + } + return true; +} + +std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( + GPUVAddr gpu_addr, std::size_t size) const { + std::vector<std::pair<GPUVAddr, std::size_t>> result{}; + size_t page_index{gpu_addr >> page_bits}; + size_t remaining_size{size}; + size_t page_offset{gpu_addr & page_mask}; + std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; + std::optional<VAddr> old_page_addr{}; + const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) { + if (!last_segment) { + GPUVAddr new_base_addr = page_index << page_bits; + last_segment = {new_base_addr, bytes}; + } else { + last_segment->second += bytes; + } + }; + const auto split = [this, &last_segment, &result] { + if (last_segment) { + result.push_back(*last_segment); + last_segment = std::nullopt; + } + }; + while (remaining_size > 0) { + const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; + const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; + if (!page_addr) { + split(); + } else if (old_page_addr) { + if (*old_page_addr + page_size != *page_addr) { + split(); + } + extend_size(num_bytes); + } else { + extend_size(num_bytes); + } + ++page_index; + page_offset = 0; + remaining_size -= num_bytes; + } + split(); + return result; +} + } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b3538d503d..99d13e7f66 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -76,6 +76,8 @@ public: [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; + template <typename T> [[nodiscard]] T Read(GPUVAddr addr) const; @@ -112,10 +114,28 @@ public: void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** - * IsGranularRange checks if a gpu region can be simply read with a pointer. + * Checks if a gpu region can be simply read with a pointer. */ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; + /** + * Checks if a gpu region is mapped by a single range of cpu addresses. + */ + [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Checks if a gpu region is mapped entirely. + */ + [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; + + /** + * Returns a vector with all the subranges of cpu addresses mapped beneath. + * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector + * will be returned; + */ + std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, + std::size_t size) const; + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 07939432f3..0cec4225b0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -87,6 +87,9 @@ public: /// Unmap memory range virtual void UnmapMemory(VAddr addr, u64 size) = 0; + /// Remap GPU memory range. This means underneath backing memory changed + virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0; + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 320ee8d30d..63d8ad42a4 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -42,6 +42,8 @@ public: [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; + [[nodiscard]] virtual std::string GetDeviceVendor() const = 0; + // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3f4532ca72..3b00614e70 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -202,13 +202,13 @@ Device::Device() { LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); throw std::runtime_error{"Insufficient version"}; } - const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); + vendor_name = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); - const bool is_nvidia = vendor == "NVIDIA Corporation"; - const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_intel = vendor == "Intel"; + const bool is_nvidia = vendor_name == "NVIDIA Corporation"; + const bool is_amd = vendor_name == "ATI Technologies Inc."; + const bool is_intel = vendor_name == "Intel"; #ifdef __unix__ const bool is_linux = true; @@ -275,6 +275,56 @@ Device::Device() { } } +std::string Device::GetVendorName() const { + if (vendor_name == "NVIDIA Corporation") { + return "NVIDIA"; + } + if (vendor_name == "ATI Technologies Inc.") { + return "AMD"; + } + if (vendor_name == "Intel") { + // For Mesa, `Intel` is an overloaded vendor string that could mean crocus or iris. + // Simply return `INTEL` for those as well as the Windows driver. + return "INTEL"; + } + if (vendor_name == "Intel Open Source Technology Center") { + return "I965"; + } + if (vendor_name == "Mesa Project") { + return "I915"; + } + if (vendor_name == "Mesa/X.org") { + // This vendor string is overloaded between llvmpipe, softpipe, and virgl, so just return + // MESA instead of one of those driver names. + return "MESA"; + } + if (vendor_name == "AMD") { + return "RADEONSI"; + } + if (vendor_name == "nouveau") { + return "NOUVEAU"; + } + if (vendor_name == "X.Org") { + return "R600"; + } + if (vendor_name == "Collabora Ltd") { + return "ZINK"; + } + if (vendor_name == "Intel Corporation") { + return "OPENSWR"; + } + if (vendor_name == "Microsoft Corporation") { + return "D3D12"; + } + if (vendor_name == "NVIDIA") { + // Mesa's tegra driver reports `NVIDIA`. Only present in this list because the default + // strategy would have returned `NVIDIA` here for this driver, the same result as the + // proprietary driver. + return "TEGRA"; + } + return vendor_name; +} + Device::Device(std::nullptr_t) { max_uniform_buffers.fill(std::numeric_limits<u32>::max()); uniform_buffer_alignment = 4; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index f24bd0c7b7..2c2b13767a 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -22,6 +22,8 @@ public: explicit Device(); explicit Device(std::nullptr_t); + [[nodiscard]] std::string GetVendorName() const; + u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; } @@ -130,6 +132,7 @@ private: static bool TestVariableAoffi(); static bool TestPreciseBug(); + std::string vendor_name; std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; size_t uniform_buffer_alignment{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eb8bdaa85a..07ad0e205b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { shader_cache.OnCPUWrite(addr, size); } +void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9995a563b6..482efed7a0 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -80,6 +80,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 23948feed4..ff0f03e997 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -327,7 +327,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 if (format_info.is_compressed) { return false; } - if (std::ranges::find(ACCELERATED_FORMATS, internal_format) == ACCELERATED_FORMATS.end()) { + if (std::ranges::find(ACCELERATED_FORMATS, static_cast<int>(internal_format)) == + ACCELERATED_FORMATS.end()) { return false; } if (format_info.compatibility_by_size) { @@ -341,6 +342,20 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, VideoCommon::SubresourceLayers subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(0), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyOrigin{ + .level = static_cast<GLint>(subresource.base_level), + .x = static_cast<GLint>(offset.x), + .y = static_cast<GLint>(0), + .z = static_cast<GLint>(subresource.base_layer), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyOrigin{ @@ -366,6 +381,18 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 VideoCommon::SubresourceLayers dst_subresource, GLenum target) { switch (target) { + case GL_TEXTURE_1D: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(1), + }; + case GL_TEXTURE_1D_ARRAY: + return CopyRegion{ + .width = static_cast<GLsizei>(extent.width), + .height = static_cast<GLsizei>(1), + .depth = static_cast<GLsizei>(dst_subresource.num_layers), + }; case GL_TEXTURE_2D_ARRAY: case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: return CopyRegion{ diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index cc19a110f1..0b66f83322 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -70,6 +70,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetVendorName(); + } + private: /// Initializes the OpenGL state and creates persistent objects. void InitOpenGLObjects(); diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index abaf1ee6a7..8fb5be3935 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z); glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z); glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), - copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI); + copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI); glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), - copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); + copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI); glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); } program_manager.RestoreGuestCompute(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 72071316c9..d7d17e1101 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -47,6 +47,10 @@ public: return &rasterizer; } + [[nodiscard]] std::string GetDeviceVendor() const override { + return device.GetDriverName(); + } + private: void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 8cb65e5881..0df4e1a1cb 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) { template <typename T> std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; - std::ranges::transform(indices, indices.begin(), - [quad, first](u32 index) { return first + index + quad * 4; }); + for (T& index : indices) { + index = static_cast<T>(first + index + quad * 4); + } return indices; } } // Anonymous namespace diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 1c91201709..bd4d649cc7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); } +void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { + { + std::scoped_lock lock{texture_cache.mutex}; + texture_cache.UnmapGPUMemory(addr, size); + } +} + void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write<u32>(addr, value); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index cb8c5c279a..41459c5c5c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -72,6 +72,7 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; + void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index ad69d32d1f..6052d148ae 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } +ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) + : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} + std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { if (other_addr < gpu_addr) { // Subresource address can't be lower than the base @@ -82,7 +85,7 @@ std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const if (info.type != ImageType::e3D) { const auto [layer, mip_offset] = LayerMipOffset(diff, info.layer_stride); const auto end = mip_level_offsets.begin() + info.resources.levels; - const auto it = std::find(mip_level_offsets.begin(), end, mip_offset); + const auto it = std::find(mip_level_offsets.begin(), end, static_cast<u32>(mip_offset)); if (layer > info.resources.layers || it == end) { return std::nullopt; } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index e326cab71c..ff1feda9b3 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + Remapped = 1 << 8, ///< Image has been remapped. + Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 9, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 11, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -57,6 +59,12 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + void CheckBadOverlapState(); void CheckAliasState(); @@ -84,6 +92,29 @@ struct ImageBase { std::vector<AliasedImage> aliased_images; std::vector<ImageId> overlapping_images; + ImageMapId map_view_id{}; +}; + +struct ImageMapView { + explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id); + + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { + const VAddr overlap_end = overlap_cpu_addr + overlap_size; + const VAddr cpu_addr_end = cpu_addr + size; + return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; + } + + [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { + const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size; + const GPUVAddr gpu_addr_end = gpu_addr + size; + return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; + } + + GPUVAddr gpu_addr; + VAddr cpu_addr; + size_t size; + ImageId image_id; + bool picked{}; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 84530a1791..01de2d4984 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -13,6 +13,7 @@ #include <span> #include <type_traits> #include <unordered_map> +#include <unordered_set> #include <utility> #include <vector> @@ -110,9 +111,6 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); - /// Runs the Garbage Collector. - void RunGarbageCollector(); - /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -155,12 +153,13 @@ public: /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy, - std::optional<Region2D> src_region_override = {}, - std::optional<Region2D> dst_region_override = {}); + const Tegra::Engines::Fermi2D::Config& copy); /// Invalidate the contents of the color buffer index /// These contents become unspecified, the cache can assume aggressive optimizations. @@ -193,7 +192,22 @@ public: private: /// Iterate over all page indices in a range template <typename Func> - static void ForEachPage(VAddr addr, size_t size, Func&& func) { + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template <typename Func> + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; const u64 page_end = (addr + size - 1) >> PAGE_BITS; for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { @@ -207,6 +221,9 @@ private: } } + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Fills image_view_ids in the image views in indices void FillImageViews(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, @@ -220,7 +237,7 @@ private: FramebufferId GetFramebufferId(const RenderTargets& key); /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image); + void RefreshContents(Image& image, ImageId image_id); /// Upload data from guest to an image template <typename StagingBuffer> @@ -269,6 +286,16 @@ private: template <typename Func> void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + template <typename Func> + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template <typename Func> + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachSparseSegment(ImageBase& image, Func&& func); + /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -279,10 +306,10 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(ImageBase& image); + void TrackImage(ImageBase& image, ImageId image_id); /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image); + void UntrackImage(ImageBase& image, ImageId image_id); /// Delete image from the cache void DeleteImage(ImageId image); @@ -340,7 +367,13 @@ private: std::unordered_map<TSCEntry, SamplerId> samplers; std::unordered_map<RenderTargets, FramebufferId> framebuffers; - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; + + std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + + VAddr virtual_invalid_space{}; bool has_deleted_images = false; u64 total_used_memory = 0; @@ -349,6 +382,7 @@ private: u64 critical_memory; SlotVector<Image> slot_images; + SlotVector<ImageMapView> slot_map_views; SlotVector<ImageView> slot_image_views; SlotVector<ImageAlloc> slot_image_allocs; SlotVector<Sampler> slot_samplers; @@ -459,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() { } } if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image); + UntrackImage(*image, image_id); } UnregisterImage(image_id); DeleteImage(image_id); @@ -658,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { return; } image.flags |= ImageFlagBits::CpuModified; - UntrackImage(image); + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } }); } @@ -695,7 +731,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { for (const ImageId id : deleted_images) { Image& image = slot_images[id]; if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image); + UntrackImage(image, id); } UnregisterImage(id); DeleteImage(id); @@ -703,11 +739,26 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { } template <class P> +void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} + +template <class P> void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy, - std::optional<Region2D> src_override, - std::optional<Region2D> dst_override) { + const Tegra::Engines::Fermi2D::Config& copy) { const BlitImages images = GetBlitImages(dst, src); const ImageId dst_id = images.dst_id; const ImageId src_id = images.src_id; @@ -718,47 +769,25 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const ImageBase& src_image = slot_images[src_id]; // TODO: Deduplicate - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - - // out of bounds texture blit checking - const bool use_override = src_override.has_value(); - const s32 src_x0 = copy.src_x0 >> src_samples_x; - s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x; - const s32 src_y0 = copy.src_y0 >> src_samples_y; - const s32 src_y1 = copy.src_y1 >> src_samples_y; - - const auto src_width = static_cast<s32>(src_image.info.size.width); - const bool width_oob = src_x1 > src_width; - const auto width_diff = width_oob ? src_x1 - src_width : 0; - if (width_oob) { - src_x1 = src_width; - } - - const Region2D src_dimensions{ - Offset2D{.x = src_x0, .y = src_y0}, - Offset2D{.x = src_x1, .y = src_y1}, - }; - const auto src_region = use_override ? *src_override : src_dimensions; - const std::optional src_base = src_image.TryFindBase(src.Address()); const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - const s32 dst_x0 = copy.dst_x0 >> dst_samples_x; - const s32 dst_x1 = copy.dst_x1 >> dst_samples_x; - const s32 dst_y0 = copy.dst_y0 >> dst_samples_y; - const s32 dst_y1 = copy.dst_y1 >> dst_samples_y; - const Region2D dst_dimensions{ - Offset2D{.x = dst_x0, .y = dst_y0}, - Offset2D{.x = dst_x1 - width_diff, .y = dst_y1}, + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, }; - const auto dst_region = use_override ? *dst_override : dst_dimensions; // Always call this after src_framebuffer_id was queried, as the address might be invalidated. Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; @@ -775,21 +804,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, copy.operation); } - - if (width_oob) { - // Continue copy of the oob region of the texture on the next row - auto oob_src = src; - oob_src.height++; - const Region2D src_region_override{ - Offset2D{.x = 0, .y = src_y0 + 1}, - Offset2D{.x = width_diff, .y = src_y1 + 1}, - }; - const Region2D dst_region_override{ - Offset2D{.x = dst_x1 - width_diff, .y = dst_y0}, - Offset2D{.x = dst_x1, .y = dst_y1}, - }; - BlitImage(dst, oob_src, copy, src_region_override, dst_region_override); - } } template <class P> @@ -833,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad if (it == page_table.end()) { return nullptr; } - const auto& image_ids = it->second; - for (const ImageId image_id : image_ids) { - const ImageBase& image = slot_images[image_id]; + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; if (image.cpu_addr != cpu_addr) { continue; } @@ -915,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { } template <class P> -void TextureCache<P>::RefreshContents(Image& image) { +void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { if (False(image.flags & ImageFlagBits::CpuModified)) { // Only upload modified images return; } image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image); + TrackImage(image, image_id); if (image.info.num_samples > 1) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); @@ -958,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) template <class P> ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { - if (!IsValidAddress(gpu_memory, config)) { + if (!IsValidEntry(gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } const auto [pair, is_new] = image_views.try_emplace(config); @@ -1000,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template <class P> ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - return ImageId{}; + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } } const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); @@ -1033,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, template <class P> ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional<VAddr>(fake_addr); + } + } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; @@ -1053,11 +1083,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); std::vector<ImageId> overlap_ids; + std::unordered_set<ImageId> overlaps_found; std::vector<ImageId> left_aliased_ids; std::vector<ImageId> right_aliased_ids; + std::unordered_set<ImageId> ignore_textures; std::vector<ImageId> bad_overlap_ids; - ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { - if (info.type != overlap.info.type) { + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); return; } if (info.type == ImageType::Linear) { @@ -1067,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } + overlaps_found.insert(overlap_id); static constexpr bool strict_size = true; const std::optional<OverlapResult> solution = ResolveOverlap( new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); @@ -1090,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; } - }); + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } + + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + // TODO: Only upload what we need - RefreshContents(new_image); + RefreshContents(new_image, new_image_id); for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; @@ -1107,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA runtime.CopyImage(new_image, overlap, copies); } if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap); + UntrackImage(overlap, overlap_id); } UnregisterImage(overlap_id); DeleteImage(overlap_id); @@ -1242,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; boost::container::small_vector<ImageId, 32> images; - ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { + boost::container::small_vector<ImageMapId, 32> maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == page_table.end()) { if constexpr (BOOL_BREAK) { @@ -1251,12 +1314,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f return; } } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} + +template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } for (const ImageId image_id : it->second) { Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Picked)) { continue; } - if (!image.Overlaps(cpu_addr, size)) { + if (!image.OverlapsGPU(gpu_addr, size)) { continue; } image.flags |= ImageFlagBits::Picked; @@ -1279,6 +1435,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f } template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; + static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} + +template <class P> ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { @@ -1295,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; - ForEachPage(image.cpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { page_table[page].push_back(image_id); }); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || @@ -1304,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector<ImageViewId> sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); } template <class P> @@ -1320,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageId>& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageId>& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); } template <class P> -void TextureCache<P>::TrackImage(ImageBase& image) { +void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); } template <class P> -void TextureCache<P>::UntrackImage(ImageBase& image) { +void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } } template <class P> @@ -1489,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image); + TrackImage(image, image_id); } } else { - RefreshContents(image); + RefreshContents(image, image_id); SynchronizeAliases(image_id); } if (is_modification) { diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index c9571f7e49..9fbdc1ac6c 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; using ImageId = SlotId; +using ImageMapId = SlotId; using ImageViewId = SlotId; using ImageAllocId = SlotId; using SamplerId = SlotId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 4efe042b66..c872517b8e 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -394,7 +394,7 @@ template <u32 GOB_EXTENT> const s32 mip_offset = diff % layer_stride; const std::array offsets = CalculateMipLevelOffsets(new_info); const auto end = offsets.begin() + new_info.resources.levels; - const auto it = std::find(offsets.begin(), end, mip_offset); + const auto it = std::find(offsets.begin(), end, static_cast<u32>(mip_offset)); if (it == end) { // Mipmap is not aligned to any valid size return std::nullopt; @@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { return offsets; } +LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { + const u32 num_levels = info.resources.levels; + const LevelInfo level_info = MakeLevelInfo(info); + LevelArray sizes{}; + for (u32 level = 0; level < num_levels; ++level) { + sizes[level] = CalculateLevelSize(level_info, level); + } + return sizes; +} + std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); std::vector<u32> offsets; @@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { - if (config.Address() == 0) { +bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + const GPUVAddr address = config.Address(); + if (address == 0) { return false; } - if (config.Address() > (u64(1) << 48)) { + if (address > (1ULL << 48)) { return false; } - return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); + if (gpu_memory.GpuToCpuAddress(address).has_value()) { + return true; + } + const ImageInfo info{config}; + const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); + return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); } std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index cdc5cbc75d..766502908f 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -40,6 +40,8 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; +[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; + [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); @@ -55,7 +57,7 @@ struct OverlapResult { const ImageInfo& src, SubresourceBase base); -[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); +[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 7b756ba418..3ab500760f 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -1365,8 +1365,8 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, // each partition. // Determine partitions, partition index, and color endpoint modes - s32 planeIdx = -1; - u32 partitionIndex; + u32 planeIdx{UINT32_MAX}; + u32 partitionIndex{}; u32 colorEndpointMode[4] = {0, 0, 0, 0}; // Define color data. diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 23814afd2c..f214510da6 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -532,6 +532,27 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want return (supported_usage & wanted_usage) == wanted_usage; } +std::string Device::GetDriverName() const { + switch (driver_id) { + case VK_DRIVER_ID_AMD_PROPRIETARY: + return "AMD"; + case VK_DRIVER_ID_AMD_OPEN_SOURCE: + return "AMDVLK"; + case VK_DRIVER_ID_MESA_RADV: + return "RADV"; + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + return "NVIDIA"; + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + return "INTEL"; + case VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA: + return "ANV"; + case VK_DRIVER_ID_MESA_LLVMPIPE: + return "LAVAPIPE"; + default: + return vendor_name; + } +} + void Device::CheckSuitability(bool requires_swapchain) const { std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; bool has_swapchain = false; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 88b2981969..96c0f8c600 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -45,6 +45,9 @@ public: /// Reports a shader to Nsight Aftermath. void SaveShader(const std::vector<u32>& spirv) const; + /// Returns the name of the VkDriverId reported from Vulkan. + std::string GetDriverName() const; + /// Returns the dispatch loader with direct function pointers of the device. const vk::DeviceDispatch& GetDispatchLoader() const { return dld; diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index efdc6aa50d..7a6f84d967 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -143,24 +143,25 @@ void MicroProfileWidget::hideEvent(QHideEvent* ev) { } void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); ev->accept(); } void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, 0); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, 0); MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton); ev->accept(); } void MicroProfileWidget::wheelEvent(QWheelEvent* ev) { - MicroProfileMousePosition(ev->x() / x_scale, ev->y() / y_scale, ev->delta() / 120); + MicroProfileMousePosition(ev->pos().x() / x_scale, ev->pos().y() / y_scale, + ev->angleDelta().y() / 120); ev->accept(); } diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 9c5aeb833e..218b4782b6 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -522,7 +522,9 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); remove_menu->addSeparator(); QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); - QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS")); + QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); + QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); + QAction* dump_romfs_sdmc = dump_romfs_menu->addAction(tr("Dump RomFS to SDMC")); QAction* copy_tid = context_menu.addAction(tr("Copy Title ID to Clipboard")); QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry")); context_menu.addSeparator(); @@ -571,8 +573,12 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri connect(remove_custom_config, &QAction::triggered, [this, program_id, path]() { emit RemoveFileRequested(program_id, GameListRemoveTarget::CustomConfiguration, path); }); - connect(dump_romfs, &QAction::triggered, - [this, program_id, path]() { emit DumpRomFSRequested(program_id, path); }); + connect(dump_romfs, &QAction::triggered, [this, program_id, path]() { + emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::Normal); + }); + connect(dump_romfs_sdmc, &QAction::triggered, [this, program_id, path]() { + emit DumpRomFSRequested(program_id, path, DumpRomFSTarget::SDMC); + }); connect(copy_tid, &QAction::triggered, [this, program_id]() { emit CopyTIDRequested(program_id); }); connect(navigate_to_gamedb_entry, &QAction::triggered, [this, program_id]() { diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h index b630e34ffa..50402da51c 100644 --- a/src/yuzu/game_list.h +++ b/src/yuzu/game_list.h @@ -45,6 +45,11 @@ enum class GameListRemoveTarget { CustomConfiguration, }; +enum class DumpRomFSTarget { + Normal, + SDMC, +}; + enum class InstalledEntryType { Game, Update, @@ -92,7 +97,7 @@ signals: void RemoveInstalledEntryRequested(u64 program_id, InstalledEntryType type); void RemoveFileRequested(u64 program_id, GameListRemoveTarget target, const std::string& game_path); - void DumpRomFSRequested(u64 program_id, const std::string& game_path); + void DumpRomFSRequested(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void CopyTIDRequested(u64 program_id); void NavigateToGamedbEntryRequested(u64 program_id, const CompatibilityList& compatibility_list); diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index cb9d7a8633..5ed3b90b8f 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -104,6 +104,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual #include "input_common/main.h" #include "util/overlay_dialog.h" #include "video_core/gpu.h" +#include "video_core/renderer_base.h" #include "video_core/shader_notify.h" #include "yuzu/about_dialog.h" #include "yuzu/bootmanager.h" @@ -1426,8 +1427,12 @@ void GMainWindow::BootGame(const QString& filename, std::size_t program_index, S title_name = Common::FS::PathToUTF8String( std::filesystem::path{filename.toStdU16String()}.filename()); } + const bool is_64bit = system.Kernel().CurrentProcess()->Is64BitProcess(); + const auto instruction_set_suffix = is_64bit ? " (64-bit)" : " (32-bit)"; + title_name += instruction_set_suffix; LOG_INFO(Frontend, "Booting game: {:016X} | {} | {}", title_id, title_name, title_version); - UpdateWindowTitle(title_name, title_version); + const auto gpu_vendor = system.GPU().Renderer().GetDeviceVendor(); + UpdateWindowTitle(title_name, title_version, gpu_vendor); loading_screen->Prepare(system.GetAppLoader()); loading_screen->show(); @@ -1881,7 +1886,8 @@ void GMainWindow::RemoveCustomConfiguration(u64 program_id, const std::string& g } } -void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path) { +void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_path, + DumpRomFSTarget target) { const auto failed = [this] { QMessageBox::warning(this, tr("RomFS Extraction Failed!"), tr("There was an error copying the RomFS files or the user " @@ -1909,7 +1915,10 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa return; } - const auto dump_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir); + const auto dump_dir = + target == DumpRomFSTarget::Normal + ? Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir) + : Common::FS::GetYuzuPath(Common::FS::YuzuPath::SDMCDir) / "atmosphere" / "contents"; const auto romfs_dir = fmt::format("{:016X}/romfs", *romfs_title_id); const auto path = Common::FS::PathToUTF8String(dump_dir / romfs_dir); @@ -1919,7 +1928,8 @@ void GMainWindow::OnGameListDumpRomFS(u64 program_id, const std::string& game_pa if (*romfs_title_id == program_id) { const u64 ivfc_offset = loader->ReadRomFSIVFCOffset(); const FileSys::PatchManager pm{program_id, system.GetFileSystemController(), installed}; - romfs = pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program); + romfs = + pm.PatchRomFS(file, ivfc_offset, FileSys::ContentRecordType::Program, nullptr, false); } else { romfs = installed.GetEntry(*romfs_title_id, FileSys::ContentRecordType::Data)->GetRomFS(); } @@ -2858,8 +2868,8 @@ void GMainWindow::MigrateConfigFiles() { } } -void GMainWindow::UpdateWindowTitle(const std::string& title_name, - const std::string& title_version) { +void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version, + std::string_view gpu_vendor) { const auto branch_name = std::string(Common::g_scm_branch); const auto description = std::string(Common::g_scm_desc); const auto build_id = std::string(Common::g_build_id); @@ -2872,7 +2882,8 @@ void GMainWindow::UpdateWindowTitle(const std::string& title_name, if (title_name.empty()) { setWindowTitle(QString::fromStdString(window_title)); } else { - const auto run_title = fmt::format("{} | {} | {}", window_title, title_name, title_version); + const auto run_title = + fmt::format("{} | {} | {} | {}", window_title, title_name, title_version, gpu_vendor); setWindowTitle(QString::fromStdString(run_title)); } } diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 11f152cbea..45c8310e12 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -34,6 +34,7 @@ class QProgressDialog; class WaitTreeWidget; enum class GameListOpenTarget; enum class GameListRemoveTarget; +enum class DumpRomFSTarget; enum class InstalledEntryType; class GameListPlaceholder; @@ -244,7 +245,7 @@ private slots: void OnGameListRemoveInstalledEntry(u64 program_id, InstalledEntryType type); void OnGameListRemoveFile(u64 program_id, GameListRemoveTarget target, const std::string& game_path); - void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); + void OnGameListDumpRomFS(u64 program_id, const std::string& game_path, DumpRomFSTarget target); void OnGameListCopyTID(u64 program_id); void OnGameListNavigateToGamedbEntry(u64 program_id, const CompatibilityList& compatibility_list); @@ -287,8 +288,8 @@ private: InstallResult InstallNSPXCI(const QString& filename); InstallResult InstallNCA(const QString& filename); void MigrateConfigFiles(); - void UpdateWindowTitle(const std::string& title_name = {}, - const std::string& title_version = {}); + void UpdateWindowTitle(std::string_view title_name = {}, std::string_view title_version = {}, + std::string_view gpu_vendor = {}); void UpdateStatusBar(); void UpdateStatusButtons(); void UpdateUISettings(); |